Spaces:

Futuresony
/

FutureX

Sleeping

App Files Community

Futuresony committed on May 13

Commit

ed0ccfa

verified ·

1 Parent(s): c3a8689

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -1

app.py CHANGED Viewed

@@ -5,7 +5,58 @@ import torch
 # Use a CPU-compatible base model (replace this with your actual full-precision model)
 base_model_id = "unsloth/gemma-2b"  # Replace with real CPU-compatible model
-lora_model_id = "Futuresony/CCM-AI"
 # Load the base model on CPU
 base_model = AutoModelForCausalLM.from_pretrained(

 # Use a CPU-compatible base model (replace this with your actual full-precision model)
 base_model_id = "unsloth/gemma-2b"  # Replace with real CPU-compatible model
+lora_model_id = "import gradio as gr
+from huggingface_hub import InferenceClient
+import os
+# 🔹 Hugging Face Credentials
+HF_REPO = "Futuresony/gemma2-9b-lora-alpaca"
+HF_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
+client = InferenceClient(HF_REPO, token=HF_TOKEN)
+def format_alpaca_prompt(user_input, system_prompt, history):
+    """Formats input in Alpaca/LLaMA style"""
+    history_str = "\n".join([f"### Instruction:\n{h[0]}\n### Response:\n{h[1]}" for h in history])
+    prompt = f"""{system_prompt}
+{history_str}
+### Instruction:
+{user_input}
+### Response:
+"""
+    return prompt
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    formatted_prompt = format_alpaca_prompt(message, system_message, history)
+    response = client.text_generation(
+        formatted_prompt,
+        max_new_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+    )
+    # ✅ Extract only the response
+    cleaned_response = response.split("### Response:")[-1].strip()
+    history.append((message, cleaned_response))  # ✅ Update history with the new message and response
+    yield cleaned_response  # ✅ Output only the answer
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=250, value=128, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.9, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.99, step=0.01, label="Top-p (nucleus sampling)"),
+    ],
+)
+if __name__ == "__main__":
+    demo.launch()"
 # Load the base model on CPU
 base_model = AutoModelForCausalLM.from_pretrained(