ak0601 committed
Commit b97cf6d · verified · Parent(s): 118bf1e

Update app.py

Files changed (1): app.py (+100 -11)
app.py CHANGED
@@ -1,16 +1,105 @@
+ # import gradio as gr
+ # import os
+ # os.environ["UNSLOTH_DEVICE"] = "cuda"
+ # from unsloth import FastLanguageModel
+ # import torch
+ # HF_TOKEN = os.environ["HF_TOKEN"]
+ # # -------------------- Load Model --------------------
+ # model, tokenizer = FastLanguageModel.from_pretrained(
+ #     model_name="ak0601/gpt-oss-20b-persona-chat", # your trained model
+ #     max_seq_length=1024,
+ #     dtype=None,
+ #     load_in_4bit=True,
+ #     device_map="auto",
+ #     token=HF_TOKEN
+ # )
+
+ # # -------------------- Conversation Formatter --------------------
+ # def format_conversation(conversation):
+ #     text = ""
+ #     for turn in conversation:
+ #         if turn["role"] == "system":
+ #             text += f"[SYSTEM] {turn['content']}\n"
+ #         elif turn["role"] == "user":
+ #             text += f"[USER] {turn['content']}\n"
+ #         elif turn["role"] == "assistant":
+ #             text += f"[ASSISTANT] {turn['content']}\n"
+ #     text += "[ASSISTANT]"
+ #     return text
+
+ # def generate_reply(conversation):
+ #     inputs = tokenizer(
+ #         format_conversation(conversation),
+ #         return_tensors="pt"
+ #     ).to(model.device)
+
+ #     output_ids = model.generate(
+ #         **inputs,
+ #         max_new_tokens=256,
+ #         temperature=0.7,
+ #         top_p=0.9,
+ #         repetition_penalty=1.1,
+ #         eos_token_id=tokenizer.eos_token_id,
+ #     )
+
+ #     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+ #     response = response.split("[ASSISTANT]")[-1].strip()
+ #     return response
+
+ # # -------------------- Gradio Functions --------------------
+ # def start_chat(persona):
+ #     conversation = [
+ #         {"role": "system", "content": f"""You are a digital twin.
+ # ONLY respond based on persona and user input.
+ # \nPersona: {persona}"""},
+ #     ]
+ #     return conversation, [(None, "How can I help you?")]
+
+ # def chat(user_message, history, conversation):
+ #     conversation.append({"role": "user", "content": user_message})
+ #     reply = generate_reply(conversation)
+ #     conversation.append({"role": "assistant", "content": reply})
+ #     history.append((user_message, reply))
+ #     return history, conversation
+
+ # # -------------------- Gradio UI --------------------
+ # with gr.Blocks() as demo:
+ #     gr.Markdown("## 🤖 Digital Twin Chat")
+
+ #     persona_box = gr.Textbox(label="Enter your persona",
+ #         value="I am male. I am unsociable. I have a weakness for sweets. I am a jack of all, master of none.")
+ #     start_btn = gr.Button("Start Chat")
+
+ #     chatbot = gr.Chatbot()
+ #     msg = gr.Textbox(label="Your message")
+
+ #     state_conversation = gr.State([])
+ #     state_history = gr.State([])
+
+ #     start_btn.click(start_chat, inputs=persona_box, outputs=[state_conversation, chatbot])
+ #     msg.submit(chat, inputs=[msg, chatbot, state_conversation], outputs=[chatbot, state_conversation])
+
+ # demo.launch()
+
+
  import gradio as gr
- import os
- os.environ["UNSLOTH_DEVICE"] = "cuda"
- from unsloth import FastLanguageModel
  import torch
+ import os
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
  HF_TOKEN = os.environ["HF_TOKEN"]
+
  # -------------------- Load Model --------------------
- model, tokenizer = FastLanguageModel.from_pretrained(
-     model_name="ak0601/gpt-oss-20b-persona-chat", # your trained model
-     max_seq_length=1024,
-     dtype=None,
-     load_in_4bit=True,
-     device_map="auto",
+ model_name = "ak0601/gpt-oss-20b-persona-chat"
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
+
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     device_map="auto",         # automatically places model on GPU
+     torch_dtype=torch.float16, # efficient for H200
+     load_in_4bit=True,         # quantization if available
      token=HF_TOKEN
  )

@@ -49,10 +138,10 @@ def generate_reply(conversation):
  # -------------------- Gradio Functions --------------------
  def start_chat(persona):
      conversation = [
-         {"role": "system", "content": f"""You are a digital twin.
+         {"role": "system", "content": f"""You are a digital twin.
  ONLY respond based on persona and user input.
  \nPersona: {persona}"""},
-     ]
+     ]
      return conversation, [(None, "How can I help you?")]

  def chat(user_message, history, conversation):
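
Note on the new loader: recent transformers releases deprecate passing load_in_4bit=True directly to AutoModelForCausalLM.from_pretrained in favor of a BitsAndBytesConfig passed as quantization_config (4-bit loading also needs the bitsandbytes package, and the compute dtype is then set on that config rather than via torch_dtype). A minimal sketch of the same load under that API, reusing the model name and token handling from the diff above; an illustration, not the committed code:

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

HF_TOKEN = os.environ["HF_TOKEN"]
model_name = "ak0601/gpt-oss-20b-persona-chat"

# 4-bit quantization config; requires the bitsandbytes package
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype replaces torch_dtype here
)

tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",               # place layers on the available GPU(s)
    quantization_config=bnb_config,  # 4-bit weights via bitsandbytes
    token=HF_TOKEN,
)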
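
Likewise, the hand-rolled [SYSTEM]/[USER]/[ASSISTANT] formatter kept commented out at the top of the file can usually be replaced by the tokenizer's bundled chat template once the model is loaded through transformers, assuming the ak0601/gpt-oss-20b-persona-chat repo ships one. A hedged sketch reusing the conversation list of role/content dicts from the diff; note that temperature and top_p only take effect when do_sample=True, which the committed generate call leaves unset:

def generate_reply(conversation):
    # apply_chat_template renders the role/content dicts with the template
    # bundled in the tokenizer (only works if the model repo provides one)
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,  # end the prompt with the assistant marker
        return_tensors="pt",
    ).to(model.device)

    output_ids = model.generate(
        input_ids,
        max_new_tokens=256,
        do_sample=True,   # without this, temperature/top_p are ignored
        temperature=0.7,
        top_p=0.9,
    )

    # decode only the newly generated tokens, not the echoed prompt
    new_tokens = output_ids[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()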