"""Gradio chat demo for the ak0601/gpt-oss-20b-persona-chat digital-twin model."""

import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

HF_TOKEN = os.environ["HF_TOKEN"]  # Hugging Face access token (required)

# -------------------- Load Model --------------------
model_name = "ak0601/gpt-oss-20b-persona-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",          # automatically place the model on the available GPU(s)
    torch_dtype=torch.float16,  # fp16 is efficient on the target H200 GPU
    load_in_4bit=True,          # 4-bit quantization (requires bitsandbytes; see note at the bottom)
    token=HF_TOKEN,
)

# An earlier version of this script loaded the same checkpoint via Unsloth;
# the rest of that version was identical to the code below. Kept for reference:
#
# os.environ["UNSLOTH_DEVICE"] = "cuda"
# from unsloth import FastLanguageModel
#
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name="ak0601/gpt-oss-20b-persona-chat",  # your trained model
#     max_seq_length=1024,
#     dtype=None,
#     load_in_4bit=True,
#     device_map="auto",
#     token=HF_TOKEN,
# )

# -------------------- Conversation Formatter --------------------
def format_conversation(conversation):
    """Flatten a list of {role, content} turns into the model's prompt format."""
    text = ""
    for turn in conversation:
        if turn["role"] == "system":
            text += f"[SYSTEM] {turn['content']}\n"
        elif turn["role"] == "user":
            text += f"[USER] {turn['content']}\n"
        elif turn["role"] == "assistant":
            text += f"[ASSISTANT] {turn['content']}\n"
    text += "[ASSISTANT]"  # cue the model to generate the next assistant turn
    return text


def generate_reply(conversation):
    inputs = tokenizer(
        format_conversation(conversation),
        return_tensors="pt",
    ).to(model.device)

    output_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,  # without this, temperature/top_p are silently ignored
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,  # avoids the missing-pad-token warning
    )
    # The decoded output includes the prompt, so keep only the newest assistant turn.
    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    response = response.split("[ASSISTANT]")[-1].strip()
    return response


# -------------------- Gradio Functions --------------------
def start_chat(persona):
    conversation = [
        {
            "role": "system",
            "content": (
                "You are a digital twin. "
                "ONLY respond based on the persona and the user's input.\n"
                f"Persona: {persona}"
            ),
        },
    ]
    return conversation, [(None, "How can I help you?")]


def chat(user_message, history, conversation):
    conversation.append({"role": "user", "content": user_message})
    reply = generate_reply(conversation)
    conversation.append({"role": "assistant", "content": reply})
    history.append((user_message, reply))
    return history, conversation


# -------------------- Gradio UI --------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Digital Twin Chat")
    persona_box = gr.Textbox(
        label="Enter your persona",
        value=(
            "I am male. I am unsociable. I have a weakness for sweets. "
            "I am a jack of all, master of none."
        ),
    )
    start_btn = gr.Button("Start Chat")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your message")
    state_conversation = gr.State([])

    start_btn.click(start_chat, inputs=persona_box, outputs=[state_conversation, chatbot])
    msg.submit(chat, inputs=[msg, chatbot, state_conversation], outputs=[chatbot, state_conversation])

demo.launch()
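
# Note: newer transformers releases deprecate passing `load_in_4bit=True`
# directly to `from_pretrained` in favor of an explicit BitsAndBytesConfig.
# A minimal sketch of the equivalent load, assuming a transformers install
# with bitsandbytes available:
#
# from transformers import BitsAndBytesConfig
#
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.float16,  # compute in fp16; weights stay 4-bit
# )
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     quantization_config=bnb_config,
#     token=HF_TOKEN,
# )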
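
# Note on Gradio versions: the tuple-style chatbot history used above
# ((user, bot) pairs, with None for the opening greeting) is deprecated in
# newer Gradio releases in favor of OpenAI-style message dicts. A minimal
# sketch of the migration, assuming Gradio 4.44 or later:
#
# chatbot = gr.Chatbot(type="messages")
# greeting = [{"role": "assistant", "content": "How can I help you?"}]
# history.append({"role": "user", "content": user_message})
# history.append({"role": "assistant", "content": reply})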
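
# To run locally, export a Hugging Face token with read access to the model
# repo before starting the script (shown here saved as app.py; adjust the
# name to match your file):
#
#   HF_TOKEN=hf_xxx python app.py
#
# Gradio then serves the UI at http://127.0.0.1:7860 by default.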