import gradio as gr
import torch
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
HF_TOKEN = os.environ["HF_TOKEN"]  # access token for the model repo; raises KeyError if unset
# -------------------- Load Model --------------------
model_name = "ak0601/gpt-oss-20b-persona-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
# 4-bit quantization keeps the 20B model within a single-GPU memory budget
# (requires the bitsandbytes package). Passing load_in_4bit directly to
# from_pretrained is deprecated, so use an explicit BitsAndBytesConfig.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",               # place layers on available GPU(s) automatically
    torch_dtype=torch.float16,       # half precision for non-quantized modules
    quantization_config=quant_config,
    token=HF_TOKEN,
)
# -------------------- Conversation Formatter --------------------
def format_conversation(conversation):
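    # Serialize turns into [SYSTEM]/[USER]/[ASSISTANT] tags; the trailing open
    # [ASSISTANT] tag cues the model to complete the assistant turn.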
text = ""
for turn in conversation:
if turn["role"] == "system":
text += f"[SYSTEM] {turn['content']}\n"
elif turn["role"] == "user":
text += f"[USER] {turn['content']}\n"
elif turn["role"] == "assistant":
text += f"[ASSISTANT] {turn['content']}\n"
text += "[ASSISTANT]"
return text
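# For example, format_conversation([
#     {"role": "system", "content": "You are a digital twin."},
#     {"role": "user", "content": "Hi!"},
# ]) returns:
#   "[SYSTEM] You are a digital twin.\n[USER] Hi!\n[ASSISTANT]"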
def generate_reply(conversation):
    inputs = tokenizer(
        format_conversation(conversation),
        return_tensors="pt"
    ).to(model.device)
    output_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,  # required; temperature/top_p are ignored under greedy decoding
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,  # avoids the missing-pad-token warning
    )
    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    # If the model keeps writing the dialogue, cut at the next speaker tag.
    response = response.split("[USER]")[0].strip()
    return response
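# Usage sketch: generate_reply(conversation) returns a single reply string; it
# does not mutate the conversation list (callers append the turn themselves).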
# -------------------- Gradio Functions --------------------
def start_chat(persona):
    # Build the prompt explicitly so no source-code indentation leaks into it.
    conversation = [
        {
            "role": "system",
            "content": (
                "You are a digital twin.\n"
                "ONLY respond based on the persona and user input.\n"
                f"Persona: {persona}"
            ),
        },
    ]
    # Reset the visible chat with a greeting (classic (user, assistant) tuples).
    return conversation, [(None, "How can I help you?")]
def chat(user_message, history, conversation):
    conversation.append({"role": "user", "content": user_message})
    reply = generate_reply(conversation)
    conversation.append({"role": "assistant", "content": reply})
    history.append((user_message, reply))
    return history, conversation, ""  # the empty string clears the input box
# -------------------- Gradio UI --------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Digital Twin Chat")
    persona_box = gr.Textbox(
        label="Enter your persona",
        value="I am male. I am unsociable. I have a weakness for sweets. I am a jack of all, master of none.",
    )
    start_btn = gr.Button("Start Chat")
    chatbot = gr.Chatbot()  # classic (user, assistant) tuple format
    msg = gr.Textbox(label="Your message")
    state_conversation = gr.State([])  # full role/content history fed to the model
    start_btn.click(start_chat, inputs=persona_box, outputs=[state_conversation, chatbot])
    msg.submit(
        chat,
        inputs=[msg, chatbot, state_conversation],
        outputs=[chatbot, state_conversation, msg],  # third output clears the textbox
    )

demo.launch()
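# Local run (a sketch; assumes a CUDA GPU with enough VRAM for the 4-bit 20B
# model and the bitsandbytes package installed):
#   HF_TOKEN=<your-token> python app.py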