ak0601 committed
Commit b97cf6d · verified · Parent(s): 118bf1e

Update app.py

Files changed (1): app.py (+100 -11)
app.py CHANGED
@@ -1,16 +1,105 @@
+ # import gradio as gr
+ # import os
+ # os.environ["UNSLOTH_DEVICE"] = "cuda"
+ # from unsloth import FastLanguageModel
+ # import torch
+ # HF_TOKEN = os.environ["HF_TOKEN"]
+ # # -------------------- Load Model --------------------
+ # model, tokenizer = FastLanguageModel.from_pretrained(
+ #     model_name="ak0601/gpt-oss-20b-persona-chat", # your trained model
+ #     max_seq_length=1024,
+ #     dtype=None,
+ #     load_in_4bit=True,
+ #     device_map="auto",
+ #     token=HF_TOKEN
+ # )
+
+ # # -------------------- Conversation Formatter --------------------
+ # def format_conversation(conversation):
+ #     text = ""
+ #     for turn in conversation:
+ #         if turn["role"] == "system":
+ #             text += f"[SYSTEM] {turn['content']}\n"
+ #         elif turn["role"] == "user":
+ #             text += f"[USER] {turn['content']}\n"
+ #         elif turn["role"] == "assistant":
+ #             text += f"[ASSISTANT] {turn['content']}\n"
+ #     text += "[ASSISTANT]"
+ #     return text
+
+ # def generate_reply(conversation):
+ #     inputs = tokenizer(
+ #         format_conversation(conversation),
+ #         return_tensors="pt"
+ #     ).to(model.device)
+
+ #     output_ids = model.generate(
+ #         **inputs,
+ #         max_new_tokens=256,
+ #         temperature=0.7,
+ #         top_p=0.9,
+ #         repetition_penalty=1.1,
+ #         eos_token_id=tokenizer.eos_token_id,
+ #     )
+
+ #     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+ #     response = response.split("[ASSISTANT]")[-1].strip()
+ #     return response
+
+ # # -------------------- Gradio Functions --------------------
+ # def start_chat(persona):
+ #     conversation = [
+ #         {"role": "system", "content": f"""You are a digital twin.
+ # ONLY respond based on persona and user input.
+ # \nPersona: {persona}"""},
+ #     ]
+ #     return conversation, [(None, "How can I help you?")]
+
+ # def chat(user_message, history, conversation):
+ #     conversation.append({"role": "user", "content": user_message})
+ #     reply = generate_reply(conversation)
+ #     conversation.append({"role": "assistant", "content": reply})
+ #     history.append((user_message, reply))
+ #     return history, conversation
+
+ # # -------------------- Gradio UI --------------------
+ # with gr.Blocks() as demo:
+ #     gr.Markdown("## 🤖 Digital Twin Chat")
+
+ #     persona_box = gr.Textbox(label="Enter your persona",
+ #         value="I am male. I am unsociable. I have a weakness for sweets. I am a jack of all, master of none.")
+ #     start_btn = gr.Button("Start Chat")
+
+ #     chatbot = gr.Chatbot()
+ #     msg = gr.Textbox(label="Your message")
+
+ #     state_conversation = gr.State([])
+ #     state_history = gr.State([])
+
+ #     start_btn.click(start_chat, inputs=persona_box, outputs=[state_conversation, chatbot])
+ #     msg.submit(chat, inputs=[msg, chatbot, state_conversation], outputs=[chatbot, state_conversation])
+
+ # demo.launch()
+
+
  import gradio as gr
- import os
- os.environ["UNSLOTH_DEVICE"] = "cuda"
- from unsloth import FastLanguageModel
  import torch
+ import os
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
  HF_TOKEN = os.environ["HF_TOKEN"]
+
  # -------------------- Load Model --------------------
- model, tokenizer = FastLanguageModel.from_pretrained(
-     model_name="ak0601/gpt-oss-20b-persona-chat", # your trained model
-     max_seq_length=1024,
-     dtype=None,
-     load_in_4bit=True,
-     device_map="auto",
+ model_name = "ak0601/gpt-oss-20b-persona-chat"
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
+
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     device_map="auto",         # automatically places model on GPU
+     torch_dtype=torch.float16, # efficient for H200
+     load_in_4bit=True,         # quantization if available
      token=HF_TOKEN
  )

@@ -49,10 +138,10 @@ def generate_reply(conversation):
  # -------------------- Gradio Functions --------------------
  def start_chat(persona):
      conversation = [
-         {"role": "system", "content": f"""You are a digital twin.
+         {"role": "system", "content": f"""You are a digital twin.
  ONLY respond based on persona and user input.
  \nPersona: {persona}"""},
-     ]
+     ]
      return conversation, [(None, "How can I help you?")]

  def chat(user_message, history, conversation):
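
Note on the new loader: recent transformers releases deprecate passing load_in_4bit=True directly to AutoModelForCausalLM.from_pretrained in favor of a BitsAndBytesConfig passed as quantization_config (4-bit loading also needs the bitsandbytes package, and the compute dtype is then set on that config rather than via torch_dtype). A minimal sketch of the same load under that API, reusing the model name and token handling from the diff above; an illustration, not the committed code:

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

HF_TOKEN = os.environ["HF_TOKEN"]
model_name = "ak0601/gpt-oss-20b-persona-chat"

# 4-bit quantization config; requires the bitsandbytes package
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype replaces torch_dtype here
)

tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",               # place layers on the available GPU(s)
    quantization_config=bnb_config,  # 4-bit weights via bitsandbytes
    token=HF_TOKEN,
)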
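
Likewise, the hand-rolled [SYSTEM]/[USER]/[ASSISTANT] formatter kept commented out at the top of the file can usually be replaced by the tokenizer's bundled chat template once the model is loaded through transformers, assuming the ak0601/gpt-oss-20b-persona-chat repo ships one. A hedged sketch reusing the conversation list of role/content dicts from the diff; note that temperature and top_p only take effect when do_sample=True, which the committed generate call leaves unset:

def generate_reply(conversation):
    # apply_chat_template renders the role/content dicts with the template
    # bundled in the tokenizer (only works if the model repo provides one)
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,  # end the prompt with the assistant marker
        return_tensors="pt",
    ).to(model.device)

    output_ids = model.generate(
        input_ids,
        max_new_tokens=256,
        do_sample=True,   # without this, temperature/top_p are ignored
        temperature=0.7,
        top_p=0.9,
    )

    # decode only the newly generated tokens, not the echoed prompt
    new_tokens = output_ids[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()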