legolasyiu committed
Commit ee884f8 · verified · 1 parent: e287708

Update app.py

Files changed (1):
  app.py (+55, -140)
app.py CHANGED
@@ -1,176 +1,91 @@
-
 from transformers import AutoModelForCausalLM, AutoTokenizer
-import gradio as gr
+import torch
 import gradio as gr
 from gradio import ChatMessage
 from typing import Iterator
 
 checkpoint = "EpistemeAI/metatune-gpt20b-R0"
-device = "cuda"  # "cuda" or "cpu"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
+# Load model + tokenizer
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = AutoModelForCausalLM.from_pretrained(
+    checkpoint,
+    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+).to(device)
 
-def format_chat_history(messages: list) -> list:
-    """
-    Formats the chat history into a structure Gemini can understand
-    """
-    formatted_history = []
-    for message in messages:
-        # Skip thinking messages (messages with metadata)
-        if not (message.get("role") == "assistant" and "metadata" in message):
-            formatted_history.append({
-                "role": "user" if message.get("role") == "user" else "assistant",
-                "parts": [message.get("content", "")]
-            })
-    return formatted_history
+def format_history_for_model(messages):
+    """Convert the message list into a single string prompt"""
+    chat_prompt = ""
+    for msg in messages:
+        role = msg["role"]
+        content = msg["content"]
+        if role == "user":
+            chat_prompt += f"User: {content}\n"
+        else:
+            chat_prompt += f"Assistant: {content}\n"
+    return chat_prompt.strip()
 
-def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
-    """
-    Streams thoughts and response with conversation history support.
-    """
+def stream_response(user_message: str, messages: list) -> Iterator[list]:
     try:
-        print(f"\n=== New Request ===")
-        print(f"User message: {user_message}")
-
-        # Format chat history for Gemini
-        chat_history = format_chat_history(messages)
-
-        # Initialize Gemini chat
-        chat = model.start_chat(history=chat_history)
-        response = chat.send_message(user_message, stream=True)
-
-        # Initialize buffers and flags
-        thought_buffer = ""
-        response_buffer = ""
-        thinking_complete = False
+        print(f"User: {user_message}")
+        prompt = format_history_for_model(messages) + f"\nUser: {user_message}\nAssistant:"
+
+        # Tokenize
+        inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
-        # Add initial thinking message
-        messages.append(
-            ChatMessage(
-                role="assistant",
-                content="",
-                metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
-            )
+        # Stream output tokens
+        generated = model.generate(
+            **inputs,
+            max_new_tokens=256,
+            temperature=0.7,
+            do_sample=True,
+            top_p=0.9,
+            repetition_penalty=1.1,
+            pad_token_id=tokenizer.eos_token_id,
         )
+        output_text = tokenizer.decode(generated[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
 
-        for chunk in response:
-            parts = chunk.candidates[0].content.parts
-            current_chunk = parts[0].text
-
-            if len(parts) == 2 and not thinking_complete:
-                # Complete thought and start response
-                thought_buffer += current_chunk
-                print(f"\n=== Complete Thought ===\n{thought_buffer}")
-
-                messages[-1] = ChatMessage(
-                    role="assistant",
-                    content=thought_buffer,
-                    metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
-                )
-                yield messages
-
-                # Start response
-                response_buffer = parts[1].text
-                print(f"\n=== Starting Response ===\n{response_buffer}")
-
-                messages.append(
-                    ChatMessage(
-                        role="assistant",
-                        content=response_buffer
-                    )
-                )
-                thinking_complete = True
-
-            elif thinking_complete:
-                # Stream response
-                response_buffer += current_chunk
-                print(f"\n=== Response Chunk ===\n{current_chunk}")
-
-                messages[-1] = ChatMessage(
-                    role="assistant",
-                    content=response_buffer
-                )
-
-            else:
-                # Stream thinking
-                thought_buffer += current_chunk
-                print(f"\n=== Thinking Chunk ===\n{current_chunk}")
-
-                messages[-1] = ChatMessage(
-                    role="assistant",
-                    content=thought_buffer,
-                    metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
-                )
-
-            yield messages
-
-        print(f"\n=== Final Response ===\n{response_buffer}")
-
+        # Send back message
+        messages.append(ChatMessage(role="assistant", content=output_text))
+        yield messages
+
     except Exception as e:
-        print(f"\n=== Error ===\n{str(e)}")
-        messages.append(
-            ChatMessage(
-                role="assistant",
-                content=f"I apologize, but I encountered an error: {str(e)}"
-            )
-        )
+        messages.append(ChatMessage(role="assistant", content=f"Error: {str(e)}"))
         yield messages
 
-def user_message(msg: str, history: list) -> tuple[str, list]:
-    """Adds user message to chat history"""
+def user_message(msg: str, history: list):
     history.append(ChatMessage(role="user", content=msg))
     return "", history
 
-# Create the Gradio interface
+# --- UI ---
 with gr.Blocks(theme=gr.themes.Citrus(), fill_height=True) as demo:
-    #with gr.Column():
-    gr.Markdown("# Chat with Metatune gpt oss 20b and See its Thoughts 💭")
+    gr.Markdown("# Chat with Metatune GPT 20B 💭")
 
-    chatbot = gr.Chatbot(
-        type="messages",
-        label="metatune gpt oss 20b 'Thinking' Chatbot",
-        render_markdown=True,
-        scale=1,
-        #avatar_images=(None,"https://lh3.googleusercontent.com/oxz0sUBF0iYoN4VvhqWTmux-cxfD1rxuYkuFEfm1SFaseXEsjjE4Je_C_V3UQPuJ87sImQK3HfQ3RXiaRnQetjaZbjJJUkiPL5jFJ1WRl5FKJZYibUA=w214-h214-n-nu")
-    )
+    chatbot = gr.Chatbot(type="messages", label="Metatune 20B Chatbot", render_markdown=True)
+    with gr.Row():
+        input_box = gr.Textbox(label="Message", placeholder="Type your message here...")
+        clear_button = gr.Button("Clear")
 
-    with gr.Row(equal_height=True):
-        input_box = gr.Textbox(
-            lines=1,
-            label="Chat Message",
-            placeholder="Type your message here...",
-            scale=4
-        )
-
-        clear_button = gr.Button("Clear Chat", scale=1)
-
-    # Set up event handlers
-    msg_store = gr.State("")  # Store for preserving user message
-
+    msg_store = gr.State("")
+
     input_box.submit(
-        lambda msg: (msg, msg, ""),  # Store message and clear input
+        lambda msg: (msg, msg, ""),
         inputs=[input_box],
         outputs=[msg_store, input_box, input_box],
-        queue=False
+        queue=False,
     ).then(
-        user_message,  # Add user message to chat
+        user_message,
         inputs=[msg_store, chatbot],
         outputs=[input_box, chatbot],
-        queue=False
+        queue=False,
     ).then(
-        stream_gemini_response,  # Generate and stream response
+        stream_response,
         inputs=[msg_store, chatbot],
-        outputs=chatbot
+        outputs=chatbot,
     )
 
-    clear_button.click(
-        lambda: ([], "", ""),
-        outputs=[chatbot, input_box, msg_store],
-        queue=False
-    )
+    clear_button.click(lambda: ([], "", ""), outputs=[chatbot, input_box, msg_store])
 
-# Launch the interface
 if __name__ == "__main__":
     demo.launch(debug=True)
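Review note on the new prompt format: format_history_for_model flattens the history into a plain "User:"/"Assistant:" transcript, which an instruction-tuned checkpoint may never have seen during training. When the tokenizer ships a chat template, apply_chat_template produces the format the model actually expects. A minimal sketch, assuming the checkpoint provides such a template; build_prompt is a hypothetical helper, not part of this commit:

from transformers import AutoTokenizer

checkpoint = "EpistemeAI/metatune-gpt20b-R0"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def build_prompt(messages: list, user_message: str) -> str:
    # Hypothetical helper: relies on the checkpoint shipping a chat template.
    chat = [{"role": m["role"], "content": m["content"]} for m in messages]
    chat.append({"role": "user", "content": user_message})
    # add_generation_prompt=True appends the assistant header so the model
    # continues as the assistant instead of echoing the transcript.
    return tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )

Recent transformers versions raise an error when no template is attached to the tokenizer, so the commit's plain-text fallback still has a place as a default.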
 
 
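A second review note: despite its name, the new stream_response yields a single time, after model.generate returns, so the chat window stays empty until the whole reply is ready. Incremental output with transformers is typically done by driving generate from a worker thread through a TextIteratorStreamer. A minimal sketch, assuming the module-level tokenizer, model, device, format_history_for_model, and ChatMessage from app.py are in scope:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_response(user_message: str, messages: list):
    prompt = format_history_for_model(messages) + f"\nUser: {user_message}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # skip_prompt=True keeps the echoed prompt out of the streamed text.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # generate() blocks, so it runs in a worker thread while the streamer
    # iterator on this thread yields decoded text pieces as they arrive.
    thread = Thread(
        target=model.generate,
        kwargs=dict(**inputs, max_new_tokens=256, do_sample=True,
                    temperature=0.7, top_p=0.9,
                    pad_token_id=tokenizer.eos_token_id,
                    streamer=streamer),
    )
    thread.start()

    messages.append(ChatMessage(role="assistant", content=""))
    for piece in streamer:
        messages[-1].content += piece
        yield messages  # each yield re-renders the growing assistant bubble
    thread.join()

Each yield hands Gradio a fresh snapshot of the history, so the reply grows token by token in the assistant bubble instead of appearing all at once.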