Update app.py
app.py CHANGED
@@ -1,176 +1,91 @@
-
 from transformers import AutoModelForCausalLM, AutoTokenizer
-import
+import torch
 import gradio as gr
 from gradio import ChatMessage
 from typing import Iterator
 
 checkpoint = "EpistemeAI/metatune-gpt20b-R0"
-device = "cuda"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
+# Load model + tokenizer
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = AutoModelForCausalLM.from_pretrained(
+    checkpoint,
+    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+).to(device)
 
-def format_chat_history(messages: list) -> list:
-    """
-    Formats the chat history into a structure Gemini can understand
-    """
-    formatted_history = []
-    for message in messages:
-        # Skip thinking messages (messages with metadata)
-        if not (message.get("role") == "assistant" and "metadata" in message):
-            formatted_history.append({
-                "role": "user" if message.get("role") == "user" else "assistant",
-                "parts": [message.get("content", "")]
-            })
-    return formatted_history
+def format_history_for_model(messages):
+    """Convert the message list into a single string prompt"""
+    chat_prompt = ""
+    for msg in messages:
+        role = msg["role"]
+        content = msg["content"]
+        if role == "user":
+            chat_prompt += f"User: {content}\n"
+        else:
+            chat_prompt += f"Assistant: {content}\n"
+    return chat_prompt.strip()
 
-def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
-    """
-    Streams thoughts and response with conversation history support.
-    """
+def stream_response(user_message: str, messages: list) -> Iterator[list]:
     try:
-        print(f"\n=== New Request ===\nUser message: {user_message}")
-
-        # Format chat history for Gemini
-        chat_history = format_chat_history(messages)
-
-        # Initialize Gemini chat
-        chat = model.start_chat(history=chat_history)
-        response = chat.send_message(user_message, stream=True)
-
-        # Initialize buffers and flags
-        thought_buffer = ""
-        response_buffer = ""
-        thinking_complete = False
+        print(f"User: {user_message}")
+        # `messages` already ends with the latest user turn, so only the
+        # assistant cue is appended here
+        prompt = format_history_for_model(messages) + "\nAssistant:"
+
+        # Tokenize
+        inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
-        # Add initial thinking message
-        messages.append(
-            ChatMessage(
-                role="assistant",
-                content="",
-                metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
-            )
+        # Generate the full reply in one pass (a token-streaming variant is
+        # sketched after the diff)
+        generated = model.generate(
+            **inputs,
+            max_new_tokens=256,
+            temperature=0.7,
+            do_sample=True,
+            top_p=0.9,
+            repetition_penalty=1.1,
+            pad_token_id=tokenizer.eos_token_id,
         )
+        output_text = tokenizer.decode(generated[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
 
-        for chunk in response:
-            parts = chunk.candidates[0].content.parts
-            current_chunk = parts[0].text
-
-            if len(parts) == 2 and not thinking_complete:
-                # Complete thought and start response
-                thought_buffer += current_chunk
-                print(f"\n=== Complete Thought ===\n{thought_buffer}")
-
-                messages[-1] = ChatMessage(
-                    role="assistant",
-                    content=thought_buffer,
-                    metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
-                )
-                yield messages
-
-                # Start response
-                response_buffer = parts[1].text
-                print(f"\n=== Starting Response ===\n{response_buffer}")
-
-                messages.append(
-                    ChatMessage(
-                        role="assistant",
-                        content=response_buffer
-                    )
-                )
-                thinking_complete = True
-
-            elif thinking_complete:
-                # Stream response
-                response_buffer += current_chunk
-                print(f"\n=== Response Chunk ===\n{current_chunk}")
-
-                messages[-1] = ChatMessage(
-                    role="assistant",
-                    content=response_buffer
-                )
-
-            else:
-                # Stream thinking
-                thought_buffer += current_chunk
-                print(f"\n=== Thinking Chunk ===\n{current_chunk}")
-
-                messages[-1] = ChatMessage(
-                    role="assistant",
-                    content=thought_buffer,
-                    metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
-                )
-
-            yield messages
-
-        print(f"\n=== Final Response ===\n{response_buffer}")
-
+        # Append the assistant reply and update the chat
+        messages.append(ChatMessage(role="assistant", content=output_text))
+        yield messages
+
     except Exception as e:
-
-        messages.append(
-            ChatMessage(
-                role="assistant",
-                content=f"I apologize, but I encountered an error: {str(e)}"
-            )
-        )
+        messages.append(ChatMessage(role="assistant", content=f"Error: {str(e)}"))
         yield messages
 
-def user_message(msg: str, history: list) -> tuple:
-    """Adds user message to chat history"""
+def user_message(msg: str, history: list):
     history.append(ChatMessage(role="user", content=msg))
     return "", history
-
 
-# Create the Gradio interface
+# --- UI ---
 with gr.Blocks(theme=gr.themes.Citrus(), fill_height=True) as demo:
-
-    gr.Markdown("# Chat with Metatune gpt oss 20b and See its Thoughts 💭")
+    gr.Markdown("# Chat with Metatune GPT 20B 💭")
 
-    chatbot = gr.Chatbot(
-        type="messages",
-        label="
-        render_markdown=True,
-        scale=1,
-        #avatar_images=(None,"https://lh3.googleusercontent.com/oxz0sUBF0iYoN4VvhqWTmux-cxfD1rxuYkuFEfm1SFaseXEsjjE4Je_C_V3UQPuJ87sImQK3HfQ3RXiaRnQetjaZbjJJUkiPL5jFJ1WRl5FKJZYibUA=w214-h214-n-nu")
-    )
-
-    input_box = gr.Textbox(
-        lines=1,
-        label="Chat Message",
-        placeholder="Type your message here...",
-        scale=4
-    )
-
-    clear_button = gr.Button("Clear Chat", scale=1)
+    chatbot = gr.Chatbot(type="messages", label="Metatune 20B Chatbot", render_markdown=True)
+    with gr.Row():
+        input_box = gr.Textbox(label="Message", placeholder="Type your message here...")
+        clear_button = gr.Button("Clear")
 
-    # Set up event handlers
-    msg_store = gr.State("")  # Store for preserving user message
+    msg_store = gr.State("")
 
     input_box.submit(
-        lambda msg: (msg, msg, ""),
+        lambda msg: (msg, msg, ""),  # stash the message, then clear the input box
         inputs=[input_box],
         outputs=[msg_store, input_box, input_box],
-        queue=False
+        queue=False,
     ).then(
-        user_message,
+        user_message,
         inputs=[msg_store, chatbot],
         outputs=[input_box, chatbot],
-        queue=False
+        queue=False,
     ).then(
-        stream_gemini_response,
+        stream_response,
         inputs=[msg_store, chatbot],
-        outputs=chatbot
+        outputs=chatbot,
     )
 
-    clear_button.click(
-        lambda: ([], "", ""),
-        outputs=[chatbot, input_box, msg_store],
-        queue=False
-    )
+    clear_button.click(lambda: ([], "", ""), outputs=[chatbot, input_box, msg_store])
 
-    # Launch the interface
 if __name__ == "__main__":
     demo.launch(debug=True)
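
The rewritten `stream_response` is a generator, but it yields only once, after `model.generate()` has finished, so the chat window stays empty until the whole reply is ready. If token-by-token updates are wanted, `transformers.TextIteratorStreamer` can feed the same Gradio generator incrementally. A minimal sketch, reusing the `model`, `tokenizer`, `device`, and `format_history_for_model` globals from app.py above; the name `stream_response_tokens` is made up for illustration:

    from threading import Thread
    from transformers import TextIteratorStreamer

    def stream_response_tokens(user_message: str, messages: list):
        # Build the prompt the same way stream_response does
        prompt = format_history_for_model(messages) + "\nAssistant:"
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        # skip_prompt=True yields only newly generated text, not the echoed prompt
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

        # generate() blocks, so it runs on a worker thread while this
        # generator drains the streamer
        thread = Thread(target=model.generate, kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        ))
        thread.start()

        partial = ""
        messages.append(ChatMessage(role="assistant", content=""))
        for new_text in streamer:
            partial += new_text
            messages[-1] = ChatMessage(role="assistant", content=partial)
            yield messages  # Gradio re-renders the last message on each yield
        thread.join()

Swapping this in for `stream_response` in the final `.then(...)` would keep the event wiring unchanged, since both take `(user_message, messages)` and yield updated message lists.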
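One more caveat: the hand-rolled `User:`/`Assistant:` prompt in `format_history_for_model` ignores any chat template the checkpoint was trained with. If the tokenizer for EpistemeAI/metatune-gpt20b-R0 ships a template (not verified here), `tokenizer.apply_chat_template` builds a better-matched prompt. A sketch, with `build_prompt` as a hypothetical helper:

    def build_prompt(messages: list) -> str:
        history = [{"role": m["role"], "content": m["content"]} for m in messages]
        if tokenizer.chat_template:
            # Render with the model's own template and open a new assistant turn
            return tokenizer.apply_chat_template(
                history, tokenize=False, add_generation_prompt=True
            )
        # Fall back to the manual format used in app.py
        return format_history_for_model(messages) + "\nAssistant:"

Either prompt builder plugs into `stream_response` (or the streaming variant above) at the point where `prompt` is constructed.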