# --- HuggingFace Spaces page metadata (scrape residue, kept as a comment) ---
# Spaces: Running on Zero (ZeroGPU hardware)
# File size: 1,036 Bytes
# Commits referenced: d973e29, 4034e3f
import gradio as gr
import spaces
from transformers import pipeline
import torch
# Hugging Face model ID served by this Space.
MODEL_ID = "LLM360/K2-Think"

# Text-generation pipeline, built once at import time (model weights are
# downloaded/loaded here, before any request is handled).
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    torch_dtype="auto",  # take the dtype recommended by the checkpoint
    device_map="auto",   # let accelerate place the model on available devices
)
@spaces.GPU(duration=120)
def respond(message, history):
    """Run one chat turn through the model.

    Appends the user's *message* to *history*, generates an assistant
    reply, and returns ``("", updated_history)`` so the textbox is
    cleared and the chatbot widget shows the full conversation.
    """
    # The Clear button resets the chatbot to None; treat that as empty.
    convo = list(history) if history else []
    convo.append({"role": "user", "content": message})

    result = pipe(convo, max_new_tokens=32768)
    # Pipeline output: the generated conversation is the original turns
    # plus one assistant message at the end.
    reply = result[0]["generated_text"][-1]["content"]

    convo.append({"role": "assistant", "content": reply})
    return "", convo
# Build the UI. `demo` must stay module-level: the __main__ guard launches it.
with gr.Blocks(title="K2-Think Chat") as demo:
    gr.Markdown("# K2-Think Chat App")
    chat_window = gr.Chatbot(type="messages", height=500)
    user_box = gr.Textbox(placeholder="Type your message here...", scale=7)
    reset_button = gr.Button("Clear Chat")

    # Enter in the textbox runs inference; the button just blanks the chat.
    user_box.submit(
        respond,
        inputs=[user_box, chat_window],
        outputs=[user_box, chat_window],
    )
    reset_button.click(
        fn=lambda: None,
        inputs=None,
        outputs=chat_window,
        queue=False,
    )
if __name__ == "__main__":
    # Start the Gradio server when run as a script.
    # Fix: removed the stray trailing " |" (page-scrape residue) that made
    # this line a SyntaxError.
    demo.launch()