Spaces:

ZoroaStrella
/

RekaFlash

Running on Zero

App Files Community

ZoroaStrella committed on Mar 11

Commit

e0e0cdd

1 Parent(s): 96ecc26

Update code token

Browse files

Files changed (1) hide show

app.py +88 -23

app.py CHANGED Viewed

@@ -1,36 +1,101 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-import os
 client = InferenceClient(model="RekaAI/reka-flash-3", token=os.getenv("HF_TOKEN"))
-def generate_response(message, chat_history, system_prompt="You are a helpful assistant.",
-                     max_length=512, temperature=0.7, top_p=0.9, top_k=50, repetition_penalty=1.0):
-    full_prompt = f"{system_prompt}\n\n"
-    for turn in chat_history:
-        full_prompt += f"{turn['role'].capitalize()}: {turn['content']}\n"
-    full_prompt += f"Human: {message}\nAssistant:"
     response = client.text_generation(
-        full_prompt,
-        max_new_tokens=max_length,
         temperature=temperature,
         top_p=top_p,
         top_k=top_k,
-        repetition_penalty=repetition_penalty,
-        stop_sequences=["\nHuman:", "\nAssistant:"]
     )
-    generated_text = response.strip()
-    chat_history.append({"role": "user", "content": message})
-    chat_history.append({"role": "assistant", "content": generated_text})
-    return "", chat_history
 with gr.Blocks() as demo:
-    chatbot = gr.Chatbot(type="messages")
-    msg = gr.Textbox()
-    clear = gr.Button("Clear")
-    msg.submit(generate_response, [msg, chatbot], [msg, chatbot])
-    clear.click(lambda: None, None, chatbot, queue=False)
-demo.launch()

+import os
 import gradio as gr
 from huggingface_hub import InferenceClient
+# Module-level Inference API client for the "RekaAI/reka-flash-3" model.
+# The access token is read from the HF_TOKEN environment variable;
+# os.getenv returns None when that variable is not set.
 client = InferenceClient(model="RekaAI/reka-flash-3", token=os.getenv("HF_TOKEN"))
+# Helper that renders the conversation history as a plain-text prompt
+def format_history(history):
+    """Return the text prompt for a list of chat messages.
+
+    ``history`` is a sequence of dicts with ``"role"`` and ``"content"``
+    keys. ``"user"`` messages are rendered as ``Human:`` lines and
+    ``"assistant"`` messages as ``Assistant:`` lines; messages with any
+    other role are left out. The prompt opens with a fixed system sentence
+    and ends with a bare ``Assistant:`` cue for the model to continue.
+    """
+    speaker_by_role = {"user": "Human", "assistant": "Assistant"}
+    pieces = ["You are a helpful and harmless assistant.\n\n"]
+    for turn in history:
+        speaker = speaker_by_role.get(turn["role"])
+        if speaker is not None:
+            pieces.append(f"{speaker}: {turn['content']}\n")
+    pieces.append("Assistant:")
+    return "".join(pieces)
+# Function to handle message submission and response generation
+def submit(message, history, temperature, max_new_tokens, top_p, top_k):
+    """Stream one assistant turn for ``message`` into the conversation.
+
+    This is a generator event handler: every yielded ``(history, history)``
+    pair carries the same list twice, once for the chatbot display and once
+    for the stored conversation state.
+
+    The first five streamed chunks are shown in a placeholder message
+    labelled ``Thinking: ...`` (relabelled ``Thought: ...`` once the fifth
+    chunk arrives); every later chunk is appended to a separate
+    ``Answer:`` message.
+
+    Args:
+        message: Text entered by the user. A blank or whitespace-only
+            message is ignored and the history is yielded back unchanged.
+        history: List of ``{"role": ..., "content": ...}`` dicts. It is
+            never mutated; new lists are built with ``+``.
+        temperature, max_new_tokens, top_p, top_k: Sampling settings
+            forwarded to ``client.text_generation``.
+
+    Yields:
+        ``(history, history)`` after the placeholder is added, after each
+        streamed chunk, and once more when the response is finalized.
+    """
+    # Nothing to send: do not add an empty user turn or call the API
+    if not message or not message.strip():
+        yield history, history
+        return
+    # Add user's message to history
+    history = history + [{"role": "user", "content": message}]
+    # Add a "Thinking..." message to simulate the model's reasoning phase
+    thinking_message = {"role": "assistant", "content": "Thinking..."}
+    history = history + [thinking_message]
+    yield history, history  # Update chatbot and state
+    # Format the prompt excluding the "Thinking..." message
+    # NOTE(review): the "Thought: "/"Answer:" labels and the trailing
+    # "[End of response]" marker stay in the yielded history, so they are
+    # passed to format_history again on the next turn -- confirm that this
+    # is intended.
+    prompt = format_history(history[:-1])
+    # Stream the response from the Inference API
     response = client.text_generation(
+        prompt,
+        max_new_tokens=max_new_tokens,
         temperature=temperature,
         top_p=top_p,
         top_k=top_k,
+        repetition_penalty=1.0,
+        stop_sequences=["\nHuman:", "\nAssistant:"],
+        stream=True
     )
+    # Simulate "thinking" phase with the first 5 chunks
+    max_thought_chunks = 5
+    thought_chunks = 0
+    accumulated_thought = ""
+    # Stays None until the thinking phase ends; an explicit sentinel
+    # replaces probing locals() for the name.
+    answer_message = None
+    for chunk in response:
+        if answer_message is None:
+            accumulated_thought += chunk
+            thought_chunks += 1
+            if thought_chunks < max_thought_chunks:
+                thinking_message["content"] = "Thinking: " + accumulated_thought
+            else:
+                # Finalize the "Thought" message and start the "Answer" message
+                thinking_message["content"] = "Thought: " + accumulated_thought
+                answer_message = {"role": "assistant", "content": "Answer:"}
+                history = history + [answer_message]
+        else:
+            # Append subsequent chunks to the "Answer" message
+            answer_message["content"] += chunk
+        yield history, history  # Update UI with each chunk
+    # Finalize the response
+    if answer_message is not None:
+        answer_message["content"] += "\n\n[End of response]"
+    else:
+        # Fewer than five chunks arrived, so no "Answer" message was started
+        thinking_message["content"] += "\n\n[No response generated]"
+    yield history, history
+# Build the Gradio interface
 with gr.Blocks() as demo:
+    # State to store the conversation history
+    history_state = gr.State([])
+    # Chatbot component to display messages
+    chatbot = gr.Chatbot(type="messages", height=400, label="Conversation")
+    # Layout with settings and input area
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Advanced settings in a collapsible panel
+            with gr.Accordion("Advanced Settings", open=False):
+                temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=0.7)
+                max_tokens = gr.Slider(label="Max Tokens", minimum=1, maximum=1024, step=1, value=512)
+                top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, step=0.1, value=0.9)
+                top_k = gr.Slider(label="Top K", minimum=1, maximum=100, step=1, value=50)
+        with gr.Column(scale=4):
+            # Textbox for user input and buttons
+            textbox = gr.Textbox(label="Your message")
+            submit_btn = gr.Button("Submit")
+            clear_btn = gr.Button("Clear")
+    # Run `submit` from both the Submit button and the Enter key in the
+    # textbox, then empty the textbox once the streamed reply has finished.
+    submit_inputs = [textbox, history_state, temperature, max_tokens, top_p, top_k]
+    submit_outputs = [chatbot, history_state]
+    for trigger in (submit_btn.click, textbox.submit):
+        trigger(
+            submit,
+            inputs=submit_inputs,
+            outputs=submit_outputs,
+        ).then(lambda: "", outputs=textbox)
+    # Clear button resets the conversation
+    clear_btn.click(lambda: ([], []), outputs=[chatbot, history_state])
+# Start the app only when this file is executed as a script
+if __name__ == "__main__":
+    # Enable the request queue first, then launch the result of that call
+    queued_demo = demo.queue()
+    queued_demo.launch()