Spaces:

algorithmicsuperintelligence
/

OptiLLM

Running

App Files Community

codelion committed on Sep 19, 2024

Commit

4527045

verified ·

1 Parent(s): 6e74755

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -14

app.py CHANGED Viewed

@@ -1,6 +1,9 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 from optillm.moa import mixture_of_agents
 def respond(
     model,
@@ -12,7 +15,8 @@ def respond(
     temperature,
     top_p,
 ):
-    client = InferenceClient(model)
     messages = [{"role": "system", "content": system_message}]
     for val in history:
@@ -23,19 +27,22 @@ def respond(
     messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
@@ -44,10 +51,11 @@ demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Dropdown(
-            ["meta-llama/Meta-Llama-3.1-70B-Instruct", "meta-llama/Meta-Llama-3.1-8B-Instruct", "HuggingFaceH4/zephyr-7b-beta"], label="Model", info="Choose the base model"
         ),
         gr.Dropdown(
-            ["bon", "mcts", "moa"], label="Approach", info="Choose the approach"
         ),
         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),

 import gradio as gr
 from huggingface_hub import InferenceClient
 from optillm.moa import mixture_of_agents
+from optillm.mcts import chat_with_mcts
+from optillm.bon import best_of_n_sampling
 def respond(
     model,
     temperature,
     top_p,
 ):
+    # client = InferenceClient(model)
     messages = [{"role": "system", "content": system_message}]
     for val in history:
     messages.append({"role": "user", "content": message})
+    # response = ""
+    final_response = mixture_of_agents(system_message, message, client, model)
+    return final_response
+    # for message in client.chat_completion(
+    #     messages,
+    #     max_tokens=max_tokens,
+    #     stream=True,
+    #     temperature=temperature,
+    #     top_p=top_p,
+    # ):
+    #     token = message.choices[0].delta.content
+    #     response += token
+    #     yield response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
     respond,
     additional_inputs=[
         gr.Dropdown(
+            ["meta-llama/Meta-Llama-3.1-70B-Instruct", "meta-llama/Meta-Llama-3.1-8B-Instruct", "HuggingFaceH4/zephyr-7b-beta"],
+            value="meta-llama/Meta-Llama-3.1-70B-Instruct", label="Model", info="Choose the base model"
         ),
         gr.Dropdown(
+            ["bon", "mcts", "moa"], value="moa", label="Approach", info="Choose the approach"
         ),
         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),