Spaces:
Running
Running
user can define search timeout
Browse files
app.py
CHANGED
|
@@ -116,7 +116,7 @@ def format_conversation(history, system_prompt, tokenizer):
|
|
| 116 |
def chat_response(user_msg, chat_history, system_prompt,
|
| 117 |
enable_search, max_results, max_chars,
|
| 118 |
model_name, max_tokens, temperature,
|
| 119 |
-
top_k, top_p, repeat_penalty):
|
| 120 |
"""
|
| 121 |
Generates streaming chat responses, optionally with background web search.
|
| 122 |
"""
|
|
@@ -149,7 +149,7 @@ def chat_response(user_msg, chat_history, system_prompt,
|
|
| 149 |
|
| 150 |
# wait up to 1s for snippets, then replace debug with them
|
| 151 |
if enable_search:
|
| 152 |
-
thread_search.join(timeout=1.0)
|
| 153 |
if search_results:
|
| 154 |
debug = "### Search results merged into prompt\n\n" + "\n".join(
|
| 155 |
f"- {r}" for r in search_results
|
|
@@ -280,6 +280,7 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
|
|
| 280 |
gr.Markdown("### Web Search Settings")
|
| 281 |
mr = gr.Number(value=6, precision=0, label="Max Results")
|
| 282 |
mc = gr.Number(value=600, precision=0, label="Max Chars/Result")
|
|
|
|
| 283 |
clr = gr.Button("Clear Chat")
|
| 284 |
cnl = gr.Button("Cancel Generation")
|
| 285 |
with gr.Column(scale=7):
|
|
@@ -292,6 +293,6 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
|
|
| 292 |
cnl.click(fn=cancel_generation, outputs=dbg)
|
| 293 |
txt.submit(fn=chat_response,
|
| 294 |
inputs=[txt, chat, sys_prompt, search_chk, mr, mc,
|
| 295 |
-
model_dd, max_tok, temp, k, p, rp],
|
| 296 |
outputs=[chat, dbg])
|
| 297 |
demo.launch()
|
|
|
|
| 116 |
def chat_response(user_msg, chat_history, system_prompt,
|
| 117 |
enable_search, max_results, max_chars,
|
| 118 |
model_name, max_tokens, temperature,
|
| 119 |
+
top_k, top_p, repeat_penalty, search_timeout):
|
| 120 |
"""
|
| 121 |
Generates streaming chat responses, optionally with background web search.
|
| 122 |
"""
|
|
|
|
| 149 |
|
| 150 |
# wait up to search_timeout seconds for snippets, then replace debug with them
|
| 151 |
if enable_search:
|
| 152 |
+
thread_search.join(timeout=float(search_timeout))
|
| 153 |
if search_results:
|
| 154 |
debug = "### Search results merged into prompt\n\n" + "\n".join(
|
| 155 |
f"- {r}" for r in search_results
|
|
|
|
| 280 |
gr.Markdown("### Web Search Settings")
|
| 281 |
mr = gr.Number(value=6, precision=0, label="Max Results")
|
| 282 |
mc = gr.Number(value=600, precision=0, label="Max Chars/Result")
|
| 283 |
+
st = gr.Slider(minimum=0.0, maximum=30.0, step=0.5, value=5.0, label="Search Timeout (s)")
|
| 284 |
clr = gr.Button("Clear Chat")
|
| 285 |
cnl = gr.Button("Cancel Generation")
|
| 286 |
with gr.Column(scale=7):
|
|
|
|
| 293 |
cnl.click(fn=cancel_generation, outputs=dbg)
|
| 294 |
txt.submit(fn=chat_response,
|
| 295 |
inputs=[txt, chat, sys_prompt, search_chk, mr, mc,
|
| 296 |
+
model_dd, max_tok, temp, k, p, rp, st],
|
| 297 |
outputs=[chat, dbg])
|
| 298 |
demo.launch()
|