Remove cancel generation feature as it didn't work
app.py CHANGED
@@ -18,18 +18,6 @@ access_token=os.environ['HF_TOKEN']
 # Optional: Disable GPU visibility if you wish to force CPU usage
 # os.environ["CUDA_VISIBLE_DEVICES"] = ""
 
-# ------------------------------
-# Global Cancellation Event
-# ------------------------------
-cancel_event = threading.Event()
-
-# ------------------------------
-# Stopping Criteria for Cancellation
-# ------------------------------
-class CancelStoppingCriteria(StoppingCriteria):
-    def __call__(self, input_ids, scores, **kwargs):
-        return cancel_event.is_set()
-
 # ------------------------------
 # Torch-Compatible Model Definitions with Adjusted Descriptions
 # ------------------------------
@@ -415,7 +403,6 @@ def chat_response(user_msg, chat_history, system_prompt,
     """
     Generates streaming chat responses, optionally with background web search.
     """
-    cancel_event.clear()
     history = list(chat_history or [])
     history.append({'role': 'user', 'content': user_msg})
 
@@ -506,7 +493,6 @@ def chat_response(user_msg, chat_history, system_prompt,
             'top_p': top_p,
             'repetition_penalty': repeat_penalty,
             'streamer': streamer,
-            'stopping_criteria': [CancelStoppingCriteria()],
             'return_full_text': False,
         }
     )
@@ -519,8 +505,6 @@ def chat_response(user_msg, chat_history, system_prompt,
 
     # Stream tokens
     for chunk in streamer:
-        if cancel_event.is_set():
-            break
         text = chunk
 
         # Detect start of thinking
@@ -545,8 +529,6 @@ def chat_response(user_msg, chat_history, system_prompt,
                 history.append({'role': 'assistant', 'content': answer_buf})
             else:
                 history[-1]['content'] = thought_buf
-            if cancel_event.is_set():
-                break
             yield history, debug
             continue
 
@@ -562,8 +544,6 @@ def chat_response(user_msg, chat_history, system_prompt,
                 history.append({'role': 'assistant', 'content': answer_buf})
             else:
                 history[-1]['content'] = thought_buf
-            if cancel_event.is_set():
-                break
             yield history, debug
             continue
 
@@ -572,8 +552,6 @@ def chat_response(user_msg, chat_history, system_prompt,
             history.append({'role': 'assistant', 'content': ''})
         answer_buf += text
         history[-1]['content'] = answer_buf
-        if cancel_event.is_set():
-            break
         yield history, debug
 
     gen_thread.join()
@@ -585,11 +563,6 @@ def chat_response(user_msg, chat_history, system_prompt,
     gc.collect()
 
 
-def cancel_generation():
-    cancel_event.set()
-    return 'Generation cancelled.'
-
-
 def update_default_prompt(enable_search):
     return f"You are a helpful assistant."
 
@@ -642,7 +615,6 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
             mc = gr.Number(value=50, precision=0, label="Max Chars/Result")
             st = gr.Slider(minimum=0.0, maximum=30.0, step=0.5, value=5.0, label="Search Timeout (s)")
             clr = gr.Button("Clear Chat")
-            cnl = gr.Button("Cancel Generation")
         with gr.Column(scale=7):
             chat = gr.Chatbot(type="messages")
             txt = gr.Textbox(placeholder="Type your message and press Enter...")
@@ -670,7 +642,6 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
 
     search_chk.change(fn=update_default_prompt, inputs=search_chk, outputs=sys_prompt)
    clr.click(fn=lambda: ([], "", ""), outputs=[chat, txt, dbg])
-    cnl.click(fn=cancel_generation, outputs=dbg)
     txt.submit(fn=chat_response,
                inputs=[txt, chat, sys_prompt, search_chk, mr, mc,
                        model_dd, max_tok, temp, k, p, rp, st],
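For reference, the removed pieces fit together roughly as follows. This is a minimal sketch reconstructed from the deleted lines, not the Space's actual code: it assumes a plain transformers generate() call, and the model id and prompt are placeholders.

# Minimal sketch of the removed cancellation pattern (placeholder model id).
import threading
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          StoppingCriteria, StoppingCriteriaList,
                          TextIteratorStreamer)

cancel_event = threading.Event()          # set by the UI's cancel handler

class CancelStoppingCriteria(StoppingCriteria):
    # generate() consults this after each decoding step; True stops generation.
    def __call__(self, input_ids, scores, **kwargs):
        return cancel_event.is_set()

model_id = "sshleifer/tiny-gpt2"          # placeholder, not the Space's model list
tok = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
inputs = tok("Hello there", return_tensors="pt")

gen_thread = threading.Thread(
    target=model.generate,
    kwargs=dict(**inputs,
                max_new_tokens=64,
                streamer=streamer,
                stopping_criteria=StoppingCriteriaList([CancelStoppingCriteria()])),
)
gen_thread.start()

for chunk in streamer:                    # stream tokens, as in chat_response()
    print(chunk, end="", flush=True)
    if cancel_event.is_set():             # mirrors the removed per-chunk checks
        break
gen_thread.join()

In app.py the event was set from a separate Gradio callback (the removed cancel_generation() wired to the "Cancel Generation" button), while the streaming loop polled it between yielded chunks; this commit deletes both sides of that handshake.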