Spaces: VIRTUS
VIRTUS committed
Commit 88ce0e7 · Parent(s): 49079dd
fix: HF AUTH and GUI
app.py CHANGED

@@ -7,7 +7,6 @@ from tools import get_weather, CurrencyConverterTool
 
 
 model_path = "Qwen/Qwen3-4B-Instruct-2507"
-AGENT_SYSTEM_PROMPT = "You are a friendly and helpful Chatbot"
 
 cuda = torch.cuda.is_available()
 if cuda:
@@ -15,12 +14,25 @@ if cuda:
 else:
     print("\nRunning on Hugging Face Ecosystem\n")
 
-def interact_with_agent(prompt, history, hf_token: gr.OAuthToken,):
+def interact_with_agent(
+    message,
+    history: list[dict[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    hf_token: gr.OAuthToken,
+):
     if cuda:
         quantization = BitsAndBytesConfig(load_in_8bit=True)
         model = TransformersModel(
             model_id=model_path,
             max_new_tokens=1024,
+            temperature=temperature,
+            hf_token=hf_token.token,
+            top_p=top_p,
+            max_tokens=max_tokens,
+            system_message=system_message,
             model_kwargs={
                 "quantization_config": quantization
             })
@@ -42,19 +54,26 @@ def interact_with_agent(prompt, history, hf_token: gr.OAuthToken,):
     )
     messages = []
     yield messages
-    for msg in stream_to_gradio(agent,
+    for msg in stream_to_gradio(agent, message):
         messages.append(asdict(msg))
         yield messages
     yield messages
 
 chatbot = gr.ChatInterface(
     interact_with_agent,
-
-
-
-
-
-
+    type="messages",
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
+    ],
 )
 
 with gr.Blocks() as demo:
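
Note on the AUTH part of this fix: the new hf_token: gr.OAuthToken parameter is only filled in when the Space runs with Hugging Face OAuth enabled and the user has signed in. The diff stops at "with gr.Blocks() as demo:", so the code below is only a minimal sketch of how that block is typically completed; the gr.LoginButton() call, chatbot.render(), the demo.launch() guard, and the hf_oauth: true Space metadata are assumptions, not part of this commit.

# Minimal sketch (assumed, not shown in this commit) of how the OAuth token
# can reach interact_with_agent in a Gradio Space.
# The Space README metadata is assumed to contain:
#   hf_oauth: true
import gradio as gr  # already imported at the top of app.py

with gr.Blocks() as demo:
    # Starts the Hugging Face OAuth flow; once the user is logged in, Gradio
    # automatically injects a gr.OAuthToken into any event handler whose
    # parameter is annotated with gr.OAuthToken (here: interact_with_agent).
    gr.LoginButton()
    # Render the gr.ChatInterface defined above inside this Blocks layout.
    chatbot.render()

if __name__ == "__main__":
    demo.launch()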