Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,19 +6,17 @@ import io
|
|
| 6 |
import sys
|
| 7 |
import gradio as gr
|
| 8 |
from transformers import pipeline
|
|
|
|
| 9 |
|
| 10 |
# Suppress warnings
|
| 11 |
logging.getLogger("transformers").setLevel(logging.ERROR)
|
| 12 |
|
| 13 |
-
# Load model (do this once at startup)
|
| 14 |
-
pipe = pipeline("text-generation", model="xingyaoww/CodeActAgent-Mistral-7b-v0.1", device_map='auto', torch_dtype=torch.bfloat16)
|
| 15 |
-
|
| 16 |
# System prompt (same as original)
|
| 17 |
system_prompt = """
|
| 18 |
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user’s questions. The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute>" tag, for example: <execute> print("Hello World!") </execute>. The assistant should attempt fewer things at a time instead of putting too much code in one <execute> block. The assistant can install packages through PIP by <execute> !pip install [package needed] </execute> and should always import packages and define variables before starting to use them. For algorithms that return values (like search functions), ALWAYS define them as proper functions with def function_name(...): ... return value ... and then call the function to print or use the result. Do NOT use return outside a function. The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result. Whenever possible, execute the code for the user using <execute> instead of providing it. The assistant’s response should be concise, but do express their thoughts. Once the task is complete and tested successfully (e.g., correct index printed), stop generating more code and say 'Task complete' without <execute> tags.
|
| 19 |
"""
|
| 20 |
|
| 21 |
-
# List of example prompts (same as original)
|
| 22 |
prompt_list = [
|
| 23 |
"Print 'Hello, World!' using code. Once done, stop.",
|
| 24 |
"Compute and print the sum of numbers from 1 to 10 using a loop. Use code to do it.",
|
|
@@ -32,8 +30,12 @@ prompt_list = [
|
|
| 32 |
"Cause a deliberate error like divide by zero, then fix it in next step and print 10 / 2."
|
| 33 |
]
|
| 34 |
|
| 35 |
-
# Generator function
|
|
|
|
| 36 |
def run_agent(user_content):
|
|
|
|
|
|
|
|
|
|
| 37 |
# Initial messages
|
| 38 |
messages = [
|
| 39 |
{"role": "system", "content": system_prompt},
|
|
@@ -129,6 +131,6 @@ with gr.Blocks(title="Code Agent Simulator") as demo:
|
|
| 129 |
# On click, run the generator and stream to output
|
| 130 |
run_button.click(fn=run_agent, inputs=input_prompt, outputs=output_log)
|
| 131 |
|
| 132 |
-
# Launch (enable queue for streaming)
|
| 133 |
if __name__ == "__main__":
|
| 134 |
demo.queue().launch()
|
|
|
|
| 6 |
import sys
|
| 7 |
import gradio as gr
|
| 8 |
from transformers import pipeline
|
| 9 |
+
import spaces # Required for ZeroGPU
|
| 10 |
|
| 11 |
# Suppress warnings
|
| 12 |
logging.getLogger("transformers").setLevel(logging.ERROR)
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
# System prompt (same as original)
|
| 15 |
system_prompt = """
|
| 16 |
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user’s questions. The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute>" tag, for example: <execute> print("Hello World!") </execute>. The assistant should attempt fewer things at a time instead of putting too much code in one <execute> block. The assistant can install packages through PIP by <execute> !pip install [package needed] </execute> and should always import packages and define variables before starting to use them. For algorithms that return values (like search functions), ALWAYS define them as proper functions with def function_name(...): ... return value ... and then call the function to print or use the result. Do NOT use return outside a function. The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result. Whenever possible, execute the code for the user using <execute> instead of providing it. The assistant’s response should be concise, but do express their thoughts. Once the task is complete and tested successfully (e.g., correct index printed), stop generating more code and say 'Task complete' without <execute> tags.
|
| 17 |
"""
|
| 18 |
|
| 19 |
+
# List of example prompts (same as original)
|
| 20 |
prompt_list = [
|
| 21 |
"Print 'Hello, World!' using code. Once done, stop.",
|
| 22 |
"Compute and print the sum of numbers from 1 to 10 using a loop. Use code to do it.",
|
|
|
|
| 30 |
"Cause a deliberate error like divide by zero, then fix it in next step and print 10 / 2."
|
| 31 |
]
|
| 32 |
|
| 33 |
+
# Generator function with GPU decorator and model loading inside
|
| 34 |
+
@spaces.GPU(duration=120) # Allocates GPU for up to 120s; adjust higher for complex prompts (no restrictions!)
|
| 35 |
def run_agent(user_content):
|
| 36 |
+
# Load model here to ensure GPU is available (avoids startup errors)
|
| 37 |
+
pipe = pipeline("text-generation", model="xingyaoww/CodeActAgent-Mistral-7b-v0.1", device_map='auto', torch_dtype=torch.bfloat16)
|
| 38 |
+
|
| 39 |
# Initial messages
|
| 40 |
messages = [
|
| 41 |
{"role": "system", "content": system_prompt},
|
|
|
|
| 131 |
# On click, run the generator and stream to output
|
| 132 |
run_button.click(fn=run_agent, inputs=input_prompt, outputs=output_log)
|
| 133 |
|
| 134 |
+
# Launch (enable queue for streaming)
|
| 135 |
if __name__ == "__main__":
|
| 136 |
demo.queue().launch()
|