Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,19 +6,17 @@ import io
|
|
| 6 |
import sys
|
| 7 |
import gradio as gr
|
| 8 |
from transformers import pipeline
|
|
|
|
| 9 |
|
| 10 |
# Suppress warnings
|
| 11 |
logging.getLogger("transformers").setLevel(logging.ERROR)
|
| 12 |
|
| 13 |
-
# Load model (do this once at startup)
|
| 14 |
-
pipe = pipeline("text-generation", model="xingyaoww/CodeActAgent-Mistral-7b-v0.1", device_map='auto', torch_dtype=torch.bfloat16)
|
| 15 |
-
|
| 16 |
# System prompt (same as original)
|
| 17 |
system_prompt = """
|
| 18 |
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user’s questions. The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute>" tag, for example: <execute> print("Hello World!") </execute>. The assistant should attempt fewer things at a time instead of putting too much code in one <execute> block. The assistant can install packages through PIP by <execute> !pip install [package needed] </execute> and should always import packages and define variables before starting to use them. For algorithms that return values (like search functions), ALWAYS define them as proper functions with def function_name(...): ... return value ... and then call the function to print or use the result. Do NOT use return outside a function. The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result. Whenever possible, execute the code for the user using <execute> instead of providing it. The assistant’s response should be concise, but do express their thoughts. Once the task is complete and tested successfully (e.g., correct index printed), stop generating more code and say 'Task complete' without <execute> tags.
|
| 19 |
"""
|
| 20 |
|
| 21 |
-
# List of example prompts (same as original)
|
| 22 |
prompt_list = [
|
| 23 |
"Print 'Hello, World!' using code. Once done, stop.",
|
| 24 |
"Compute and print the sum of numbers from 1 to 10 using a loop. Use code to do it.",
|
|
@@ -32,8 +30,12 @@ prompt_list = [
|
|
| 32 |
"Cause a deliberate error like divide by zero, then fix it in next step and print 10 / 2."
|
| 33 |
]
|
| 34 |
|
| 35 |
-
# Generator function
|
|
|
|
| 36 |
def run_agent(user_content):
|
|
|
|
|
|
|
|
|
|
| 37 |
# Initial messages
|
| 38 |
messages = [
|
| 39 |
{"role": "system", "content": system_prompt},
|
|
@@ -129,6 +131,6 @@ with gr.Blocks(title="Code Agent Simulator") as demo:
|
|
| 129 |
# On click, run the generator and stream to output
|
| 130 |
run_button.click(fn=run_agent, inputs=input_prompt, outputs=output_log)
|
| 131 |
|
| 132 |
-
# Launch (enable queue for streaming)
|
| 133 |
if __name__ == "__main__":
|
| 134 |
demo.queue().launch()
|
|
|
|
| 6 |
import sys
|
| 7 |
import gradio as gr
|
| 8 |
from transformers import pipeline
|
| 9 |
+
import spaces # Required for ZeroGPU
|
| 10 |
|
| 11 |
# Suppress warnings
|
| 12 |
logging.getLogger("transformers").setLevel(logging.ERROR)
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
# System prompt (same as original)
|
| 15 |
system_prompt = """
|
| 16 |
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user’s questions. The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute>" tag, for example: <execute> print("Hello World!") </execute>. The assistant should attempt fewer things at a time instead of putting too much code in one <execute> block. The assistant can install packages through PIP by <execute> !pip install [package needed] </execute> and should always import packages and define variables before starting to use them. For algorithms that return values (like search functions), ALWAYS define them as proper functions with def function_name(...): ... return value ... and then call the function to print or use the result. Do NOT use return outside a function. The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result. Whenever possible, execute the code for the user using <execute> instead of providing it. The assistant’s response should be concise, but do express their thoughts. Once the task is complete and tested successfully (e.g., correct index printed), stop generating more code and say 'Task complete' without <execute> tags.
|
| 17 |
"""
|
| 18 |
|
| 19 |
+
# List of example prompts (same as original)
|
| 20 |
prompt_list = [
|
| 21 |
"Print 'Hello, World!' using code. Once done, stop.",
|
| 22 |
"Compute and print the sum of numbers from 1 to 10 using a loop. Use code to do it.",
|
|
|
|
| 30 |
"Cause a deliberate error like divide by zero, then fix it in next step and print 10 / 2."
|
| 31 |
]
|
| 32 |
|
| 33 |
+
# Generator function with GPU decorator and model loading inside
|
| 34 |
+
@spaces.GPU(duration=120) # Allocates GPU for up to 120s; adjust higher for complex prompts (no restrictions!)
|
| 35 |
def run_agent(user_content):
|
| 36 |
+
# Load model here to ensure GPU is available (avoids startup errors)
|
| 37 |
+
pipe = pipeline("text-generation", model="xingyaoww/CodeActAgent-Mistral-7b-v0.1", device_map='auto', torch_dtype=torch.bfloat16)
|
| 38 |
+
|
| 39 |
# Initial messages
|
| 40 |
messages = [
|
| 41 |
{"role": "system", "content": system_prompt},
|
|
|
|
| 131 |
# On click, run the generator and stream to output
|
| 132 |
run_button.click(fn=run_agent, inputs=input_prompt, outputs=output_log)
|
| 133 |
|
| 134 |
+
# Launch (enable queue for streaming)
|
| 135 |
if __name__ == "__main__":
|
| 136 |
demo.queue().launch()
|