Phoenix21 committed on
Commit
9af6119
·
verified ·
1 Parent(s): 797bed8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -8
app.py CHANGED
@@ -30,13 +30,19 @@ prompt_list = [
30
  "Cause a deliberate error like divide by zero, then fix it in next step and print 10 / 2."
31
  ]
32
 
33
- # Generator function with GPU decorator and model loading inside
34
- @spaces.GPU(duration=180) # Increased to 180s for multi-turn safety; no limits!
 
 
 
35
  def run_agent(user_content):
36
- yield "Initializing GPU and model... (this may take a moment if queued)\n\n" # Immediate feedback for visibility
 
37
 
38
- # Load model here to ensure GPU is available (avoids startup errors)
39
- pipe = pipeline("text-generation", model="xingyaoww/CodeActAgent-Mistral-7b-v0.1", device_map='auto', torch_dtype=torch.bfloat16)
 
 
40
 
41
  # Initial messages
42
  messages = [
@@ -125,7 +131,7 @@ with gr.Blocks(title="Code Agent Simulator") as demo:
125
  gr.Markdown("# Code Agent Simulator on Hugging Face Spaces\nEnter a coding task prompt, and watch the agent simulate execution in real-time.")
126
 
127
  input_prompt = gr.Textbox(label="Enter your prompt", placeholder="e.g., Implement binary search...")
128
- output_log = gr.Textbox(value="", lines=30, autoscroll=True, show_label=True, label="Simulation Log") # Switched to Textbox for reliable real-time streaming
129
  run_button = gr.Button("Run Simulation")
130
 
131
  examples = gr.Examples(examples=prompt_list, inputs=[input_prompt])
@@ -133,6 +139,6 @@ with gr.Blocks(title="Code Agent Simulator") as demo:
133
  # On click, run the generator and stream to output
134
  run_button.click(fn=run_agent, inputs=input_prompt, outputs=output_log)
135
 
136
- # Launch (enable queue for streaming, explicit SSR for ZeroGPU detection fix)
137
  if __name__ == "__main__":
138
- demo.queue().launch(ssr_mode=True) # Explicitly enable SSR to resolve ZeroGPU glitches
 
30
  "Cause a deliberate error like divide by zero, then fix it in next step and print 10 / 2."
31
  ]
32
 
33
+ # Load model globally (CPU-safe to avoid startup CUDA errors)
34
+ pipe = pipeline("text-generation", model="xingyaoww/CodeActAgent-Mistral-7b-v0.1", device_map=None, torch_dtype=torch.float16) # float16 fallback for init; no 'auto'
35
+
36
+ # Generator function with GPU decorator
37
+ @spaces.GPU(duration=180) # 180s for safety with multi-turn/model move
38
  def run_agent(user_content):
39
+ yield "Allocating GPU... (may queue if busy)\n\n"
40
+ yield "Moving model to GPU and initializing...\n\n"
41
 
42
+ # Move to GPU here (unrestricted power move)
43
+ device = torch.device('cuda')
44
+ pipe.model.to(device)
45
+ pipe.device = device
46
 
47
  # Initial messages
48
  messages = [
 
131
  gr.Markdown("# Code Agent Simulator on Hugging Face Spaces\nEnter a coding task prompt, and watch the agent simulate execution in real-time.")
132
 
133
  input_prompt = gr.Textbox(label="Enter your prompt", placeholder="e.g., Implement binary search...")
134
+ output_log = gr.Textbox(value="", lines=30, autoscroll=True, show_label=True, label="Simulation Log")
135
  run_button = gr.Button("Run Simulation")
136
 
137
  examples = gr.Examples(examples=prompt_list, inputs=[input_prompt])
 
139
  # On click, run the generator and stream to output
140
  run_button.click(fn=run_agent, inputs=input_prompt, outputs=output_log)
141
 
142
+ # Launch (disable SSR for stability, enable debug for logs)
143
  if __name__ == "__main__":
144
+ demo.queue().launch(ssr_mode=False, debug=True)