import re
import logging
import torch
import builtins
import io
import sys
import gradio as gr
from transformers import pipeline
import spaces # Required for ZeroGPU
# Suppress warnings
logging.getLogger("transformers").setLevel(logging.ERROR)
# System prompt steering the model to emit <execute> code blocks
system_prompt = """
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user’s questions. The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute>" tag, for example: <execute> print("Hello World!") </execute>. The assistant should attempt fewer things at a time instead of putting too much code in one <execute> block. The assistant can install packages through PIP by <execute> !pip install [package needed] </execute> and should always import packages and define variables before starting to use them. For algorithms that return values (like search functions), ALWAYS define them as proper functions with def function_name(...): ... return value ... and then call the function to print or use the result. Do NOT use return outside a function. The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result. Whenever possible, execute the code for the user using <execute> instead of providing it. The assistant’s response should be concise, but do express their thoughts. Once the task is complete and tested successfully (e.g., correct index printed), stop generating more code and say 'Task complete' without <execute> tags.
"""
# Example prompts surfaced in the Gradio Examples widget
prompt_list = [
    "Print 'Hello, World!' using code. Once done, stop.",
    "Compute and print the sum of numbers from 1 to 10 using a loop. Use code to do it.",
    "Define a function def add(a, b) that returns a + b. Test it by calling add(3, 4) and printing the result.",
    "Print the length of numbers from 0 to 9 using len and range.",
    "Create a list [1,2,3] and print its length. Use code.",
    "Implement a function def factorial(n) to compute factorial recursively. Test on 5 and print result.",
    "Try to import math and print math.sqrt(16). If needed, install packages.",
    "Find if 10 is even or odd using a function def is_even(n), return True/False, test and print.",
    "Implement linear search to find index of 7 in [3,5,7,9], return -1 if not found. Test and print.",
    "Cause a deliberate error like divide by zero, then fix it in next step and print 10 / 2."
]
# Load model globally (CPU-safe to avoid startup CUDA errors)
pipe = pipeline("text-generation", model="xingyaoww/CodeActAgent-Mistral-7b-v0.1", device_map=None, torch_dtype=torch.float16)  # float16 halves memory; device_map=None avoids touching CUDA at import time
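# ZeroGPU note: CUDA is only available inside @spaces.GPU-decorated functions,
# so the model is loaded on CPU here and moved to the GPU inside run_agent below.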
# Generator function with GPU decorator
@spaces.GPU(duration=180) # 180s for safety
def run_agent(user_content):
    full_log = ""
    current_code = ""
    current_exec_output = ""
    yield [current_code, current_exec_output, full_log]  # Initial empty state for all three outputs
    full_log += "Allocating GPU... (may queue if busy)\n\n"
    yield [current_code, current_exec_output, full_log]
    full_log += "Moving model to GPU and initializing...\n\n"
    yield [current_code, current_exec_output, full_log]
    # Move the model to the GPU now that the decorator has allocated one
    device = torch.device('cuda')
    pipe.model.to(device)
    pipe.device = device  # keep the pipeline's device attribute in sync with the model
    # Initial messages
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]
    # REPL state (restricted builtins whitelist)
    repl_globals = {'__builtins__': {k: v for k, v in builtins.__dict__.items() if k in ['print', 'len', 'range', 'int']}}
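    # Only print/len/range/int are reachable from executed code; everything else in
    # builtins is blocked. In particular, 'import math' raises an error because
    # '__import__' is excluded, so the "import math" example prompt will surface an
    # error observation rather than succeed.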
    # History for loop detection
    prev_codes = set()
    # Max turns
    max_turns = 10
    turn = 0
    full_log += f"### Starting simulation for prompt: '{user_content}'\n\n"
    yield [current_code, current_exec_output, full_log]
    while turn < max_turns:
        # Generate response
        result = pipe(messages, max_new_tokens=512)
        assistant_content = result[0]['generated_text'][-1]['content']
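        # With chat-style message input, the pipeline returns the whole conversation,
        # roughly: [{'generated_text': [{'role': 'system', ...}, ...,
        # {'role': 'assistant', 'content': '...'}]}], so [-1] is the new reply.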
full_log += f"**Assistant (Turn {turn+1}):** {assistant_content}\n\n"
yield [current_code, current_exec_output, full_log]
# Stop checks
if re.search(r'(task complete|done|final answer)', assistant_content.lower()):
full_log += "Detected completion keyword. Stopping.\n"
yield [current_code, current_exec_output, full_log]
break
# Extract <execute>
execute_match = re.search(r'<execute>(.*?)</execute>', assistant_content, re.DOTALL)
if not execute_match:
full_log += "No code to execute. Task likely complete.\n"
yield [current_code, current_exec_output, full_log]
break
code = execute_match.group(1).strip()
current_code = code
# Loop detection
if code in prev_codes:
full_log += "Repeated code detected. Possible infinite loop—stopping.\n"
yield [current_code, current_exec_output, full_log]
break
prev_codes.add(code)
full_log += f"**Executing code:**\n```\n{code}\n```\n\n"
yield [current_code, current_exec_output, full_log]
        # Execute with stdout captured (stderr is not captured; exceptions are caught below)
        old_stdout = sys.stdout
        sys.stdout = io.StringIO()
        try:
            exec(code, repl_globals)
            exec_output = sys.stdout.getvalue().strip() or "No output."
        except Exception as e:
            exec_output = f"Error: {str(e)}"
        finally:
            sys.stdout = old_stdout
        current_exec_output = exec_output
        full_log += f"**Execution Output:** {exec_output}\n\n"
        yield [current_code, current_exec_output, full_log]
        # Success stop: if the output is a bare integer (e.g. a search index), assume the task is done
        if re.match(r'^\d+$', exec_output.strip()):
            full_log += "Pure index output detected. Task successful—stopping.\n"
            yield [current_code, current_exec_output, full_log]
            break
        # Feed the execution result back as an observation so the model can react next turn
        messages.append({"role": "assistant", "content": assistant_content})
        messages.append({"role": "user", "content": f"Observation: {exec_output}"})
        turn += 1
    # Final parse (grab last number as index if applicable)
    if 'exec_output' in locals():
        final_index = re.search(r'(\d+)$', exec_output)
        if final_index:
            full_log += f"**Extracted Result:** Index {final_index.group(1)}\n"
            yield [current_code, current_exec_output, full_log]
        else:
            full_log += "No clear index found—check errors.\n"
            yield [current_code, current_exec_output, full_log]
    else:
        full_log += "No execution output.\n"
        yield [current_code, current_exec_output, full_log]
    full_log += f"### End of simulation for prompt: '{user_content}'\n"
    yield [current_code, current_exec_output, full_log]
# Gradio interface
with gr.Blocks(title="Code Agent Simulator") as demo:
    gr.Markdown("# Code Agent Simulator on Hugging Face Spaces\nEnter a coding task prompt, and watch the agent simulate execution in real-time.")
    input_prompt = gr.Textbox(label="Enter your prompt", placeholder="e.g., Implement binary search...")
    with gr.Row():
        generated_code = gr.Code(label="Generated Code", language="python", lines=15, show_label=True)
        exec_output = gr.Textbox(label="Execution Output", lines=15, show_label=True)
    full_log = gr.Textbox(label="Full Simulation Log", lines=20, autoscroll=True, show_label=True)
    run_button = gr.Button("Run Simulation")
    examples = gr.Examples(examples=prompt_list, inputs=[input_prompt])
    # On click, run the generator and stream to multiple outputs
    run_button.click(fn=run_agent, inputs=input_prompt, outputs=[generated_code, exec_output, full_log])
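    # Because run_agent is a generator, Gradio streams each yielded
    # [code, output, log] triple into the three output components as it arrives.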
# Launch (disable SSR for stability, enable debug for logs)
if __name__ == "__main__":
demo.queue().launch(ssr_mode=False, debug=True) |