import re
import logging
import torch
import builtins
import io
import sys
import gradio as gr
from transformers import pipeline
import spaces # Required for ZeroGPU
# Suppress warnings
logging.getLogger("transformers").setLevel(logging.ERROR)
# System prompt (same as original)
system_prompt = """
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute>" tags, for example: <execute> print("Hello World!") </execute>. The assistant should attempt fewer things at a time instead of putting too much code in one <execute> block. The assistant can install packages through PIP by <execute> !pip install [package needed] </execute> and should always import packages and define variables before starting to use them. For algorithms that return values (like search functions), ALWAYS define them as proper functions with def function_name(...): ... return value ... and then call the function to print or use the result. Do NOT use return outside a function. The assistant should stop using <execute> and provide an answer once it has obtained the answer from the execution result. Whenever possible, execute the code for the user using <execute> instead of providing it. The assistant's response should be concise, but should still express its thoughts. Once the task is complete and tested successfully (e.g., correct index printed), stop generating more code and say 'Task complete' without <execute> tags.
"""
# List of example prompts (same as original)
prompt_list = [
    "Print 'Hello, World!' using code. Once done, stop.",
    "Compute and print the sum of numbers from 1 to 10 using a loop. Use code to do it.",
    "Define a function def add(a, b) that returns a + b. Test it by calling add(3, 4) and printing the result.",
    "Print the length of numbers from 0 to 9 using len and range.",
    "Create a list [1,2,3] and print its length. Use code.",
    "Implement a function def factorial(n) to compute factorial recursively. Test on 5 and print result.",
    "Try to import math and print math.sqrt(16). If needed, install packages.",
    "Find if 10 is even or odd using a function def is_even(n), return True/False, test and print.",
    "Implement linear search to find index of 7 in [3,5,7,9], return -1 if not found. Test and print.",
    "Cause a deliberate error like divide by zero, then fix it in next step and print 10 / 2.",
]
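# ZeroGPU note: Spaces grants a GPU only while a @spaces.GPU-decorated call is
# running, so the model must be loaded on CPU at import time and moved to CUDA
# inside run_agent() below.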
# Load model globally (CPU-safe to avoid startup CUDA errors)
pipe = pipeline("text-generation", model="xingyaoww/CodeActAgent-Mistral-7b-v0.1",
                device_map=None, torch_dtype=torch.float16)  # float16 for init; no 'auto'
# Generator function with GPU decorator
@spaces.GPU(duration=180) # 180s for safety
def run_agent(user_content):
    """Generator: streams [code, exec_output, log] updates to the Gradio UI."""
    full_log = ""
    current_code = ""
    current_exec_output = ""
    yield [current_code, current_exec_output, full_log]  # Initial empty state
    full_log += "Allocating GPU... (may queue if busy)\n\n"
    yield [current_code, current_exec_output, full_log]
    full_log += "Moving model to GPU and initializing...\n\n"
    yield [current_code, current_exec_output, full_log]
    # Move the CPU-loaded model onto the GPU that ZeroGPU just allocated
    device = torch.device('cuda')
    pipe.model.to(device)
    pipe.device = device  # keep the pipeline's device attribute in sync
    # Initial messages
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]
    # REPL state: restricted exec() builtins; '__import__' added so example
    # prompts like `import math` can run (the import statement requires it)
    allowed = ('print', 'len', 'range', 'int', '__import__')
    repl_globals = {'__builtins__': {k: v for k, v in builtins.__dict__.items() if k in allowed}}
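    # Note: a builtins whitelist guards against accidents, not malice; exec()
    # is not a security sandbox, so treat prompts as trusted input.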
    # History for loop detection
    prev_codes = set()
    # Max turns
    max_turns = 10
    turn = 0
    full_log += f"### Starting simulation for prompt: '{user_content}'\n\n"
    yield [current_code, current_exec_output, full_log]
    while turn < max_turns:
        # Generate a response; chat input returns the full message list,
        # so the last entry is the new assistant turn
        result = pipe(messages, max_new_tokens=512)
        assistant_content = result[0]['generated_text'][-1]['content']
        full_log += f"**Assistant (Turn {turn+1}):** {assistant_content}\n\n"
        yield [current_code, current_exec_output, full_log]
        # Stop checks
        if re.search(r'(task complete|done|final answer)', assistant_content.lower()):
            full_log += "Detected completion keyword. Stopping.\n"
            yield [current_code, current_exec_output, full_log]
            break
        # Extract the first <execute> block (re.DOTALL lets '.' span newlines)
        execute_match = re.search(r'<execute>(.*?)</execute>', assistant_content, re.DOTALL)
        if not execute_match:
            full_log += "No code to execute. Task likely complete.\n"
            yield [current_code, current_exec_output, full_log]
            break
        code = execute_match.group(1).strip()
        current_code = code
        # Loop detection: identical code twice means the agent is stuck
        if code in prev_codes:
            full_log += "Repeated code detected. Possible infinite loop; stopping.\n"
            yield [current_code, current_exec_output, full_log]
            break
        prev_codes.add(code)
        full_log += f"**Executing code:**\n```\n{code}\n```\n\n"
        yield [current_code, current_exec_output, full_log]
        # Execute with stdout captured so print() output can be fed back
        old_stdout = sys.stdout
        sys.stdout = io.StringIO()
        try:
            exec(code, repl_globals)
            exec_output = sys.stdout.getvalue().strip() or "No output."
        except Exception as e:
            exec_output = f"Error: {str(e)}"
        finally:
            sys.stdout = old_stdout  # always restore, even on error
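        # Caveat (assumption): swapping sys.stdout is process-global, so
        # concurrent runs could interleave captured output; demo.queue()'s
        # default concurrency limit of 1 keeps executions serialized here.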
        current_exec_output = exec_output
        full_log += f"**Execution Output:** {exec_output}\n\n"
        yield [current_code, current_exec_output, full_log]
        # Success stop: if output is a bare number (e.g., an index), assume done
        if re.match(r'^\d+$', exec_output.strip()):
            full_log += "Pure index output detected. Task successful; stopping.\n"
            yield [current_code, current_exec_output, full_log]
            break
        # Append feedback
        messages.append({"role": "assistant", "content": assistant_content})
        messages.append({"role": "user", "content": f"Observation: {exec_output}"})
        turn += 1
    # Final parse (grab last number as index if applicable)
    if 'exec_output' in locals():
        final_index = re.search(r'(\d+)$', exec_output)
        if final_index:
            full_log += f"**Extracted Result:** Index {final_index.group(1)}\n"
            yield [current_code, current_exec_output, full_log]
        else:
            full_log += "No clear index found; check errors.\n"
            yield [current_code, current_exec_output, full_log]
    else:
        full_log += "No execution output.\n"
        yield [current_code, current_exec_output, full_log]
    full_log += f"### End of simulation for prompt: '{user_content}'\n"
    yield [current_code, current_exec_output, full_log]
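
# Hypothetical local-debug helper (a minimal sketch, not wired into the UI):
# drains the run_agent generator outside Gradio and prints the final log.
# Assumes a CUDA/ZeroGPU environment, since run_agent moves the model to 'cuda'.
def debug_run(prompt):
    last_update = None
    for update in run_agent(prompt):
        last_update = update  # each update is [code, exec_output, full_log]
    if last_update:
        print(last_update[2])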
# Gradio interface
with gr.Blocks(title="Code Agent Simulator") as demo:
    gr.Markdown("# Code Agent Simulator on Hugging Face Spaces\nEnter a coding task prompt, and watch the agent simulate execution in real-time.")
    input_prompt = gr.Textbox(label="Enter your prompt", placeholder="e.g., Implement binary search...")
    with gr.Row():
        generated_code = gr.Code(label="Generated Code", language="python", lines=15, show_label=True)
        exec_output = gr.Textbox(label="Execution Output", lines=15, show_label=True)
    full_log = gr.Textbox(label="Full Simulation Log", lines=20, autoscroll=True, show_label=True)
    run_button = gr.Button("Run Simulation")
    examples = gr.Examples(examples=prompt_list, inputs=[input_prompt])
    # On click, run the generator and stream updates to the three outputs
    run_button.click(fn=run_agent, inputs=input_prompt, outputs=[generated_code, exec_output, full_log])
# Launch (disable SSR for stability, enable debug for logs)
if __name__ == "__main__":
    demo.queue().launch(ssr_mode=False, debug=True)