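"""Gradio app for Hugging Face Spaces (ZeroGPU): a CodeActAgent demo that
generates <execute> code blocks with CodeActAgent-Mistral-7b-v0.1, runs them
in a restricted Python REPL, and streams code, output, and a full log."""
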
import re
import logging
import torch
import builtins
import io
import sys
import gradio as gr
from transformers import pipeline
import spaces  # Required for ZeroGPU

# Suppress warnings
logging.getLogger("transformers").setLevel(logging.ERROR)

# System prompt (defines the <execute> protocol and the stop behavior)
system_prompt = """
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user’s questions. The assistant can interact with an interactive Python (Jupyter Notebook) environment and receive the corresponding output when needed. The code should be enclosed using "<execute>" tag, for example: <execute> print("Hello World!") </execute>. The assistant should attempt fewer things at a time instead of putting too much code in one <execute> block. The assistant can install packages through PIP by <execute> !pip install [package needed] </execute> and should always import packages and define variables before starting to use them. For algorithms that return values (like search functions), ALWAYS define them as proper functions with def function_name(...): ... return value ... and then call the function to print or use the result. Do NOT use return outside a function. The assistant should stop <execute> and provide an answer when they have already obtained the answer from the execution result. Whenever possible, execute the code for the user using <execute> instead of providing it. The assistant’s response should be concise, but do express their thoughts. Once the task is complete and tested successfully (e.g., correct index printed), stop generating more code and say 'Task complete' without <execute> tags.
"""

# Example prompts offered in the UI
prompt_list = [
    "Print 'Hello, World!' using code. Once done, stop.",
    "Compute and print the sum of numbers from 1 to 10 using a loop. Use code to do it.",
    "Define a function def add(a, b) that returns a + b. Test it by calling add(3, 4) and printing the result.",
    "Print the length of numbers from 0 to 9 using len and range.",
    "Create a list [1,2,3] and print its length. Use code.",
    "Implement a function def factorial(n) to compute factorial recursively. Test on 5 and print result.",
    "Try to import math and print math.sqrt(16). If needed, install packages.",
    "Find if 10 is even or odd using a function def is_even(n), return True/False, test and print.",
    "Implement linear search to find index of 7 in [3,5,7,9], return -1 if not found. Test and print.",
    "Cause a deliberate error like divide by zero, then fix it in next step and print 10 / 2."
]

# Load model globally on CPU so startup never touches CUDA (required on
# ZeroGPU Spaces); float16 keeps the load light. Moved to GPU in run_agent.
pipe = pipeline("text-generation", model="xingyaoww/CodeActAgent-Mistral-7b-v0.1",
                device_map=None, torch_dtype=torch.float16)

# Generator function; the ZeroGPU decorator allocates a GPU per call
@spaces.GPU(duration=180)  # allow up to 180 s of GPU time per call
def run_agent(user_content):
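    """Stream (code, execution output, log) updates while the agent loops:
    generate a reply, extract the <execute> block, run it in the sandbox,
    and feed the observation back until a stop condition is hit."""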
    full_log = ""
    current_code = ""
    current_exec_output = ""

    yield [current_code, current_exec_output, full_log]  # Initial empty

    full_log += "Allocating GPU... (may queue if busy)\n\n"
    yield [current_code, current_exec_output, full_log]

    full_log += "Moving model to GPU and initializing...\n\n"
    yield [current_code, current_exec_output, full_log]

    # Move the model to the newly allocated GPU. Assigning pipe.device
    # directly makes the pipeline place input tensors on CUDA; note this
    # relies on pipeline internals rather than a public API.
    device = torch.device('cuda')
    pipe.model.to(device)
    pipe.device = device

    # Initial messages
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]

    # REPL state (restricted builtins). __import__ is excluded, so `import`
    # statements fail here and the error is fed back as an observation.
    repl_globals = {'__builtins__': {k: v for k, v in builtins.__dict__.items()
                                     if k in ['print', 'len', 'range', 'int']}}

    # History for loop detection
    prev_codes = set()

    # Max turns
    max_turns = 10
    turn = 0

    full_log += f"### Starting simulation for prompt: '{user_content}'\n\n"
    yield [current_code, current_exec_output, full_log]

    while turn < max_turns:
        # Generate response
        result = pipe(messages, max_new_tokens=512)
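        # With chat-style (list of messages) input, 'generated_text' holds the
        # whole conversation; the last element is the new assistant message.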
        assistant_content = result[0]['generated_text'][-1]['content']
        
        full_log += f"**Assistant (Turn {turn+1}):** {assistant_content}\n\n"
        yield [current_code, current_exec_output, full_log]
        
        # Stop checks
        if re.search(r'\b(task complete|done|final answer)\b', assistant_content.lower()):
            full_log += "Detected completion keyword. Stopping.\n"
            yield [current_code, current_exec_output, full_log]
            break
        
        # Extract <execute>
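        # (re.DOTALL lets the captured code span multiple lines)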
        execute_match = re.search(r'<execute>(.*?)</execute>', assistant_content, re.DOTALL)
        if not execute_match:
            full_log += "No code to execute. Task likely complete.\n"
            yield [current_code, current_exec_output, full_log]
            break
        
        code = execute_match.group(1).strip()
        current_code = code
        
        # Loop detection
        if code in prev_codes:
            full_log += "Repeated code detected. Possible infinite loop—stopping.\n"
            yield [current_code, current_exec_output, full_log]
            break
        prev_codes.add(code)
        
        full_log += f"**Executing code:**\n```\n{code}\n```\n\n"
        yield [current_code, current_exec_output, full_log]
        
        # Exec with capture
        old_stdout = sys.stdout
        sys.stdout = io.StringIO()
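        # Only stdout is redirected; exceptions are reported via the except
        # branch, and writes to stderr are not captured.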
        try:
            exec(code, repl_globals)
            exec_output = sys.stdout.getvalue().strip() or "No output."
        except Exception as e:
            exec_output = f"Error: {str(e)}"
        finally:
            sys.stdout = old_stdout
        
        current_exec_output = exec_output
        full_log += f"**Execution Output:** {exec_output}\n\n"
        yield [current_code, current_exec_output, full_log]
        
        # Heuristic success stop: output that is only digits (e.g., a found
        # index) is treated as a final answer.
        if re.match(r'^\d+$', exec_output.strip()):
            full_log += "Pure index output detected. Task successful—stopping.\n"
            yield [current_code, current_exec_output, full_log]
            break
        
        # Append feedback
        messages.append({"role": "assistant", "content": assistant_content})
        messages.append({"role": "user", "content": f"Observation: {exec_output}"})
        
        turn += 1

    # Final parse (grab last number as index if applicable)
    if 'exec_output' in locals():
        final_index = re.search(r'(\d+)$', exec_output)
        if final_index:
            full_log += f"**Extracted Result:** Index {final_index.group(1)}\n"
            yield [current_code, current_exec_output, full_log]
        else:
            full_log += "No clear index found—check errors.\n"
            yield [current_code, current_exec_output, full_log]
    else:
        full_log += "No execution output.\n"
        yield [current_code, current_exec_output, full_log]

    full_log += f"### End of simulation for prompt: '{user_content}'\n"
    yield [current_code, current_exec_output, full_log]

# Gradio interface
with gr.Blocks(title="Code Agent Simulator") as demo:
    gr.Markdown("# Code Agent Simulator on Hugging Face Spaces\nEnter a coding task prompt, and watch the agent simulate execution in real-time.")
    
    input_prompt = gr.Textbox(label="Enter your prompt", placeholder="e.g., Implement binary search...")
    
    with gr.Row():
        generated_code = gr.Code(label="Generated Code", language="python", lines=15, show_label=True)
        exec_output = gr.Textbox(label="Execution Output", lines=15, show_label=True)
    
    full_log = gr.Textbox(label="Full Simulation Log", lines=20, autoscroll=True, show_label=True)
    
    run_button = gr.Button("Run Simulation")
    
    gr.Examples(examples=prompt_list, inputs=[input_prompt])
    
    # On click, run the generator and stream to multiple outputs
    run_button.click(fn=run_agent, inputs=input_prompt, outputs=[generated_code, exec_output, full_log])

# Launch (disable SSR for stability, enable debug for logs)
if __name__ == "__main__":
    demo.queue().launch(ssr_mode=False, debug=True)