Update app.py
Browse files
app.py
CHANGED
|
@@ -155,8 +155,7 @@ def evaluate(input_data):
|
|
| 155 |
item = future_to_item[future]
|
| 156 |
try:
|
| 157 |
result = future.result()
|
| 158 |
-
|
| 159 |
-
results.append(item)
|
| 160 |
except Exception as e:
|
| 161 |
item.update({"status": "Exception", "error": str(e)})
|
| 162 |
results.append(item)
|
|
@@ -180,26 +179,39 @@ def evaluate_single_case(input_data):
|
|
| 180 |
# Use a retry mechanism for all languages for better reliability
|
| 181 |
max_retries = 2 # One retry for all languages
|
| 182 |
|
| 183 |
-
|
| 184 |
for comp in completions:
|
| 185 |
code = input_data.get('prompt') + comp + '\n' + input_data.get('tests')
|
| 186 |
|
| 187 |
-
# Try up to max_retries
|
| 188 |
-
for attempt in range(max_retries
|
| 189 |
result = evaluate_code(code, language)
|
| 190 |
|
| 191 |
# If success or last attempt, return/record the result
|
| 192 |
-
if result["status"] == "OK"
|
| 193 |
-
if result["status"] == "OK":
|
| 194 |
-
return result
|
| 195 |
-
results.append(result)
|
| 196 |
break
|
| 197 |
-
|
| 198 |
# For retries, briefly wait to allow resources to stabilize
|
| 199 |
time.sleep(0.3)
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
except Exception as e:
|
| 204 |
return {"status": "Exception", "error": str(e)}
|
| 205 |
|
|
|
|
| 155 |
item = future_to_item[future]
|
| 156 |
try:
|
| 157 |
result = future.result()
|
| 158 |
+
results.append(result)
|
|
|
|
| 159 |
except Exception as e:
|
| 160 |
item.update({"status": "Exception", "error": str(e)})
|
| 161 |
results.append(item)
|
|
|
|
| 179 |
# Use a retry mechanism for all languages for better reliability
|
| 180 |
max_retries = 2 # One retry for all languages
|
| 181 |
|
| 182 |
+
status_list, stderr_list = [], []
|
| 183 |
for comp in completions:
|
| 184 |
code = input_data.get('prompt') + comp + '\n' + input_data.get('tests')
|
| 185 |
|
| 186 |
+
# Try up to max_retries times for all test cases
|
| 187 |
+
for attempt in range(max_retries):
|
| 188 |
result = evaluate_code(code, language)
|
| 189 |
|
| 190 |
# Stop retrying as soon as an attempt succeeds
|
| 191 |
+
if result["status"] == "OK":
|
|
|
|
|
|
|
|
|
|
| 192 |
break
|
|
|
|
| 193 |
# For retries, briefly wait to allow resources to stabilize
|
| 194 |
time.sleep(0.3)
|
| 195 |
+
|
| 196 |
+
status_list.append(result["status"])
|
| 197 |
+
stderr_list.append(result["stderr"])
|
| 198 |
+
|
| 199 |
+
processed_completions = input_data.pop('processed_completions', [])
|
| 200 |
+
completions = input_data.pop('completions', [])
|
| 201 |
+
|
| 202 |
+
meta_data = [
|
| 203 |
+
{
|
| 204 |
+
'processed_completion': p_comp,
|
| 205 |
+
'completion': comp,
|
| 206 |
+
'status': status,
|
| 207 |
+
'stderr': stderr
|
| 208 |
+
}
|
| 209 |
+
for p_comp, comp, status, stderr in zip(processed_completions, completions, status_list, stderr_list)
|
| 210 |
+
]
|
| 211 |
+
|
| 212 |
+
input_data['meta_data'] = meta_data
|
| 213 |
+
return input_data
|
| 214 |
+
|
| 215 |
except Exception as e:
|
| 216 |
return {"status": "Exception", "error": str(e)}
|
| 217 |
|