Spaces:
Sleeping
Sleeping
output formatting
Browse files- evaluation_logic.py +4 -10
evaluation_logic.py
CHANGED
|
@@ -166,23 +166,17 @@ def run_evaluation(inference_api, model_name, prompt_format="duckdbinstgranitesh
|
|
| 166 |
if metrics:
|
| 167 |
yield "Overall Results:"
|
| 168 |
overall_metrics = metrics['exec']['all']
|
| 169 |
-
yield f"
|
| 170 |
-
yield f"
|
| 171 |
-
yield f"Exact Match Accuracy: {overall_metrics['exact']:.3f}"
|
| 172 |
-
yield f"Equality: {metrics['equality']['equality']:.3f}"
|
| 173 |
-
yield f"Edit Distance: {metrics['edit_distance']['edit_distance']:.3f}"
|
| 174 |
|
| 175 |
-
yield "\nResults by Category:"
|
| 176 |
categories = ['easy', 'medium', 'hard', 'duckdb', 'ddl', 'all']
|
| 177 |
|
| 178 |
for category in categories:
|
| 179 |
if category in metrics['exec']:
|
| 180 |
-
yield f"\n{category}:"
|
| 181 |
category_metrics = metrics['exec'][category]
|
| 182 |
-
yield f"
|
| 183 |
-
yield f"Execution Accuracy: {category_metrics['exec']:.3f}"
|
| 184 |
else:
|
| 185 |
-
yield f"
|
| 186 |
else:
|
| 187 |
yield "No evaluation metrics returned."
|
| 188 |
except Exception as e:
|
|
|
|
| 166 |
if metrics:
|
| 167 |
yield "Overall Results:"
|
| 168 |
overall_metrics = metrics['exec']['all']
|
| 169 |
+
yield f"All (n={overall_metrics['count']}) - Execution Accuracy: {overall_metrics['exec']:.3f}"
|
| 170 |
+
yield f"All (n={overall_metrics['count']}) - Edit Distance: {metrics['edit_distance']['edit_distance']:.3f}"
|
|
|
|
|
|
|
|
|
|
| 171 |
|
|
|
|
| 172 |
categories = ['easy', 'medium', 'hard', 'duckdb', 'ddl', 'all']
|
| 173 |
|
| 174 |
for category in categories:
|
| 175 |
if category in metrics['exec']:
|
|
|
|
| 176 |
category_metrics = metrics['exec'][category]
|
| 177 |
+
yield f"{category} (n={category_metrics['count']}) - Execution Accuracy: {category_metrics['exec']:.3f}"
|
|
|
|
| 178 |
else:
|
| 179 |
+
yield f"{category}: No data available"
|
| 180 |
else:
|
| 181 |
yield "No evaluation metrics returned."
|
| 182 |
except Exception as e:
|