save just the eval main results
evaluation_logic.py CHANGED (+20 -1)
@@ -57,15 +57,34 @@ def save_prediction(inference_api, model_name, prompt_format, question, generate
 def save_evaluation(inference_api, model_name, prompt_format, metrics):
     evaluation_file = evaluation_folder / f"evaluation_{file_uuid}.json"
     evaluation_folder.mkdir(parents=True, exist_ok=True)
+
+    # Extract only the category-specific execution metrics
+    categories = ['easy', 'medium', 'hard', 'duckdb', 'ddl', 'all']
+    simplified_metrics = {}
+
+    for category in categories:
+        if category in metrics['exec']:
+            category_metrics = metrics['exec'][category]
+            simplified_metrics[category] = {
+                'count': category_metrics['count'],
+                'execution_accuracy': category_metrics['exec']
+            }
+        else:
+            simplified_metrics[category] = {
+                'count': 0,
+                'execution_accuracy': 0.0
+            }
+
     with evaluation_scheduler.lock:
         with evaluation_file.open("a") as f:
             json.dump({
                 "inference_api": inference_api,
                 "model_name": model_name,
                 "prompt_format": prompt_format,
-                "
+                "category_metrics": simplified_metrics,
                 "timestamp": datetime.now().isoformat()
             }, f)
+            f.write('\n')
 
 def run_prediction(inference_api, model_name, prompt_format, output_file):
     dataset_path = str(eval_dir / "data/dev.json")
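
For context, the new extraction assumes `metrics['exec']` maps each category name to a dict with `count` and `exec` keys; that structure is produced elsewhere in the evaluation harness and is not shown in this diff. The sketch below uses made-up values to show how the loop behaves when some categories are missing, and what the JSONL record that the patched `save_evaluation` appends (one object per line, thanks to the added `f.write('\n')`) roughly looks like.

    # Sketch only: the exact shape of `metrics` and the example values below are
    # assumptions for illustration, not part of this commit.
    import json
    from datetime import datetime

    metrics = {
        "exec": {
            "easy":   {"count": 50, "exec": 0.82},  # hypothetical numbers
            "medium": {"count": 30, "exec": 0.61},
            "all":    {"count": 80, "exec": 0.74},
            # 'hard', 'duckdb', 'ddl' absent -> recorded as count 0, accuracy 0.0
        }
    }

    categories = ['easy', 'medium', 'hard', 'duckdb', 'ddl', 'all']
    simplified_metrics = {}
    for category in categories:
        if category in metrics['exec']:
            category_metrics = metrics['exec'][category]
            simplified_metrics[category] = {
                'count': category_metrics['count'],
                'execution_accuracy': category_metrics['exec'],
            }
        else:
            simplified_metrics[category] = {'count': 0, 'execution_accuracy': 0.0}

    # One JSONL record per call, mirroring what save_evaluation now appends.
    # The identifier values here are hypothetical placeholders.
    record = {
        "inference_api": "example_api",
        "model_name": "example_model",
        "prompt_format": "example_format",
        "category_metrics": simplified_metrics,
        "timestamp": datetime.now().isoformat(),
    }
    print(json.dumps(record))

Because each record now ends with a newline, the evaluation file can be read back line by line as JSONL rather than as a single concatenated JSON stream.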