# NOTE(review): the lines below this header were web-scrape residue
# (Hugging Face Spaces page chrome, git object hashes, and rendered
# line numbers) accidentally prepended to this module; they have been
# commented out so the file parses as valid Python.
# Spaces: Running / Running — File size: 4,225 Bytes
import os
import sys
import csv
import json
import pandas as pd
from typing import Dict, Union
from config.model_metadata import MODELS
def get_headers(reader, agg=False) -> Union[list, tuple[list, list]]:
    """Consume the header rows from a CSV reader.

    The first row holds metric names and the second benchmark names,
    both without the leading model-name column.

    Args:
        reader: iterable of CSV rows (e.g. a ``csv.reader``).
        agg: if True, only the metrics row is expected; reading stops
            as soon as a second row is seen.

    Returns:
        ``metrics`` alone when ``agg`` is True and a second row exists,
        otherwise the tuple ``(metrics, benchs)``.
    """
    metrics, benchs = [], []
    for i, row in enumerate(reader):
        if i == 0:
            metrics = row[1:]
        elif i == 1 and not agg:
            benchs = row[1:]
            break
        else:
            # agg mode: metrics already collected, stop at the second row.
            return metrics
    return metrics, benchs
def get_model_metadata(model_key: str) -> tuple[str, float, str, str, str]:
    """Look up a model's metadata record in the project registry.

    Args:
        model_key: key into the module-level ``MODELS`` mapping.

    Returns:
        A ``(url, params, model_type, release, model_arch)`` tuple.

    Raises:
        KeyError: if ``model_key`` is not a registered model.
    """
    if model_key not in MODELS:
        raise KeyError(f"Unknown model: {model_key}")
    meta = MODELS[model_key]
    return (
        meta.url,
        meta.params,
        meta.model_type,
        meta.release,
        meta.model_arch,
    )
def parse_results(csv_path: str) -> list[dict]:
    """Parse a results CSV into a flat list of score records.

    Expected CSV layout: a metrics header row, a benchmarks header row,
    then one row per model whose first cell is the model key and whose
    remaining cells are scores (decimal comma tolerated).

    Each produced record has the format:
        MODEL | BENCHMARK | TASK | METRIC | RESULT (plus model metadata)

    Args:
        csv_path: path to the input CSV file.

    Returns:
        One dict per (model, metric, benchmark) score cell.

    Raises:
        KeyError: if a row names a model missing from ``MODELS``.
        ValueError: if a score cell is not numeric.
    """
    dataset = []
    with open(csv_path, newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        metrics, benchs = get_headers(reader)
        for row in reader:
            # Skip fully blank / whitespace-only rows.
            if not row or all(not cell.strip() for cell in row):
                continue
            model = row[0]
            if not model:
                continue
            # "type" renamed to avoid shadowing the builtin.
            url, params, model_type, release, reasoning = get_model_metadata(model)
            # Scores start after the model-name column; assumes one score
            # per (metric, benchmark) header pair — TODO confirm lengths
            # always match in the input files.
            for score, metric, bench in zip(row[1:], metrics, benchs):
                if metric == "EM":
                    metric = "Exact Matching (EM)"
                dataset.append(
                    {
                        "Model": model,
                        "Model Type": model_type,
                        "Benchmark": bench,
                        "Task": metric,
                        # Normalize decimal comma before float conversion.
                        "Result": float(score.replace(",", ".")),
                        "Model URL": url,
                        "Params": params,
                        "Release": release,
                        "Thinking": reasoning,
                    }
                )
    return dataset
def parse_agg(csv_path: str = "results/aggregated_scores_icarus.csv") -> pd.DataFrame:
    """Load the aggregated-scores CSV into a DataFrame as-is.

    No reshaping is performed; the columns are whatever the CSV header
    declares. (The previous docstring described a row layout belonging
    to ``parse_results``, not this file.)
    """
    return pd.read_csv(csv_path)
def writeJson(data: list, path: str):
    """Serialize ``data`` to ``path`` as pretty-printed UTF-8 JSON.

    Args:
        data: JSON-serializable payload (a list of records here).
        path: destination file path; overwritten if it exists.
    """
    # ensure_ascii=False emits raw non-ASCII characters, so the file
    # encoding must be pinned to UTF-8 instead of the platform default
    # (which can raise UnicodeEncodeError on some locales).
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4, ensure_ascii=False)
    print("Done")
def read_json(json_path: str = "results/results_icarus.json"):
    """Read a UTF-8 JSON file and return its parsed contents."""
    with open(json_path, "r", encoding="utf-8") as fh:
        return json.load(fh)
def read_dataframe(json_path: str) -> pd.DataFrame:
    """Load parsed-results JSON into a DataFrame with display column names.

    Args:
        json_path: path to a JSON list of score records (see ``parse_results``).

    Returns:
        DataFrame with "Task"/"Result" renamed to "Metric"/"Score" and
        "Params" coerced to numeric (NaN where non-numeric).
    """
    df = pd.DataFrame(read_json(json_path))
    # Only these two columns actually change name; the previous mapping
    # also listed identity renames ("Model" -> "Model", ...) and a
    # nonexistent "EM" column, all of which were no-ops.
    df = df.rename(columns={"Task": "Metric", "Result": "Score"})
    df["Params"] = pd.to_numeric(df["Params"], errors="coerce")
    return df
def get_metadata(df: pd.DataFrame) -> tuple[list, list, str]:
    """Derive benchmark/metric option lists from a results DataFrame.

    Returns:
        A tuple of (benchmarks sorted Z->A, metrics in first-seen order,
        default metric — "Functionality (FNC)" when present, otherwise
        the first metric).
    """
    benchmarks = sorted(set(df["Benchmark"]), reverse=True)
    metrics = list(df["Metric"].unique())
    preferred = "Functionality (FNC)"
    default_metric = preferred if preferred in metrics else metrics[0]
    return benchmarks, metrics, default_metric
def read_data(
    json_path: str = "results/results_icarus.json",
) -> tuple[pd.DataFrame, list, list, str]:
    """Load results JSON and return (df, benchmarks, metrics, default_metric)."""
    df = read_dataframe(json_path)
    return (df, *get_metadata(df))
if __name__ == "__main__":
    # CLI entry point: convert a results CSV into a sibling .json file.
    if len(sys.argv) < 2:
        print("Usage: python results/parse.py <path_to_input_csv>")
        sys.exit(1)
    csv_path = sys.argv[1]
    if not os.path.exists(csv_path):
        print(f"Error: File not found at {csv_path}")
        sys.exit(1)
    # Output path: same basename, .json extension.
    json_path = os.path.splitext(csv_path)[0] + ".json"
    print(f"Parsing {csv_path}...")
    writeJson(parse_results(csv_path), json_path)