import os
import sys
import csv
import json
import pandas as pd
from typing import Union
from config.model_metadata import MODELS

def get_headers(reader, agg: bool = False) -> Union[list, tuple[list, list]]:
    """Read the CSV header rows: metric names on row 0 and, unless `agg`
    is set, benchmark names on row 1."""
    metrics, benchs = [], []
    for i, row in enumerate(reader):
        if i == 0:
            metrics = row[1:]
        elif i == 1 and not agg:
            benchs = row[1:]
            break
        else:
            # Aggregated files carry only the metric header row.
            return metrics
    return metrics, benchs
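
# A minimal sketch of the wide layout get_headers() expects (all values here
# are hypothetical): row 0 names each column's metric, row 1 its benchmark,
# and both skip the leading model-name column, e.g.
#
#   Model,EM,FNC
#   Model,Bench-A,Bench-B
#   some-model,0.91,0.88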

def get_model_metadata(model_key: str) -> tuple[str, float, str, str, str]:
    try:
        model_metadata = MODELS[model_key]
    except KeyError:
        raise KeyError(f"Unknown model: {model_key}")
    return (
        model_metadata.url,
        model_metadata.params,
        model_metadata.model_type,
        model_metadata.release,
        model_metadata.model_arch,
    )
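
# Each MODELS entry is assumed to expose at least the attributes accessed
# above (the concrete class lives in config.model_metadata and is not shown
# here): url, params, model_type, release, model_arch.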

def parse_results(csv_path: str) -> list[dict]:
    """
    Flatten the wide results CSV (metric header row, then benchmark header
    row, then one row per model) into one record per score cell:
    MODEL | MODEL TYPE | BENCHMARK | TASK | RESULT | plus model metadata.
    """
    dataset = []
    with open(csv_path, newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        metrics, benchs = get_headers(reader)
        for row in reader:
            # Skip empty or whitespace-only rows.
            if not row or all(not cell.strip() for cell in row):
                continue
            model = row[0]
            if not model:
                continue
            url, params, model_type, release, reasoning = get_model_metadata(model)
            for value, metric, bench in zip(row[1:], metrics, benchs):
                if metric == "EM":
                    metric = "Exact Matching (EM)"
                dataset.append(
                    {
                        "Model": model,
                        "Model Type": model_type,
                        "Benchmark": bench,
                        "Task": metric,
                        # Scores may use a decimal comma in the source CSV.
                        "Result": float(value.replace(",", ".")),
                        "Model URL": url,
                        "Params": params,
                        "Release": release,
                        "Thinking": reasoning,
                    }
                )
    return dataset
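
# Shape of one record emitted by parse_results(); the values below are
# hypothetical, for illustration only:
#
#   {
#       "Model": "some-model",
#       "Model Type": "open",
#       "Benchmark": "Bench-A",
#       "Task": "Exact Matching (EM)",
#       "Result": 0.91,
#       "Model URL": "https://example.com/some-model",
#       "Params": 7.0,
#       "Release": "2024-01",
#       "Thinking": "none",
#   }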

def parse_agg(csv_path: str = "results/aggregated_scores_icarus.csv") -> pd.DataFrame:
    """
    Each row of the aggregated CSV has the following format:
    MODEL | BENCHMARK | TASK | METRIC | RESULT
    """
    return pd.read_csv(csv_path)

def write_json(data: list, path: str):
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4, ensure_ascii=False)
    print(f"Wrote {path}")

def read_json(json_path: str = "results/results_icarus.json") -> list:
    with open(json_path, "r", encoding="utf-8") as file:
        return json.load(file)

def read_dataframe(json_path: str) -> pd.DataFrame:
    df = pd.DataFrame(read_json(json_path))
    # Only Task and Result actually change name; "EM" is a cell value, not a
    # column, and parse_results() already expands it to "Exact Matching (EM)".
    df.rename(columns={"Task": "Metric", "Result": "Score"}, inplace=True)
    df["Params"] = pd.to_numeric(df["Params"], errors="coerce")
    return df
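
# After read_dataframe(), the frame carries the columns written by
# parse_results(), with two renamed: Model, Model Type, Benchmark,
# Metric (was Task), Score (was Result), Model URL, Params, Release, Thinking.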

def get_metadata(df: pd.DataFrame) -> tuple[list, list, str]:
    benchmarks = sorted(df["Benchmark"].unique().tolist(), reverse=True)
    metrics = df["Metric"].unique().tolist()
    default_metric = "Functionality (FNC)" if "Functionality (FNC)" in metrics else metrics[0]
    return benchmarks, metrics, default_metric

def read_data(
    json_path: str = "results/results_icarus.json",
) -> tuple[pd.DataFrame, list, list, str]:
    df = read_dataframe(json_path)
    benchmarks, metrics, default_metric = get_metadata(df)
    return df, benchmarks, metrics, default_metric
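
# Typical call site (a sketch, assuming the default results file exists):
#
#   df, benchmarks, metrics, default_metric = read_data()
#   subset = df[df["Metric"] == default_metric]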

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python results/parse.py <path_to_input_csv>")
        sys.exit(1)
    csv_path = sys.argv[1]
    if not os.path.exists(csv_path):
        print(f"Error: File not found at {csv_path}")
        sys.exit(1)
    json_path = os.path.splitext(csv_path)[0] + ".json"
    print(f"Parsing {csv_path}...")
    parsed_data = parse_results(csv_path)
    write_json(parsed_data, json_path)