File size: 4,225 Bytes
1df4c13
8055678
be0239b
 
711a69b
 
be0239b
 
711a69b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be0239b
 
 
 
 
 
 
 
 
 
 
 
 
711a69b
 
 
 
 
 
 
 
 
8055678
711a69b
 
 
8055678
 
711a69b
8055678
 
be0239b
711a69b
 
 
 
 
 
 
 
 
 
 
 
 
 
1df4c13
c797bf2
711a69b
 
 
 
 
e0290d0
711a69b
 
 
 
e0290d0
711a69b
 
8055678
 
711a69b
 
 
 
e0290d0
711a69b
 
 
 
 
be0239b
e0290d0
711a69b
 
 
 
 
 
 
 
 
 
 
 
be0239b
 
 
 
711a69b
 
be0239b
 
 
 
 
 
 
 
 
711a69b
 
 
 
8055678
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import os
import sys
import csv
import json
import pandas as pd

from typing import Dict, Union
from config.model_metadata import MODELS


def get_headers(reader, agg=False) -> Union[list, tuple[list, list]]:
    """Consume the header row(s) from a results CSV reader.

    Parameters
    ----------
    reader : an iterator of rows (e.g. ``csv.reader``).
    agg : when True the CSV has a single header row (metrics only);
        when False a second header row with benchmark names follows.

    Returns
    -------
    ``metrics`` (a plain list) when ``agg`` is True, otherwise a
    ``(metrics, benchs)`` tuple. The leading cell of each header row
    (the model-name column) is skipped.
    """
    metrics, benchs = [], []
    for i, row in enumerate(reader):
        if i == 0:
            metrics = row[1:]
        elif i == 1 and not agg:
            benchs = row[1:]
            break
        else:
            # NOTE(review): in agg mode this branch is reached on the row
            # *after* the header, which has already been pulled from the
            # iterator and is therefore lost to the caller — confirm the
            # agg path's caller does not need that first data row.
            return metrics
    return metrics, benchs


def get_model_metadata(model_key: str) -> tuple[str, float, str, str, str]:
    """Look up ``model_key`` in the MODELS registry.

    Returns a ``(url, params, model_type, release, model_arch)`` tuple.
    Raises ``KeyError`` with a readable message for unknown models.
    """
    if model_key not in MODELS:
        raise KeyError(f"Unknown model: {model_key}")
    meta = MODELS[model_key]
    return (
        meta.url,
        meta.params,
        meta.model_type,
        meta.release,
        meta.model_arch,
    )


def parse_results(csv_path: str) -> list[dict]:
    """Parse a per-task results CSV into a flat list of record dicts.

    The CSV layout is two header rows (metrics, then benchmarks) followed
    by one row per model. Each emitted record has the keys:
        Model | Model Type | Benchmark | Task | Result | Model URL |
        Params | Release | Thinking
    """
    dataset = []
    with open(csv_path, newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        metrics, benchs = get_headers(reader)
        for row in reader:
            # Skip fully blank rows and rows lacking a model name.
            if not row or all(not cell.strip() for cell in row):
                continue
            model = row[0]
            if not model:
                continue
            # NOTE: renamed from `type` — that shadowed the builtin.
            url, params, model_type, release, reasoning = get_model_metadata(model)
            scores = row[1:]
            for idx, (metric, bench) in enumerate(zip(metrics, benchs)):
                if metric == "EM":
                    metric = "Exact Matching (EM)"
                dataset.append(
                    {
                        "Model": model,
                        "Model Type": model_type,
                        "Benchmark": bench,
                        "Task": metric,
                        # Scores may use a decimal comma; normalize to float.
                        "Result": float(scores[idx].replace(",", ".")),
                        "Model URL": url,
                        "Params": params,
                        "Release": release,
                        "Thinking": reasoning,
                    }
                )
    return dataset


def parse_agg(csv_path: str = "results/aggregated_scores_icarus.csv") -> pd.DataFrame:
    """
    Load the aggregated-scores CSV directly into a DataFrame.

    Unlike parse_results, no reshaping happens here: pandas reads the
    header row itself and returns the table as-is.
    """
    return pd.read_csv(csv_path)


def writeJson(data: list, path: str):
    """Serialize ``data`` to ``path`` as pretty-printed JSON.

    The file is opened as UTF-8 explicitly: with ``ensure_ascii=False``
    the dump contains raw non-ASCII characters, which would crash on
    platforms whose locale default codec cannot encode them.
    """
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4, ensure_ascii=False)
    print("Done")


def read_json(json_path: str = "results/results_icarus.json"):
    """Load and return the JSON document stored at ``json_path``."""
    with open(json_path, "r", encoding="utf-8") as fh:
        return json.load(fh)


def read_dataframe(json_path: str) -> pd.DataFrame:
    """Load parsed results JSON into a DataFrame with display column names.

    Renames ``Task`` -> ``Metric`` and ``Result`` -> ``Score``, and coerces
    ``Params`` to numeric (non-numeric entries become NaN).
    """
    data = read_json(json_path)
    df = pd.DataFrame(data)
    # Only these two columns actually change name. The previous mapping
    # also carried identity renames ("Model" -> "Model", "Benchmark" ->
    # "Benchmark") and an "EM" entry — but "EM" is a *value* in the Task
    # column, never a column name, so that rename could never fire.
    df.rename(columns={"Task": "Metric", "Result": "Score"}, inplace=True)
    df["Params"] = pd.to_numeric(df["Params"], errors="coerce")
    return df


def get_metadata(df: pd.DataFrame) -> tuple[list, list, str]:
    """Derive UI metadata from the results frame.

    Returns the benchmark names sorted descending, the metric names in
    first-appearance order, and the default metric ("Functionality (FNC)"
    when present, otherwise the first metric seen).
    """
    benchmarks = sorted(df["Benchmark"].unique(), reverse=True)
    metrics = list(df["Metric"].unique())
    if "Functionality (FNC)" in metrics:
        default_metric = "Functionality (FNC)"
    else:
        default_metric = metrics[0]
    return benchmarks, metrics, default_metric


def read_data(
    json_path: str = "results/results_icarus.json",
) -> tuple[pd.DataFrame, list, list, str]:
    """Load the results JSON and return (df, benchmarks, metrics, default_metric)."""
    frame = read_dataframe(json_path)
    return (frame, *get_metadata(frame))


if __name__ == "__main__":
    # CLI entry: convert a results CSV into a JSON file next to it.
    if len(sys.argv) < 2:
        print("Usage: python results/parse.py <path_to_input_csv>")
        sys.exit(1)

    csv_path = sys.argv[1]
    if not os.path.exists(csv_path):
        print(f"Error: File not found at {csv_path}")
        sys.exit(1)

    # Output path: same basename, .json extension.
    json_path = os.path.splitext(csv_path)[0] + ".json"
    print(f"Parsing {csv_path}...")
    writeJson(parse_results(csv_path), json_path)