Add metrics computation for CMG task

- app.py +1 -1
- requirements.txt +5 -1
- src/__init__.py +0 -0
- src/evaluation/__init__.py +3 -0
- src/evaluation/base_task_metrics.py +17 -0
- src/evaluation/commit_message_generation/__init__.py +3 -0
- src/evaluation/commit_message_generation/cmg_metrics.py +53 -0
- src/evaluation/metrics.py +13 -0
- src/formatting.py +12 -0
- src/get_results_for_task.py +1 -3
- src/submission_uploader.py +171 -33

app.py
CHANGED
@@ -1,6 +1,6 @@
 import os
 
-import gradio as gr
+import gradio as gr  # type: ignore[import]
 
 from src.content import (INTRODUCTION_TEXT, INTRODUCTION_TITLE,
                          LEADERBOARD_TEXT, LEADERBOARD_TITLE,

requirements.txt
CHANGED
@@ -1 +1,5 @@
-huggingface_hub
+huggingface_hub
+jsonlines
+pandas
+tqdm
+evaluate

src/__init__.py
ADDED
File without changes

src/evaluation/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .metrics import METRICS
+
+__all__ = ["METRICS"]

src/evaluation/base_task_metrics.py
ADDED
@@ -0,0 +1,17 @@
+from abc import ABC, abstractmethod
+from typing import Dict, List
+
+
+class BaseTaskMetrics(ABC):
+    def reset(self):
+        pass
+
+    @abstractmethod
+    def add_batch(
+        self, predictions: List[str], references: List[str], *args, **kwargs
+    ) -> None:
+        pass
+
+    @abstractmethod
+    def compute(self, *args, **kwargs) -> Dict[str, float]:
+        pass

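Implementations of this interface are driven in a fixed pattern by the uploader below: reset(), then add_batch() once per example, then compute(). A minimal sketch of a conforming subclass, assuming the repository root is on PYTHONPATH; the ExactMatch class is hypothetical and not part of this commit:

from typing import Dict, List

from src.evaluation.base_task_metrics import BaseTaskMetrics


class ExactMatch(BaseTaskMetrics):
    # Hypothetical metric: percentage of predictions equal to their references.
    def __init__(self):
        self.hits = 0
        self.total = 0

    def reset(self):
        self.hits = 0
        self.total = 0

    def add_batch(
        self, predictions: List[str], references: List[str], *args, **kwargs
    ) -> None:
        for pred, ref in zip(predictions, references):
            self.hits += int(pred.strip() == ref.strip())
            self.total += 1

    def compute(self, *args, **kwargs) -> Dict[str, float]:
        return {"exact_match": 100 * self.hits / max(self.total, 1)}
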
src/evaluation/commit_message_generation/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .cmg_metrics import CMGMetrics
+
+__all__ = ["CMGMetrics"]

src/evaluation/commit_message_generation/cmg_metrics.py
ADDED
@@ -0,0 +1,53 @@
+from typing import Dict, List
+
+import evaluate  # type: ignore[import]
+
+from ..base_task_metrics import BaseTaskMetrics
+from .b_norm import BNorm
+
+
+class CMGMetrics(BaseTaskMetrics):
+    def __init__(self):
+        self.bnorm = BNorm()
+        self.bleu = evaluate.load("sacrebleu")
+        self.chrf = evaluate.load("chrf")
+        self.rouge = evaluate.load("rouge")
+        self.bertscore = evaluate.load("bertscore")
+        self.bertscore_normalized = evaluate.load("bertscore")
+
+    def reset(self):
+        self.bnorm.reset()
+
+    def add_batch(
+        self, predictions: List[str], references: List[str], *args, **kwargs
+    ) -> None:
+        self.bnorm.update(predictions=predictions, references=references)
+        self.bleu.add_batch(
+            predictions=predictions, references=[[ref] for ref in references]
+        )
+        self.chrf.add_batch(
+            predictions=predictions, references=[[ref] for ref in references]
+        )
+        self.rouge.add_batch(predictions=predictions, references=references)
+        self.bertscore.add_batch(predictions=predictions, references=references)
+        self.bertscore_normalized.add_batch(
+            predictions=predictions, references=references
+        )
+
+    def compute(self, *args, **kwargs) -> Dict[str, float]:
+        rouge = self.rouge.compute()
+        bertscore = self.bertscore.compute(lang="en")
+        bertscore_normalized = self.bertscore_normalized.compute(
+            lang="en", rescale_with_baseline=True
+        )
+        return {
+            "bnorm": self.bnorm.compute(),
+            "bleu": self.bleu.compute(tokenize="13a")["score"],
+            "chrf": self.chrf.compute()["score"],
+            "rouge1": rouge["rouge1"] * 100,
+            "rouge2": rouge["rouge2"] * 100,
+            "rougeL": rouge["rougeL"] * 100,
+            "bertscore": sum(bertscore["f1"]) / len(bertscore["f1"]),
+            "bertscore_normalized": sum(bertscore_normalized["f1"])
+            / len(bertscore_normalized["f1"]),
+        }

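A rough usage sketch for CMGMetrics, assuming the evaluate metrics and the default BERTScore model can be downloaded; BNorm comes from the sibling b_norm module, which is not part of this commit:

from src.evaluation.commit_message_generation import CMGMetrics

metrics = CMGMetrics()
metrics.add_batch(
    predictions=["Fix off-by-one error in pagination"],
    references=["Fix pagination off-by-one bug"],
)
print(metrics.compute())
# expected keys: bnorm, bleu, chrf, rouge1, rouge2, rougeL,
# bertscore, bertscore_normalized

Note that reset() only clears the stateful BNorm accumulator, presumably because the evaluate-backed metrics drop their accumulated batches when compute() is called.
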
src/evaluation/metrics.py
ADDED
@@ -0,0 +1,13 @@
+from typing import Dict, Optional
+
+from .base_task_metrics import BaseTaskMetrics
+from .commit_message_generation import CMGMetrics
+
+METRICS: Dict[str, Optional[BaseTaskMetrics]] = {
+    "commit_message_generation": CMGMetrics(),
+    "bug_localization": None,
+    "module_to_text": None,
+    "library_usage": None,
+    "project_code_completion": None,
+    "bug_localization_build_logs": None,
+}

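The uploader resolves a task's metrics through this registry and fails fast for tasks that still map to None, mirroring the check in _compute_metrics_for_predictions below; a minimal sketch:

from src.evaluation import METRICS

metrics_module = METRICS["bug_localization"]
# raises AssertionError until a BaseTaskMetrics implementation is registered
assert metrics_module is not None, "Computing metrics for bug_localization is not supported."
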
src/formatting.py
ADDED
@@ -0,0 +1,12 @@
+def styled_error(error):
+    return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
+
+
+def styled_warning(warn):
+    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{warn}</p>"
+
+
+def styled_message(message):
+    return (
+        f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"
+    )

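Each helper wraps its message in an inline-styled <p> tag, suitable for an HTML-rendering Gradio component; for instance:

from src.formatting import styled_message

styled_message("Submission received!")
# "<p style='color: green; font-size: 20px; text-align: center;'>Submission received!</p>"
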
src/get_results_for_task.py
CHANGED
@@ -1,6 +1,4 @@
-import pandas as pd
-
-RESULTS_DATASET = "JetBrains-Research/lca-results"
+import pandas as pd  # type: ignore[import]
 
 
 def get_results_for_task_stub(task: str) -> pd.DataFrame:

src/submission_uploader.py
CHANGED
@@ -1,9 +1,16 @@
 import json
+import logging
 import os
-from typing import List
+from tempfile import TemporaryDirectory
+from typing import Dict, List, Optional
 
-from huggingface_hub import CommitOperationAdd, Discussion, HfApi, HfFileSystem
+import jsonlines
+from huggingface_hub import CommitOperationAdd  # type: ignore[import]
+from huggingface_hub import Discussion, HfApi, HfFileSystem
+from tqdm import tqdm
 
+from .evaluation import METRICS
+from .formatting import styled_error, styled_message, styled_warning
 from .tasks import TASKS_PRETTY_REVERSE
 
 
@@ -39,19 +46,30 @@ class SubmissionUploader:
                 and discussion.title == pr_title
             ):
                 return discussion
+        return None
 
-    def _upload_files(
+    def _get_metadata(
         self,
-        task_id: str,
-        model_folder: str,
         model_name_pretty: str,
         model_availability: str,
         urls: str,
         context_size: str,
         submitted_by: str,
-        filenames: List[str],
+    ) -> Dict[str, str]:
+        return {
+            "model_name": model_name_pretty,
+            "model_availability": model_availability,
+            "urls": urls,
+            "context_size": context_size,
+            "submitted_by": submitted_by,
+        }
+
+    def _upload_predictions(
+        self,
+        task_id: str,
+        model_folder: str,
+        filenames: List[str],
     ) -> List[CommitOperationAdd]:
-        # add predictions files
         commit_operations = [
             CommitOperationAdd(
                 path_in_repo=f"{task_id}/predictions/{model_folder}/{os.path.basename(filename)}",
@@ -59,25 +77,114 @@ class SubmissionUploader:
             )
             for filename in filenames
         ]
+        return commit_operations
 
-        # add metadata file
-        metadata_dict = {
-            "model_name": model_name_pretty,
-            "model_availability": model_availability,
-            "urls": urls,
-            "context_size": context_size,
-            "submitted_by": submitted_by,
+    def _compute_metrics_for_predictions(
+        self, task_id: str, filenames: Optional[List[str]], temp_directory: str
+    ) -> None:
+        metrics_module = METRICS[task_id]
+        assert (
+            metrics_module is not None
+        ), f"Computing metrics for {task_id} is not supported."
+        metrics_module.reset()
+        open(os.path.join(temp_directory, "metrics.jsonl"), "w").close()
+
+        # compute the metrics for each submitted file
+        for filename in filenames:
+            with jsonlines.open(filename, "r") as reader:
+                for example in tqdm(
+                    reader, desc=f"Computing metrics for {os.path.basename(filename)}"
+                ):
+                    metrics_module.add_batch(
+                        predictions=[example["prediction"]],
+                        references=[example["reference"]],
+                    )
+            computed_metrics = metrics_module.compute()
+            metrics_module.reset()
+            with jsonlines.open(
+                os.path.join(temp_directory, "metrics.jsonl"), "a"
+            ) as writer:
+                writer.write(computed_metrics)
+
+        # aggregate the metrics over submitted files
+        with jsonlines.open(
+            os.path.join(temp_directory, "metrics.jsonl"), "r"
+        ) as reader:
+            metrics_results = [line for line in reader]
+        final_metrics_results = {
+            key: sum(entry[key] for entry in metrics_results) / len(metrics_results)
+            for key in metrics_results[0]
         }
-        with open("metadata.json", "w") as f:
-            json.dump(metadata_dict, f)
-        commit_operations.append(
+        with open(os.path.join(temp_directory, "final_metrics.json"), "w") as f:
+            json.dump(final_metrics_results, f)
+
+    def _upload_results(
+        self,
+        task_id: str,
+        model_folder: str,
+        model_name_pretty: str,
+        model_availability: str,
+        urls: str,
+        context_size: str,
+        submitted_by: str,
+        temp_directory: str,
+    ) -> List[CommitOperationAdd]:
+        final_results = {}
+        with open(os.path.join(temp_directory, "final_metrics.json"), "r") as f:
+            metrics = json.load(f)
+            final_results.update(metrics)
+        metadata_dict = self._get_metadata(
+            model_name_pretty=model_name_pretty,
+            model_availability=model_availability,
+            urls=urls,
+            context_size=context_size,
+            submitted_by=submitted_by,
+        )
+        final_results.update(metadata_dict)
+
+        with jsonlines.open(
+            os.path.join(temp_directory, "final_results.jsonl"), "w"
+        ) as writer:
+            writer.write(final_results)
+
+        return [
             CommitOperationAdd(
-                path_in_repo=f"{task_id}/predictions/{model_folder}/metadata.json",
-                path_or_fileobj="metadata.json",
+                path_in_repo=f"{task_id}/results/{model_folder}.jsonl",
+                path_or_fileobj=os.path.join(temp_directory, "final_results.jsonl"),
            )
-        )
+        ]
 
-        return commit_operations
+    def _verify_arguments(
+        self,
+        model_folder: str,
+        model_name_pretty: str,
+        model_availability: str,
+        urls: str,
+        context_size: str,
+        submitted_by: str,
+        filenames: Optional[List[str]],
+    ):
+        assert (
+            model_folder
+        ), "Please, specify non-empty name for a directory with a model's results."
+        assert model_name_pretty, "Please, specify non-empty name for a model."
+        assert (
+            model_availability
+        ), "Please, specify non-empty information about a model's availability."
+        assert (
+            context_size
+        ), "Please, specify non-empty information about a model's context size."
+        try:
+            _ = int(context_size)
+        except ValueError:
+            raise ValueError(
+                "Please, specify a model's context size as an integer (e.g., 16000)."
+            )
+
+        assert (
+            submitted_by
+        ), "Please, specify non-empty information about a submission's author(s)."
+        assert filenames, "Please, attach at least one file with predictions."
 
     def upload_files(
         self,
@@ -92,10 +199,21 @@ class SubmissionUploader:
         force: bool = False,
     ) -> str:
         try:
+            self._verify_arguments(
+                model_folder=model_folder,
+                model_name_pretty=model_name_pretty,
+                model_availability=model_availability,
+                urls=urls,
+                context_size=context_size,
+                submitted_by=submitted_by,
+                filenames=filenames,
+            )
+
             pr_title = f"🚀 New submission to {task_pretty} task: {model_name_pretty} with {context_size} context size from {submitted_by}"
 
             task_id = TASKS_PRETTY_REVERSE[task_pretty]
 
+            logging.info("Checking if this request is already submitted...")
             if not force:
                 if model_name_pretty in self._fs.ls(
                     f"datasets/{self._dataset_id}/{task_id}/predictions"
@@ -106,29 +224,46 @@ class SubmissionUploader:
                 )
                 for filename in filenames + ["metadata.json"]
             ):
-                return (
+                return styled_warning(
                    f"{model_name_pretty} is already present in {self._dataset_id}."
                )
 
            prev_pr = self._get_previous_pr(pr_title)
            if prev_pr is not None:
                url = f"https://huggingface.co/datasets/{self._dataset_id}/discussions/{prev_pr.num}"
-                return f"{self._dataset_id} already has an open PR for this submission: {url}."
+                return styled_warning(
+                    f"{self._dataset_id} already has an open PR for this submission: {url}."
+                )
 
-            commit_operations = self._upload_files(
+            logging.info("Processing predictions...")
+            predictions_commit_operations = self._upload_predictions(
                task_id=task_id,
                model_folder=model_folder,
-                model_name_pretty=model_name_pretty,
-                model_availability=model_availability,
-                urls=urls,
-                context_size=context_size,
-                submitted_by=submitted_by,
                filenames=filenames,
            )
 
+            with TemporaryDirectory() as d:
+                logging.info("Computing metrics...")
+                self._compute_metrics_for_predictions(
+                    task_id=task_id, filenames=filenames, temp_directory=str(d)
+                )
+
+                logging.info("Processing results...")
+                results_commit_operations = self._upload_results(
+                    task_id=task_id,
+                    model_folder=model_folder,
+                    model_name_pretty=model_name_pretty,
+                    model_availability=model_availability,
+                    urls=urls,
+                    context_size=context_size,
+                    submitted_by=submitted_by,
+                    temp_directory=str(d),
+                )
+
+            logging.info("Creating commit...")
            new_pr = self._api.create_commit(
                repo_id=self._dataset_id,
-                operations=commit_operations,
+                operations=predictions_commit_operations + results_commit_operations,
                commit_message=pr_title,
                commit_description=f"""New submission to {task_pretty} task in 🏟️ Long Code Arena benchmark!
 
@@ -141,7 +276,10 @@ class SubmissionUploader:
                create_pr=True,
                repo_type="dataset",
            )
-            return f"🎉 PR created at {new_pr.pr_url}."
+            return styled_message(f"🎉 PR created at {new_pr.pr_url}.")
 
-        except Exception:
-            return "An exception occured."
+        except Exception as e:
+            logging.exception(e)
+            if str(e):
+                return styled_error(str(e))
+            return styled_error("An exception occured.")
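
_compute_metrics_for_predictions expects every attached predictions file to be JSONL with prediction and reference fields, and the final metrics are a plain key-by-key average over the per-file results. A self-contained sketch of both conventions; the file name and numbers are illustrative only:

import jsonlines

# one JSON object per line, with the two fields the uploader reads
with jsonlines.open("predictions.jsonl", "w") as writer:
    writer.write({"prediction": "Fix typo in README", "reference": "Fix README typo"})

# key-by-key averaging over per-file metrics, mirroring the uploader's aggregation
metrics_results = [{"bleu": 10.0, "chrf": 30.0}, {"bleu": 20.0, "chrf": 40.0}]
final_metrics_results = {
    key: sum(entry[key] for entry in metrics_results) / len(metrics_results)
    for key in metrics_results[0]
}
print(final_metrics_results)  # {'bleu': 15.0, 'chrf': 35.0}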