import json
import os
import shutil
import uuid
from datetime import datetime
from pathlib import Path

import jsonlines
import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import Repository, cached_download, hf_hub_url

from utils import http_get, http_post, validate_json
| if Path(".env").is_file(): | |
| load_dotenv(".env") | |
| HF_TOKEN = os.getenv("HF_TOKEN") | |
| AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME") | |
| AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API") | |
| LOCAL_REPO = "submission_repo" | |
| LOGS_REPO = "submission-logs" | |
| # TODO | |
| # 1. Add check that fields are nested under `tasks` field correctly | |
| # 2. Add check that names of tasks and datasets are valid | |
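# A minimal sketch of the checks described in the TODO above (illustration only,
# not yet wired into the app). `valid_identifiers` is a hypothetical argument:
# the real list of dataset identifiers would come from the GEM task configuration.
def validate_submission_structure(json_data, valid_identifiers):
    """Return (is_valid, message) for the two TODO checks."""
    tasks = json_data.get("tasks")
    # Check 1: outputs must be nested under `tasks` as {identifier: {"values": [...], "keys": [...]}}
    if not isinstance(tasks, dict):
        return False, "Submission must contain a `tasks` field mapping dataset identifiers to outputs"
    for identifier, outputs in tasks.items():
        if not isinstance(outputs, dict) or not {"values", "keys"} <= set(outputs):
            return False, f"Task `{identifier}` must nest `values` and `keys` under it"
        # Check 2: task / dataset names must be known GEM identifiers
        if identifier not in valid_identifiers:
            return False, f"`{identifier}` is not a valid dataset identifier"
    return True, "Submission structure is valid"
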
| MARKDOWN = """--- | |
| benchmark: gem | |
| type: prediction | |
| submission_name: {submission_name} | |
| tags: | |
| - evaluation | |
| - benchmark | |
| --- | |
| # GEM Submission | |
| Submission name: {submission_name} | |
| """ | |


def generate_dataset_card(submission_name):
    """
    Generate the dataset card for the submission.
    """
    markdown = MARKDOWN.format(
        submission_name=submission_name,
    )
    with open(os.path.join(LOCAL_REPO, "README.md"), "w") as f:
        f.write(markdown)


def load_json(path):
    with open(path, "r") as f:
        return json.load(f)


def get_submission_names():
    """Download the names of all existing submissions.

    The GEM frontend requires submission names to be unique, so we download
    the names of all previous submissions and check the user's submission
    against them.
    """
    scores_url = hf_hub_url("GEM-submissions/submission-scores", "scores.json", repo_type="dataset")
    scores_filepath = cached_download(scores_url, force_download=True)
    scores_data = load_json(scores_filepath)
    return [score["submission_name"] for score in scores_data]


#######
# APP #
#######
st.title("GEM Submissions")
st.markdown(
    """
Welcome to the [GEM benchmark](https://gem-benchmark.com/)! GEM is a benchmark
environment for Natural Language Generation with a focus on its Evaluation, both
through human annotations and automated Metrics.

GEM aims to:

- measure NLG progress across many NLG tasks and languages.
- audit data and models and present results via data cards and model robustness
  reports.
- develop standards for the evaluation of generated text using both automated and
  human metrics.

Use this page to submit your system's predictions to the benchmark.
"""
)
| with st.form(key="form"): | |
| # Flush local repo | |
| shutil.rmtree(LOCAL_REPO, ignore_errors=True) | |
| submission_errors = 0 | |
| uploaded_file = st.file_uploader("Upload submission file", type=["json"]) | |
| if uploaded_file: | |
| data = str(uploaded_file.read(), "utf-8") | |
| json_data = json.loads(data) | |
| submission_names = get_submission_names() | |
| submission_name = json_data["submission_name"] | |
| if submission_name in submission_names: | |
| st.error(f"π Submission name `{submission_name}` is already taken. Please rename your submission.") | |
| submission_errors += 1 | |
| else: | |
| is_valid, message = validate_json(json_data) | |
| if is_valid: | |
| st.success(message) | |
| else: | |
| st.error(message) | |
| submission_errors += 1 | |
| with st.expander("Submission format"): | |
| st.markdown( | |
| """ | |
| Please follow this JSON format for your `submission.json` file: | |
| ```json | |
| { | |
| "submission_name": "An identifying name of your system", | |
| "param_count": 123, # The number of parameters your system has. | |
| "description": "An optional brief description of the system that will be shown on the results page", | |
| "tasks": | |
| { | |
| "dataset_identifier": { | |
| "values": ["output-0", "output-1", "..."], # A list of system outputs. | |
| "keys": ["gem_id-0", "gem_id-1", ...] # A list of GEM IDs. | |
| } | |
| } | |
| } | |
| ``` | |
| Here, `dataset_identifier` is the identifier of the dataset followed by | |
| an identifier of the set the outputs were created from, for example | |
| `_validation` or `_test`. For example, the `mlsum_de` test set has the | |
| identifier `mlsum_de_test`. The `keys` field is needed to avoid | |
| accidental shuffling that will impact your metrics. Simply add a list of | |
| IDs from the `gem_id` column of each evaluation dataset in the same | |
| order as your values. Please see the sample submission below: | |
| """ | |
| ) | |
| with open("sample-submission.json", "r") as f: | |
| example_submission = json.load(f) | |
| st.json(example_submission) | |
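
        # For illustration only: a hypothetical `submission.json` for the
        # `mlsum_de_test` split could be built like this (the gem_id values
        # below are made up for the example):
        #
        #     submission = {
        #         "submission_name": "My System",
        #         "param_count": 125000000,
        #         "description": "A short system description",
        #         "tasks": {
        #             "mlsum_de_test": {
        #                 "values": ["Erste Zusammenfassung", "Zweite Zusammenfassung"],
        #                 "keys": ["mlsum_de-test-0", "mlsum_de-test-1"],
        #             }
        #         },
        #     }
        #     with open("submission.json", "w") as fp:
        #         json.dump(submission, fp)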

    user_name = st.text_input(
        "Enter your 🤗 Hub username",
        help="This field is required to track your submission and cannot be empty",
    )
    submit_button = st.form_submit_button("Make Submission")

# Guard against clicking "Make Submission" before a file has been uploaded
if submit_button and uploaded_file is not None and submission_errors == 0:
    with st.spinner("⏳ Preparing submission for evaluation ..."):
        submission_name = json_data["submission_name"]
        submission_name_formatted = submission_name.lower().replace(" ", "-").replace("/", "-")
        submission_time = str(int(datetime.now().timestamp()))
        # Create the submission dataset under the GEM-submissions org
        submission_repo_id = f"GEM-submissions/{user_name}__{submission_name_formatted}__{submission_time}"
        dataset_repo_url = f"https://huggingface.co/datasets/{submission_repo_id}"
        repo = Repository(
            local_dir=LOCAL_REPO,
            clone_from=dataset_repo_url,
            repo_type="dataset",
            private=False,
            use_auth_token=HF_TOKEN,
        )
        generate_dataset_card(submission_name)
        with open(f"{LOCAL_REPO}/submission.json", "w", encoding="utf-8") as f:
            json.dump(json_data, f)
        # TODO: add informative commit msg
        commit_url = repo.push_to_hub()
        # push_to_hub may return None (e.g. when there was nothing to commit)
        if commit_url is not None:
            commit_sha = commit_url.split("/")[-1]
        else:
            commit_sha = repo.git_head_commit_url().split("/")[-1]
        submission_id = submission_name + "__" + str(uuid.uuid4())[:6] + "__" + submission_time

        # Define the AutoTrain payload
        project_config = {}
        # A dummy dataset is needed to use the dataset loader in AutoTrain
        project_config["dataset_name"] = "lewtun/imdb-dummy"
        project_config["dataset_config"] = "lewtun--imdb-dummy"
        project_config["dataset_split"] = "train"
        project_config["col_mapping"] = {"text": "text", "label": "target"}
        # Specify the benchmark parameters
        project_config["model"] = "gem"
        project_config["dataset"] = "GEM/references"
        project_config["submission_dataset"] = submission_repo_id
        project_id = str(uuid.uuid4()).split("-")[0]
        project_payload = {
            "username": AUTOTRAIN_USERNAME,
            "proj_name": f"benchmark-gem-{project_id}",
            "task": 1,
            "config": {
                "language": "en",
                "max_models": 5,
                "instance": {
                    "provider": "aws",
                    "instance_type": "ml.g4dn.4xlarge",
                    "max_runtime_seconds": 172800,
                    "num_instances": 1,
                    "disk_size_gb": 150,
                },
                "benchmark": {
                    "dataset": project_config["dataset"],
                    "model": project_config["model"],
                    "submission_dataset": project_config["submission_dataset"],
                },
            },
        }
        project_json_resp = http_post(
            path="/projects/create", payload=project_payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
        ).json()
        print(f"Project creation: {project_json_resp}")

        # Upload data
        payload = {
            "split": 4,
            "col_mapping": project_config["col_mapping"],
            "load_config": {"max_size_bytes": 0, "shuffle": False},
        }
        data_json_resp = http_post(
            path=f"/projects/{project_json_resp['id']}/data/{project_config['dataset_name']}",
            payload=payload,
            token=HF_TOKEN,
            domain=AUTOTRAIN_BACKEND_API,
            params={
                "type": "dataset",
                "config_name": project_config["dataset_config"],
                "split_name": project_config["dataset_split"],
            },
        ).json()
        print(f"Dataset creation: {data_json_resp}")

        # Run training
        train_json_resp = http_get(
            path=f"/projects/{project_json_resp['id']}/data/start_process",
            token=HF_TOKEN,
            domain=AUTOTRAIN_BACKEND_API,
        ).json()
        print(f"Training job response: {train_json_resp}")

        # Log the submission so it can be inspected later if needed
        logs_repo_url = f"https://huggingface.co/datasets/GEM-submissions/{LOGS_REPO}"
        logs_repo = Repository(
            local_dir=LOGS_REPO,
            clone_from=logs_repo_url,
            repo_type="dataset",
            private=True,
            use_auth_token=HF_TOKEN,
        )
        evaluation_log = {}
        evaluation_log["payload"] = project_payload
        evaluation_log["project_creation_response"] = project_json_resp
        evaluation_log["dataset_creation_response"] = data_json_resp
        evaluation_log["autotrain_job_response"] = train_json_resp
        # Append the new entry by reading the existing log and rewriting the file
        with jsonlines.open(f"{LOGS_REPO}/logs.jsonl") as reader:
            lines = []
            for obj in reader:
                lines.append(obj)
            lines.append(evaluation_log)
        with jsonlines.open(f"{LOGS_REPO}/logs.jsonl", mode="w") as writer:
            for job in lines:
                writer.write(job)
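        # NOTE: an equivalent alternative (a sketch, not used here) would be to
        # append the new entry directly with jsonlines' append mode:
        #     with jsonlines.open(f"{LOGS_REPO}/logs.jsonl", mode="a") as writer:
        #         writer.write(evaluation_log)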
        logs_repo.push_to_hub(commit_message=f"Submission with job ID {project_json_resp['id']}")

        if train_json_resp["success"] == 1:
            st.success(f"✅ Submission {submission_name} was successfully submitted for evaluation!")
            st.markdown(
                f"""
Evaluation can take up to 1 hour to complete, so grab a ☕ or 🍵 while you wait:

* 📊 Click [here](https://huggingface.co/spaces/GEM/results) to view the results from your submission
* 💾 Click [here]({dataset_repo_url}) to view your submission file on the Hugging Face Hub

Please [contact the organisers](mailto:gehrmann@google.com) if you would like your submission and/or evaluation scores deleted.
"""
            )
        else:
            st.error(
                "🙈 Oh noes, there was an error with your submission! Please [contact the organisers](mailto:gehrmann@google.com)"
            )

    # Flush the local repos
    shutil.rmtree(LOCAL_REPO, ignore_errors=True)
    shutil.rmtree(LOGS_REPO, ignore_errors=True)
| with st.expander("Download all submissions and scores"): | |
| st.markdown("Click the button below if you'd like to download all the submissions and evaluations from GEM:") | |
| outputs_url = hf_hub_url( | |
| "GEM-submissions/v2-outputs-and-scores", "gem-v2-outputs-and-scores.zip", repo_type="dataset" | |
| ) | |
| outputs_filepath = cached_download(outputs_url) | |
| with open(outputs_filepath, "rb") as f: | |
| btn = st.download_button(label="Download submissions and scores", data=f, file_name="outputs-and-scores.zip") | |