superb
/

superb-submission

Model card Files Files and versions

xet

Community

lewtun HF Staff committed on Sep 10, 2021

Commit

2022859

1 Parent(s): 82c33f8

Tidy up README

Browse files

Files changed (2) hide show

{{cookiecutter.repo_name}}/README.md +9 -10
{{cookiecutter.repo_name}}/cli.py +6 -54

{{cookiecutter.repo_name}}/README.md CHANGED Viewed

@@ -1,30 +1,29 @@
 ---
 benchmark: superb
 type: model-upload
-submission_nme: none
 ---
-# SUPERB submissions for {{cookiecutter.repo_name}}
 ## Submitting to the leaderboard
 To make a submission to the [leaderboard](https://huggingface.co/spaces/superb/superb-leaderboard), there are three main steps:
-1. Generate predictions on the unlabeled test set of each task
-2. Validate the predictions are compatible with the evaluation framework
-3. Push the predictions to the Hub!
 See the instructions below for more details.
 ### Rules
-1. To prevent overfitting to the public leaderboard, we only evaluate **one submission per week**. You can push predictions to the Hub as many times as you wish, but we will only evaluate the most recent commit in a given week.
-2. Transfer or meta-learning using other datasets, including further pre-training on other corpora, is allowed.
-3. Use of unlabeled test data is allowed, as it is always available in the applied setting. For example, further pre-training using the unlabeled data for a task would be permitted.
-4. Systems may be augmented with information retrieved from the internet, e.g. via automated web searches.
 ### Submission file format
 ### Validate your submission
 To ensure that your submission files are correctly formatted, run the following command from the root of the repository:
@@ -55,4 +54,4 @@ Submission successful! 🎉 🥳 🎉
 Your submission will be fine-tuned and evaluated on Sunday 05 September 2021 ⏳
 ```
-where the evaluation is run every Sunday and your results will be visible on the leaderboard.

 ---
 benchmark: superb
 type: model-upload
+submission_name: none
 ---
+# SUPERB submissions for test-submission-with-weights
 ## Submitting to the leaderboard
 To make a submission to the [leaderboard](https://huggingface.co/spaces/superb/superb-leaderboard), there are three main steps:
+1. Pretrain a model and store the weights in this repository as a `model.pt` file
+2. Validate the model is compatible with the fine-tuning and evaluation framework
+3. Push the model to the Hub!
 See the instructions below for more details.
 ### Rules
+1. To prevent overfitting to the public leaderboard, we only evaluate **one submission per week**. You can push upstream models to the Hub as many times as you wish, but we will only evaluate the most recent commit in a given week.
 ### Submission file format
+TODO
 ### Validate your submission
 To ensure that your submission files are correctly formatted, run the following command from the root of the repository:
 Your submission will be fine-tuned and evaluated on Sunday 05 September 2021 ⏳
 ```
+and your results will be visible on the leaderboard once the model is fine-tuned and evaluated.

{{cookiecutter.repo_name}}/cli.py CHANGED Viewed

@@ -5,25 +5,11 @@ from pathlib import Path
 import pandas as pd
 import typer
-from datasets import get_dataset_config_names, load_dataset
-CSV_SCHEMA = {
-    "banking_77": (5000, 2),
-    "overruling": (2350, 2),
-    "semiconductor_org_types": (449, 2),
-    "ade_corpus_v2": (5000, 2),
-    "twitter_complaints": (3399, 2),
-    "neurips_impact_statement_risks": (150, 2),
-    "systematic_review_inclusion": (2244, 2),
-    "terms_of_service": (5000, 2),
-    "tai_safety_research": (1639, 2),
-    "one_stop_english": (518, 2),
-    "tweet_eval_hate": (2966, 2),
-}
 app = typer.Typer()
 def _update_submission_name(submission_name: str):
     replacement = ""
     with open("README.md", "r") as f:
@@ -42,54 +28,20 @@ def _update_submission_name(submission_name: str):
 @app.command()
 def validate():
-    # TODO(lewtun): Consider using great_expectations for the data validation
-    tasks = get_dataset_config_names("ought/raft")
     # Check that all the expected files exist
-    prediction_files = list(Path("data").rglob("predictions.csv"))
-    mismatched_files = set(tasks).symmetric_difference(set([f.parent.name for f in prediction_files]))
-    if mismatched_files:
-        raise ValueError(f"Incorrect number of files! Expected {len(tasks)} files, but got {len(prediction_files)}.")
-    # Check all files have the expected shape (number of rows, number of columns)
-    # TODO(lewtun): Add a check for the specific IDs per file
-    shape_errors = []
-    column_errors = []
-    for prediction_file in prediction_files:
-        df = pd.read_csv(prediction_file)
-        incorrect_shape = df.shape != CSV_SCHEMA[prediction_file.parent.name]
-        if incorrect_shape:
-            shape_errors.append(prediction_file)
-        incorrect_columns = sorted(df.columns) != ["ID", "Label"]
-        if incorrect_columns:
-            column_errors.append(prediction_file)
-    if shape_errors:
-        raise ValueError(f"Incorrect CSV shapes in files: {shape_errors}")
-    if column_errors:
-        raise ValueError(f"Incorrect CSV columns in files: {column_errors}")
-    # Check we can load the dataset for each task
-    load_errors = []
-    for task in tasks:
-        try:
-            _ = load_dataset("../{{cookiecutter.repo_name}}", task)
-        except Exception as e:
-            load_errors.append(e)
-    if load_errors:
-        raise ValueError(f"Could not load predictions! Errors: {load_errors}")
     typer.echo("All submission files validated! ✨ 🚀 ✨")
     typer.echo("Now you can make a submission 🤗")
 @app.command()
-def submit(submission_name: str = typer.Option(..., prompt="Please provide a name for your submission, e.g. GPT-4 😁")):
     subprocess.call("git pull origin main".split())
     _update_submission_name(submission_name)
-    subprocess.call(["git", "add", "data/*predictions.csv", "README.md"])
     subprocess.call(["git", "commit", "-m", f"Submission: {submission_name} "])
     subprocess.call(["git", "push"])

 import pandas as pd
 import typer
+SUBMISSION_FILES = ["README.md", "expert.py", "model.pt"]
 app = typer.Typer()
 def _update_submission_name(submission_name: str):
     replacement = ""
     with open("README.md", "r") as f:
 @app.command()
 def validate():
     # Check that all the expected files exist
+    for file in SUBMISSION_FILES:
+        if not Path(file).is_file():
+            raise ValueError(f"File {file} not found! Please include {file} in your submission")
     typer.echo("All submission files validated! ✨ 🚀 ✨")
     typer.echo("Now you can make a submission 🤗")
 @app.command()
+def submit(submission_name: str = typer.Option(..., prompt="Please provide a name for your submission, e.g. HuBERT 😁")):
     subprocess.call("git pull origin main".split())
     _update_submission_name(submission_name)
+    subprocess.call(["git", "add", "model.pt", "README.md"])
     subprocess.call(["git", "commit", "-m", f"Submission: {submission_name} "])
     subprocess.call(["git", "push"])