import os
import time
from pathlib import Path

import pandas as pd
import streamlit as st
import yaml
from datasets import get_dataset_config_names
from dotenv import load_dotenv
from huggingface_hub import list_datasets

from evaluation import filter_evaluated_models
from utils import (
    AUTOTRAIN_TASK_TO_HUB_TASK,
    commit_evaluation_log,
    create_autotrain_project_name,
    format_col_mapping,
    get_compatible_models,
    get_config_metadata,
    get_dataset_card_url,
    get_key,
    get_metadata,
    http_get,
    http_post,
)
if Path(".env").is_file():
    load_dotenv(".env")

HF_TOKEN = os.getenv("HF_TOKEN")
AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")
DATASETS_PREVIEW_API = os.getenv("DATASETS_PREVIEW_API")
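# HF_TOKEN authenticates requests to the Hub and the AutoTrain backend, while
# DATASETS_PREVIEW_API points at the datasets-server used to validate datasets
# and fetch their splits and first rows.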
# Put image tasks on top
TASK_TO_ID = {
    "image_binary_classification": 17,
    "image_multi_class_classification": 18,
    "binary_classification": 1,
    "multi_class_classification": 2,
    "natural_language_inference": 22,
    "entity_extraction": 4,
    "extractive_question_answering": 5,
    "translation": 6,
    "summarization": 8,
    "text_zero_shot_classification": 23,
}
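# Each task name maps to the AutoTrain task ID that is sent as `task` when an
# evaluation project is created.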
TASK_TO_DEFAULT_METRICS = {
    "binary_classification": ["f1", "precision", "recall", "auc", "accuracy"],
    "multi_class_classification": [
        "f1",
        "precision",
        "recall",
        "accuracy",
    ],
    "natural_language_inference": ["f1", "precision", "recall", "auc", "accuracy"],
    "entity_extraction": ["precision", "recall", "f1", "accuracy"],
    "extractive_question_answering": ["f1", "exact_match"],
    "translation": ["sacrebleu"],
    "summarization": ["rouge1", "rouge2", "rougeL", "rougeLsum"],
    "image_binary_classification": ["f1", "precision", "recall", "auc", "accuracy"],
    "image_multi_class_classification": [
        "f1",
        "precision",
        "recall",
        "accuracy",
    ],
    "text_zero_shot_classification": ["accuracy", "loss"],
}
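# The default metrics above are always computed for a task; users can add any
# of the SUPPORTED_METRICS listed further down.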
AUTOTRAIN_TASK_TO_LANG = {
    "translation": "en2de",
    "image_binary_classification": "unk",
    "image_multi_class_classification": "unk",
}

AUTOTRAIN_MACHINE = {"text_zero_shot_classification": "r5.16x"}
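# Tasks missing from the two mappings above fall back to language "en" and the
# default "p3" instance when the AutoTrain project is created.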
SUPPORTED_TASKS = list(TASK_TO_ID.keys())

# Extracted from utils.get_supported_metrics
# Hardcoded for now due to speed / caching constraints
SUPPORTED_METRICS = [
    "accuracy",
    "bertscore",
    "bleu",
    "cer",
    "chrf",
    "code_eval",
    "comet",
    "competition_math",
    "coval",
    "cuad",
    "exact_match",
    "f1",
    "frugalscore",
    "google_bleu",
    "mae",
    "mahalanobis",
    "matthews_correlation",
    "mean_iou",
    "meteor",
    "mse",
    "pearsonr",
    "perplexity",
    "precision",
    "recall",
    "roc_auc",
    "rouge",
    "sacrebleu",
    "sari",
    "seqeval",
    "spearmanr",
    "squad",
    "squad_v2",
    "ter",
    "trec_eval",
    "wer",
    "wiki_split",
    "xnli",
    "angelina-wang/directional_bias_amplification",
    "jordyvl/ece",
    "lvwerra/ai4code",
    "lvwerra/amex",
]
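# Namespaced entries (e.g. `jordyvl/ece`) are community metrics hosted on the Hub.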
#######
# APP #
#######
st.title("Evaluation on the Hub")
st.warning(
    "**⚠️ This project has been archived. If you want to evaluate LLMs, check out [this collection](https://huggingface.co/collections/clefourrier/llm-leaderboards-and-benchmarks-✨-64f99d2e11e92ca5568a7cce) of leaderboards.**"
)
st.markdown(
    """
Welcome to Hugging Face's automatic model evaluator!
This application allows you to evaluate 🤗 Transformers
[models](https://huggingface.co/models?library=transformers&sort=downloads)
across a wide variety of [datasets](https://huggingface.co/datasets) on the
Hub. Please select the dataset and configuration below. The results of your
evaluation will be displayed on the [public
leaderboards](https://huggingface.co/spaces/autoevaluate/leaderboards). For
more details, check out our [blog
post](https://huggingface.co/blog/eval-on-the-hub).
"""
)
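# The original evaluation workflow below is kept for reference but left commented
# out, since the project has been archived.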
# all_datasets = [d.id for d in list_datasets()]
# query_params = st.experimental_get_query_params()
# if "first_query_params" not in st.session_state:
# st.session_state.first_query_params = query_params
# first_query_params = st.session_state.first_query_params
# default_dataset = all_datasets[0]
# if "dataset" in first_query_params:
# if len(first_query_params["dataset"]) > 0 and first_query_params["dataset"][0] in all_datasets:
# default_dataset = first_query_params["dataset"][0]
# selected_dataset = st.selectbox(
# "Select a dataset",
# all_datasets,
# index=all_datasets.index(default_dataset),
# help="""Datasets with metadata can be evaluated with 1-click. Configure an evaluation job to add \
# new metadata to a dataset card.""",
# )
# st.experimental_set_query_params(**{"dataset": [selected_dataset]})
# # Check if selected dataset can be streamed
# is_valid_dataset = http_get(
# path="/is-valid",
# domain=DATASETS_PREVIEW_API,
# params={"dataset": selected_dataset},
# ).json()
# if is_valid_dataset["viewer"] is False and is_valid_dataset["preview"] is False:
# st.error(
# """The dataset you selected is not currently supported. Open a \
# [discussion](https://huggingface.co/spaces/autoevaluate/model-evaluator/discussions) for support."""
# )
# metadata = get_metadata(selected_dataset, token=HF_TOKEN)
# print(f"INFO -- Dataset metadata: {metadata}")
# if metadata is None:
# st.warning("No evaluation metadata found. Please configure the evaluation job below.")
# with st.expander("Advanced configuration"):
# # Select task
# selected_task = st.selectbox(
# "Select a task",
# SUPPORTED_TASKS,
# index=SUPPORTED_TASKS.index(metadata[0]["task_id"]) if metadata is not None else 0,
# help="""Don't see your favourite task here? Open a \
# [discussion](https://huggingface.co/spaces/autoevaluate/model-evaluator/discussions) to request it!""",
# )
# # Select config
# configs = get_dataset_config_names(selected_dataset)
# selected_config = st.selectbox(
# "Select a config",
# configs,
# help="""Some datasets contain several sub-datasets, known as _configurations_. \
# Select one to evaluate your models on. \
# See the [docs](https://huggingface.co/docs/datasets/master/en/load_hub#configurations) for more details.
# """,
# )
# # Some datasets have multiple metadata (one per config), so we grab the one associated with the selected config
# config_metadata = get_config_metadata(selected_config, metadata)
# print(f"INFO -- Config metadata: {config_metadata}")
# # Select splits
# splits_resp = http_get(
# path="/splits",
# domain=DATASETS_PREVIEW_API,
# params={"dataset": selected_dataset},
# )
# if splits_resp.status_code == 200:
# split_names = []
# all_splits = splits_resp.json()
# for split in all_splits["splits"]:
# if split["config"] == selected_config:
# split_names.append(split["split"])
# if config_metadata is not None:
# eval_split = config_metadata["splits"].get("eval_split", None)
# else:
# eval_split = None
# selected_split = st.selectbox(
# "Select a split",
# split_names,
# index=split_names.index(eval_split) if eval_split is not None else 0,
# help="Be wary when evaluating models on the `train` split.",
# )
# # Select columns
# rows_resp = http_get(
# path="/first-rows",
# domain=DATASETS_PREVIEW_API,
# params={
# "dataset": selected_dataset,
# "config": selected_config,
# "split": selected_split,
# },
# ).json()
# col_names = list(pd.json_normalize(rows_resp["rows"][0]["row"]).columns)
# st.markdown("**Map your dataset columns**")
# st.markdown(
# """The model evaluator uses a standardised set of column names for the input examples and labels. \
# Please define the mapping between your dataset columns (right) and the standardised column names (left)."""
# )
# col1, col2 = st.columns(2)
# # TODO: find a better way to layout these items
# # TODO: need graceful way of handling dataset <--> task mismatch for datasets with metadata
# col_mapping = {}
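# # col_mapping maps each dataset column name to its standardised counterpart;
# # get_key() looks up which dataset column the card metadata already assigns to
# # a given standardised name, so existing mappings pre-populate the selectboxes.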
| # if selected_task in ["binary_classification", "multi_class_classification"]: | |
| # with col1: | |
| # st.markdown("`text` column") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.markdown("`target` column") | |
| # with col2: | |
| # text_col = st.selectbox( | |
| # "This column should contain the text to be classified", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "text")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # target_col = st.selectbox( | |
| # "This column should contain the labels associated with the text", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "target")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # col_mapping[text_col] = "text" | |
| # col_mapping[target_col] = "target" | |
| # elif selected_task == "text_zero_shot_classification": | |
| # with col1: | |
| # st.markdown("`text` column") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.markdown("`classes` column") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.markdown("`target` column") | |
| # with col2: | |
| # text_col = st.selectbox( | |
| # "This column should contain the text to be classified", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "text")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # classes_col = st.selectbox( | |
| # "This column should contain the classes associated with the text", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "classes")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # target_col = st.selectbox( | |
| # "This column should contain the index of the correct class", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "target")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # col_mapping[text_col] = "text" | |
| # col_mapping[classes_col] = "classes" | |
| # col_mapping[target_col] = "target" | |
| # if selected_task in ["natural_language_inference"]: | |
| # config_metadata = get_config_metadata(selected_config, metadata) | |
| # with col1: | |
| # st.markdown("`text1` column") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.markdown("`text2` column") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.markdown("`target` column") | |
| # with col2: | |
| # text1_col = st.selectbox( | |
| # "This column should contain the first text passage to be classified", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "text1")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # text2_col = st.selectbox( | |
| # "This column should contain the second text passage to be classified", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "text2")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # target_col = st.selectbox( | |
| # "This column should contain the labels associated with the text", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "target")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # col_mapping[text1_col] = "text1" | |
| # col_mapping[text2_col] = "text2" | |
| # col_mapping[target_col] = "target" | |
| # elif selected_task == "entity_extraction": | |
| # with col1: | |
| # st.markdown("`tokens` column") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.markdown("`tags` column") | |
| # with col2: | |
| # tokens_col = st.selectbox( | |
| # "This column should contain the array of tokens to be classified", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "tokens")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # tags_col = st.selectbox( | |
| # "This column should contain the labels associated with each part of the text", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "tags")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # col_mapping[tokens_col] = "tokens" | |
| # col_mapping[tags_col] = "tags" | |
| # elif selected_task == "translation": | |
| # with col1: | |
| # st.markdown("`source` column") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.markdown("`target` column") | |
| # with col2: | |
| # text_col = st.selectbox( | |
| # "This column should contain the text to be translated", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "source")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # target_col = st.selectbox( | |
| # "This column should contain the target translation", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "target")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # col_mapping[text_col] = "source" | |
| # col_mapping[target_col] = "target" | |
| # elif selected_task == "summarization": | |
| # with col1: | |
| # st.markdown("`text` column") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.markdown("`target` column") | |
| # with col2: | |
| # text_col = st.selectbox( | |
| # "This column should contain the text to be summarized", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "text")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # target_col = st.selectbox( | |
| # "This column should contain the target summary", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "target")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # col_mapping[text_col] = "text" | |
| # col_mapping[target_col] = "target" | |
| # elif selected_task == "extractive_question_answering": | |
| # if config_metadata is not None: | |
| # col_mapping = config_metadata["col_mapping"] | |
| # # Hub YAML parser converts periods to hyphens, so we remap them here | |
| # col_mapping = format_col_mapping(col_mapping) | |
| # with col1: | |
| # st.markdown("`context` column") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.markdown("`question` column") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.markdown("`answers.text` column") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.markdown("`answers.answer_start` column") | |
| # with col2: | |
| # context_col = st.selectbox( | |
| # "This column should contain the question's context", | |
| # col_names, | |
| # index=col_names.index(get_key(col_mapping, "context")) if config_metadata is not None else 0, | |
| # ) | |
| # question_col = st.selectbox( | |
| # "This column should contain the question to be answered, given the context", | |
| # col_names, | |
| # index=col_names.index(get_key(col_mapping, "question")) if config_metadata is not None else 0, | |
| # ) | |
| # answers_text_col = st.selectbox( | |
| # "This column should contain example answers to the question, extracted from the context", | |
| # col_names, | |
| # index=col_names.index(get_key(col_mapping, "answers.text")) if config_metadata is not None else 0, | |
| # ) | |
| # answers_start_col = st.selectbox( | |
| # "This column should contain the indices in the context of the first character of each `answers.text`", | |
| # col_names, | |
| # index=col_names.index(get_key(col_mapping, "answers.answer_start")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # col_mapping[context_col] = "context" | |
| # col_mapping[question_col] = "question" | |
| # col_mapping[answers_text_col] = "answers.text" | |
| # col_mapping[answers_start_col] = "answers.answer_start" | |
| # elif selected_task in ["image_binary_classification", "image_multi_class_classification"]: | |
| # with col1: | |
| # st.markdown("`image` column") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.text("") | |
| # st.markdown("`target` column") | |
| # with col2: | |
| # image_col = st.selectbox( | |
| # "This column should contain the images to be classified", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "image")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # target_col = st.selectbox( | |
| # "This column should contain the labels associated with the images", | |
| # col_names, | |
| # index=col_names.index(get_key(config_metadata["col_mapping"], "target")) | |
| # if config_metadata is not None | |
| # else 0, | |
| # ) | |
| # col_mapping[image_col] = "image" | |
| # col_mapping[target_col] = "target" | |
# # Select metrics
# st.markdown("**Select metrics**")
# st.markdown("The following metrics will be computed")
# html_string = " ".join(
# [
# '<div style="padding-right:5px;padding-left:5px;padding-top:5px;padding-bottom:5px;float:left">'
# + '<div style="background-color:#D3D3D3;border-radius:5px;display:inline-block;padding-right:5px;'
# + 'padding-left:5px;color:white">'
# + metric
# + "</div></div>"
# for metric in TASK_TO_DEFAULT_METRICS[selected_task]
# ]
# )
# st.markdown(html_string, unsafe_allow_html=True)
# selected_metrics = st.multiselect(
# "(Optional) Select additional metrics",
# sorted(list(set(SUPPORTED_METRICS) - set(TASK_TO_DEFAULT_METRICS[selected_task]))),
# help="""User-selected metrics will be computed with their default arguments. \
# For example, `f1` will report results for binary labels. \
# Check out the [available metrics](https://huggingface.co/metrics) for more details.""",
# )
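# # The form below lists models trained on the selected dataset and task, and
# # submits the evaluation job to the AutoTrain backend once a Hub username is provided.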
# with st.form(key="form"):
# compatible_models = get_compatible_models(selected_task, [selected_dataset])
# selected_models = st.multiselect(
# "Select the models you wish to evaluate",
# compatible_models,
# help="""Don't see your favourite model in this list? Add the dataset and task it was trained on to the \
# [model card metadata.](https://huggingface.co/docs/hub/models-cards#model-card-metadata)""",
# )
# print("INFO -- Selected models before filter:", selected_models)
# hf_username = st.text_input("Enter your 🤗 Hub username to be notified when the evaluation is finished")
# submit_button = st.form_submit_button("Evaluate models 🚀")
# if submit_button:
# if len(hf_username) == 0:
# st.warning("No 🤗 Hub username provided! Please enter your username and try again.")
# elif len(selected_models) == 0:
# st.warning("⚠️ No models were selected for evaluation! Please select at least one model and try again.")
# elif len(selected_models) > 10:
# st.warning("Only 10 models can be evaluated at once. Please select fewer models and try again.")
# else:
# # Filter out previously evaluated models
# selected_models = filter_evaluated_models(
# selected_models,
# selected_task,
# selected_dataset,
# selected_config,
# selected_split,
# selected_metrics,
# )
# print("INFO -- Selected models after filter:", selected_models)
# if len(selected_models) > 0:
# project_payload = {
# "username": AUTOTRAIN_USERNAME,
# "proj_name": create_autotrain_project_name(selected_dataset, selected_config),
# "task": TASK_TO_ID[selected_task],
# "config": {
# "language": AUTOTRAIN_TASK_TO_LANG[selected_task]
# if selected_task in AUTOTRAIN_TASK_TO_LANG
# else "en",
# "max_models": 5,
# "instance": {
# "provider": "sagemaker" if selected_task in AUTOTRAIN_MACHINE.keys() else "ovh",
# "instance_type": AUTOTRAIN_MACHINE[selected_task]
# if selected_task in AUTOTRAIN_MACHINE.keys()
# else "p3",
# "max_runtime_seconds": 172800,
# "num_instances": 1,
# "disk_size_gb": 200,
# },
# "evaluation": {
# "metrics": selected_metrics,
# "models": selected_models,
# "hf_username": hf_username,
# },
# },
# }
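# # max_runtime_seconds=172800 caps each job at 48 hours; zero-shot classification
# # runs on a larger SageMaker machine, all other tasks on the default OVH instance.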
| # print(f"INFO -- Payload: {project_payload}") | |
| # project_json_resp = http_post( | |
| # path="/projects/create", | |
| # payload=project_payload, | |
| # token=HF_TOKEN, | |
| # domain=AUTOTRAIN_BACKEND_API, | |
| # ).json() | |
| # print(f"INFO -- Project creation response: {project_json_resp}") | |
| # if project_json_resp["created"]: | |
| # data_payload = { | |
| # "split": 4, # use "auto" split choice in AutoTrain | |
| # "col_mapping": col_mapping, | |
| # "load_config": {"max_size_bytes": 0, "shuffle": False}, | |
| # "dataset_id": selected_dataset, | |
| # "dataset_config": selected_config, | |
| # "dataset_split": selected_split, | |
| # } | |
| # data_json_resp = http_post( | |
| # path=f"/projects/{project_json_resp['id']}/data/dataset", | |
| # payload=data_payload, | |
| # token=HF_TOKEN, | |
| # domain=AUTOTRAIN_BACKEND_API, | |
| # ).json() | |
| # print(f"INFO -- Dataset creation response: {data_json_resp}") | |
| # if data_json_resp["download_status"] == 1: | |
| # train_json_resp = http_post( | |
| # path=f"/projects/{project_json_resp['id']}/data/start_processing", | |
| # token=HF_TOKEN, | |
| # domain=AUTOTRAIN_BACKEND_API, | |
| # ).json() | |
| # # For local development we process and approve projects on-the-fly | |
| # if "localhost" in AUTOTRAIN_BACKEND_API: | |
| # with st.spinner("β³ Waiting for data processing to complete ..."): | |
| # is_data_processing_success = False | |
| # while is_data_processing_success is not True: | |
| # project_status = http_get( | |
| # path=f"/projects/{project_json_resp['id']}", | |
| # token=HF_TOKEN, | |
| # domain=AUTOTRAIN_BACKEND_API, | |
| # ).json() | |
| # if project_status["status"] == 3: | |
| # is_data_processing_success = True | |
| # time.sleep(10) | |
| # # Approve training job | |
| # train_job_resp = http_post( | |
| # path=f"/projects/{project_json_resp['id']}/start_training", | |
| # token=HF_TOKEN, | |
| # domain=AUTOTRAIN_BACKEND_API, | |
| # ).json() | |
| # st.success("β Data processing and project approval complete - go forth and evaluate!") | |
| # else: | |
| # # Prod/staging submissions are evaluated in a cron job via run_evaluation_jobs.py | |
| # print(f"INFO -- AutoTrain job response: {train_json_resp}") | |
| # if train_json_resp["success"]: | |
| # train_eval_index = { | |
| # "train-eval-index": [ | |
| # { | |
| # "config": selected_config, | |
| # "task": AUTOTRAIN_TASK_TO_HUB_TASK[selected_task], | |
| # "task_id": selected_task, | |
| # "splits": {"eval_split": selected_split}, | |
| # "col_mapping": col_mapping, | |
| # } | |
| # ] | |
| # } | |
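# # This `train-eval-index` block is the metadata users can paste into the dataset
# # card to enable 1-click evaluations, as shown in the success message below.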
# selected_metadata = yaml.dump(train_eval_index, sort_keys=False)
# dataset_card_url = get_dataset_card_url(selected_dataset)
# st.success("✅ Successfully submitted evaluation job!")
# st.markdown(
# f"""
# Evaluation can take up to 1 hour to complete, so grab a ☕ or 🍵 while you wait:
# * A [Hub pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions) with the evaluation results will be opened for each model you selected. Check your email for notifications.
# * Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) to view the results from your submission once the Hub pull request is merged.
# * 🥱 Tired of configuring evaluations? Add the following metadata to the [dataset card]({dataset_card_url}) to enable 1-click evaluations:
# """ # noqa
# )
# st.markdown(
# f"""
# ```yaml
# {selected_metadata}
# """
# )
# print("INFO -- Pushing evaluation job logs to the Hub")
# evaluation_log = {}
# evaluation_log["project_id"] = project_json_resp["id"]
# evaluation_log["autotrain_env"] = (
# "staging" if "staging" in AUTOTRAIN_BACKEND_API else "prod"
# )
# evaluation_log["payload"] = project_payload
# evaluation_log["project_creation_response"] = project_json_resp
# evaluation_log["dataset_creation_response"] = data_json_resp
# evaluation_log["autotrain_job_response"] = train_json_resp
# commit_evaluation_log(evaluation_log, hf_access_token=HF_TOKEN)
# else:
# st.error("Oh no, there was an error submitting your evaluation job!")
# else:
# st.warning("⚠️ No models left to evaluate! Please select other models and try again.")