Commit d4cb50a · update about and env
1 parent: 492f435

Files changed:
- src/about.py (+15 -5)
- src/envs.py (+8 -5)
src/about.py  CHANGED

@@ -1,6 +1,7 @@
 from dataclasses import dataclass
 from enum import Enum
 
+
 @dataclass
 class Task:
     benchmark: str
@@ -11,24 +12,25 @@ class Task:
 # Select your tasks here
 # ---------------------------------------------------
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("anli_r1", "acc", "ANLI")
     task1 = Task("logiqa", "acc_norm", "LogiQA")
 
-NUM_FEWSHOT = 0 # Change with your few shot
-# ---------------------------------------------------
 
+NUM_FEWSHOT = 0 # Change with your few shot
+# ---------------------------------------------------
 
 
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">
+TITLE = """<h1 align="center" id="space-title">BOOM 💥 Time Series Forecasting Leaderboard</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-
+BOOM (Benchmark of Observability Metrics) is a large-scale, real-world time series dataset designed for evaluating models on forecasting tasks in complex observability environments. Composed of real-world metrics data collected from Datadog, a leading observability platform, the benchmark captures the irregularity, structural complexity, and heavy-tailed statistics typical of production observability data. For more information, please refer to the [BOOM Dataset Card](https://huggingface.co/datasets/Datadog/BOOM) and the [BOOM GitHub repository](https://github.com/DataDog/toto?tab=readme-ov-file#boom-benchmark-of-observability-metrics)
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
+# TODO
 LLM_BENCHMARKS_TEXT = f"""
 ## How it works
 
@@ -69,4 +71,12 @@ If everything is done, check you can launch the EleutherAIHarness on your model
 
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
+@misc{toto2025,
+      title={This Time is Different: An Observability Perspective on Time Series Foundation Models},
+      author={TODO},
+      year={2025},
+      eprint={arXiv:TODO},
+      archivePrefix={arXiv},
+      primaryClass={cs.LG}
+}
 """
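For context (not part of this commit): the constants edited above are normally consumed by the Space's Gradio app. Below is a minimal sketch assuming the standard demo-leaderboard `app.py` layout; the Gradio calls (`gr.HTML`, `gr.Markdown`, `gr.Accordion`, `gr.Textbox`) are common usage, not code from this repository.

```python
# Minimal sketch (assumption): how the demo-leaderboard template typically
# renders the constants changed in src/about.py. Not part of this commit.
import gradio as gr

from src.about import (
    TITLE,
    INTRODUCTION_TEXT,
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
)

with gr.Blocks() as demo:
    gr.HTML(TITLE)                  # "BOOM 💥 Time Series Forecasting Leaderboard"
    gr.Markdown(INTRODUCTION_TEXT)  # BOOM dataset description shown under the title
    with gr.Accordion("📙 Citation", open=False):
        gr.Textbox(
            value=CITATION_BUTTON_TEXT,   # the @misc{toto2025, ...} BibTeX block
            label=CITATION_BUTTON_LABEL,
            lines=10,
            show_copy_button=True,
        )

if __name__ == "__main__":
    demo.launch()
```

Note that the `Tasks` entries (`anli_r1`, `logiqa`) are still the template defaults in this commit; only the title, introduction, and citation text were updated for BOOM.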
src/envs.py  CHANGED

@@ -4,21 +4,24 @@ from huggingface_hub import HfApi
 
 # Info to change for your repository
 # ----------------------------------
-TOKEN = os.environ.get("HF_TOKEN")
+TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
 
-OWNER = 
+OWNER = (
+    "Datadog"  # Change to your org - don't forget to create a results and request dataset, with the correct format!
+)
 # ----------------------------------
 
-REPO_ID = f"{OWNER}/
+REPO_ID = f"{OWNER}/BOOM-Leaderboard" # The repo id of your space
 QUEUE_REPO = f"{OWNER}/requests"
 RESULTS_REPO = f"{OWNER}/results"
 
 # If you setup a cache later, just change HF_HOME
-CACHE_PATH=os.getenv("HF_HOME", ".")
+CACHE_PATH = os.getenv("HF_HOME", ".")
 
 # Local caches
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
-EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
+# EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
+EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "results")
 EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
 EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
 
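The last change matters because the demo-leaderboard template typically syncs `RESULTS_REPO` into `EVAL_RESULTS_PATH` at startup, so after this commit the local copy lands in `<HF_HOME>/results` instead of `<HF_HOME>/eval-results`. A minimal sketch of that sync, assuming the template's usual `snapshot_download` startup logic; the surrounding wiring is illustrative, not code from this commit.

```python
# Illustrative sketch (assumption): pulling the results dataset into the local
# cache path using the constants defined in src/envs.py.
from huggingface_hub import snapshot_download

from src.envs import EVAL_RESULTS_PATH, RESULTS_REPO, TOKEN

snapshot_download(
    repo_id=RESULTS_REPO,         # "Datadog/results"
    local_dir=EVAL_RESULTS_PATH,  # "<HF_HOME>/results" after this commit
    repo_type="dataset",
    token=TOKEN,
)
```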