# abdev-leaderboard / utils.py

import contextlib
from datetime import datetime, timezone, timedelta
import hashlib
import os
from typing import Iterable, Union

from datasets import load_dataset
import gradio as gr
import pandas as pd

from constants import (
RESULTS_REPO,
ASSAY_RENAME,
LEADERBOARD_RESULTS_COLUMNS,
BASELINE_USERNAMES,
)

pd.set_option("display.max_columns", None)


def get_time(tz_name="EST") -> str:
    # Fixed UTC offsets only; no DST handling ("EST" is treated as UTC-5 year-round)
    offsets = {"EST": -5, "UTC": 0}
    if tz_name not in offsets:
        print(f"Unknown timezone {tz_name!r}, using EST")
        tz_name = "EST"
offset = offsets[tz_name]
return (
datetime.now(timezone(timedelta(hours=offset))).strftime("%Y-%m-%d %H:%M:%S")
+ f" ({tz_name})"
)


def show_output_box(message):
return gr.update(value=message, visible=True)


def anonymize_user(username: str) -> str:
    # Anonymize using a short, stable hash of the username
    return hashlib.sha256(username.encode()).hexdigest()[:8]


def fetch_hf_results():
    # load_dataset caches by default; pass download_mode="force_redownload" to
    # bypass the cache
df = load_dataset(
RESULTS_REPO,
data_files="auto_submissions/metrics_all.csv",
)["train"].to_pandas()
    missing = set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)
    assert not missing, f"Missing expected columns {missing} in {list(df.columns)}"
df_baseline = df[df["user"].isin(BASELINE_USERNAMES)]
df_non_baseline = df[~df["user"].isin(BASELINE_USERNAMES)]
# Show latest submission only
# For baselines: Keep unique model names
df_baseline = df_baseline.sort_values(
"submission_time", ascending=False
).drop_duplicates(subset=["model", "assay", "dataset", "user"], keep="first")
# For users: Just show latest submission
df_non_baseline = df_non_baseline.sort_values(
"submission_time", ascending=False
).drop_duplicates(subset=["assay", "dataset", "user"], keep="first")
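    # Illustrative effect of the two dedup rules above (hypothetical rows): if a
    # user submitted twice for the same (assay, dataset), only the newest row
    # survives; a baseline user keeps one row per distinct model name instead.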
df = pd.concat([df_baseline, df_non_baseline], ignore_index=True)
df["property"] = df["assay"].map(ASSAY_RENAME)
# Rename baseline username to just "Baseline"
df.loc[df["user"].isin(BASELINE_USERNAMES), "user"] = "Baseline"
    # Note: could add an "is_baseline" column to flag baseline models if the
    # board gets crowded.
    # Anonymize the user column at this point. Users may submit anonymously or
    # not; we show their latest submission either way.
df.loc[df["anonymous"], "user"] = "anon-" + df.loc[df["anonymous"], "user"].apply(
readable_hash
)
    # Compare to the previously saved dataframe so new results get logged
    if os.path.exists("debug-current-results.csv"):
        old_df = pd.read_csv("debug-current-results.csv")
        if len(df) != len(old_df):
            print(f"New results: Length {len(old_df)} -> {len(df)} ({get_time()})")
    df.to_csv("debug-current-results.csv", index=False)
    return df
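
# Illustrative use of the returned frame (hypothetical; the real rendering
# lives in app.py):
#     df = fetch_hf_results()
#     print(df[["user", "property", "submission_time"]].head())

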
def fetch_latest_data(stop_event):
import time
while not stop_event.is_set():
try:
fetch_hf_results()
except Exception as e:
print(f"Error fetching latest data: {e}")
        time.sleep(3)  # fetch every 3 seconds
print("Exiting data fetch thread")


@contextlib.asynccontextmanager
async def periodic_data_fetch(app):
    # FastAPI-style lifespan: start the background fetch thread on startup and
    # signal it to stop (joining briefly) on shutdown
    import threading
event = threading.Event()
t = threading.Thread(target=fetch_latest_data, args=(event,), daemon=True)
t.start()
yield
event.set()
t.join(3)
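
# Illustrative wiring for the lifespan above (hypothetical names; the real app
# object lives elsewhere, e.g. in app.py):
#
#     from fastapi import FastAPI
#     app = FastAPI(lifespan=periodic_data_fetch)
#     app = gr.mount_gradio_app(app, demo, path="/")
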
# Readable hashing function similar to coolname or codenamize
ADJECTIVES = [
"ancient",
"brave",
"calm",
"clever",
"crimson",
"curious",
"dapper",
"eager",
"fuzzy",
"gentle",
"glowing",
"golden",
"happy",
"icy",
"jolly",
"lucky",
"magical",
"mellow",
"nimble",
"peachy",
"quick",
"royal",
"shiny",
"silent",
"sly",
"sparkly",
"spicy",
"spry",
"sturdy",
"sunny",
"swift",
"tiny",
"vivid",
"witty",
]

ANIMALS = [
"ant",
"bat",
"bear",
"bee",
"bison",
"boar",
"bug",
"cat",
"crab",
"crow",
"deer",
"dog",
"duck",
"eel",
"elk",
"fox",
"frog",
"goat",
"gull",
"hare",
"hawk",
"hen",
"horse",
"ibis",
"kid",
"kiwi",
"koala",
"lamb",
"lark",
"lemur",
"lion",
"llama",
"loon",
"lynx",
"mole",
"moose",
"mouse",
"newt",
"otter",
"owl",
"ox",
"panda",
"pig",
"prawn",
"puma",
"quail",
"quokka",
"rabbit",
"rat",
"ray",
"robin",
"seal",
"shark",
"sheep",
"shrew",
"skunk",
"slug",
"snail",
"snake",
"swan",
"toad",
"trout",
"turtle",
"vole",
"walrus",
"wasp",
"whale",
"wolf",
"worm",
"yak",
"zebra",
]

NOUNS = [
"rock",
"sand",
"star",
"tree",
"leaf",
"seed",
"stone",
"cloud",
"rain",
"snow",
"wind",
"fire",
"ash",
"dirt",
"mud",
"ice",
"wave",
"shell",
"dust",
"sun",
"moon",
"hill",
"lake",
"pond",
"reef",
"root",
"twig",
"wood",
]


def readable_hash(
data: Union[str, bytes, Iterable[int]],
*,
salt: Union[str, bytes, None] = None,
words: tuple[list[str], list[str]] = (ADJECTIVES, ANIMALS + NOUNS),
sep: str = "-",
checksum_len: int = 2, # 0 to disable; 2–3 is plenty
case: str = "lower", # "lower" | "title" | "upper"
) -> str:
"""
Deterministically map input data to 'adjective-animal[-checksum]'. Generated using ChatGPT.
Examples
--------
>>> readable_hash("hello world")
'magical-panda-6h'
>>> readable_hash("hello world", salt="my-app-v1", checksum_len=3)
'royal-otter-1pz'
>>> readable_hash(b"\x00\x01\x02\x03", case="title", checksum_len=0)
'Fuzzy-Tiger'
Vocabulary
----------
ADJECTIVES: ~160 safe, descriptive words (e.g. "ancient", "brave", "silent", "swift")
ANIMALS: ~80 short, common animals (e.g. "dog", "owl", "whale", "tiger")
NOUNS: optional set of ~30 neutral nouns (e.g. "rock", "star", "tree", "cloud")
Combinations
------------
- adjective + animal: ~13,000 unique names
- adjective + noun: ~5,000 unique names
- adjective + animal + noun: ~390,000 unique names
Checksum
--------
An optional short base-36 suffix (e.g. "-6h" or "-1pz"). The checksum
acts as a disambiguator in case two different inputs map to the same
word combination. With 2-3 characters, collisions become vanishingly rare.
If you only need fun, human-readable names, you can disable it by setting
``checksum_len=0``. If you need unique, stable identifiers, keep it enabled.
"""
if isinstance(data, str):
data = data.encode()
elif isinstance(data, Iterable) and not isinstance(data, (bytes, bytearray)):
data = bytes(data)
h = hashlib.blake2b(digest_size=8) # fast, stable, short digest
if salt:
h.update(salt.encode() if isinstance(salt, str) else salt)
h.update(b"\x00") # domain-separate salt from data
h.update(data)
digest = h.digest()
# Use the first 6 bytes to index words; last bytes for checksum
n1 = int.from_bytes(digest[0:3], "big")
n2 = int.from_bytes(digest[3:6], "big")
adj = words[0][n1 % len(words[0])]
noun = words[1][n2 % len(words[1])]
phrase = f"{adj}{sep}{noun}"
    if checksum_len > 0:
        # Short base-36 checksum from the last 2 digest bytes (so at most 4
        # characters are available, regardless of checksum_len)
        cs = int.from_bytes(digest[6:], "big")
base36 = ""
alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
while cs:
cs, r = divmod(cs, 36)
base36 = alphabet[r] + base36
base36 = (base36 or "0")[:checksum_len]
phrase = f"{phrase}{sep}{base36}"
if case == "title":
phrase = sep.join(p.capitalize() for p in phrase.split(sep))
elif case == "upper":
phrase = phrase.upper()
return phrase
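

if __name__ == "__main__":
    # Minimal smoke test (illustrative: no specific phrases are asserted, since
    # each one depends on the BLAKE2b digest of its input)
    for name in ["alice", "bob"]:
        print(name, "->", "anon-" + readable_hash(name))
    # The mapping is deterministic for a given input and salt
    assert readable_hash("alice") == readable_hash("alice")
    print("salted:", readable_hash("alice", salt="my-app-v1"))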