import contextlib
from datetime import datetime, timezone, timedelta
import hashlib
import os
from typing import Iterable, Union

from datasets import load_dataset
import gradio as gr
import pandas as pd

from constants import (
    RESULTS_REPO,
    ASSAY_RENAME,
    LEADERBOARD_RESULTS_COLUMNS,
    BASELINE_USERNAMES,
)

pd.set_option("display.max_columns", None)


def get_time(tz_name="EST") -> str:
    # Fixed UTC offsets only (no DST handling); unknown zones fall back to EST.
    offsets = {"EST": -5, "UTC": 0}
    if tz_name not in offsets:
        print("Invalid timezone, using EST")
        tz_name = "EST"
    offset = offsets[tz_name]
    return (
        datetime.now(timezone(timedelta(hours=offset))).strftime("%Y-%m-%d %H:%M:%S")
        + f" ({tz_name})"
    )


def show_output_box(message):
    return gr.update(value=message, visible=True)


def anonymize_user(username: str) -> str:
    # Anonymize using a hash of the username
    return hashlib.sha256(username.encode()).hexdigest()[:8]


def fetch_hf_results():
    # load_dataset caches by default unless force_redownload is requested
    df = load_dataset(
        RESULTS_REPO,
        data_files="auto_submissions/metrics_all.csv",
    )["train"].to_pandas()

    assert all(
        col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS
    ), (
        f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. "
        f"Missing columns: {set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)}"
    )

    df_baseline = df[df["user"].isin(BASELINE_USERNAMES)]
    df_non_baseline = df[~df["user"].isin(BASELINE_USERNAMES)]

    # Show the latest submission only.
    # For baselines: keep one row per unique model name.
    df_baseline = df_baseline.sort_values(
        "submission_time", ascending=False
    ).drop_duplicates(subset=["model", "assay", "dataset", "user"], keep="first")
    # For users: keep only the latest submission.
    df_non_baseline = df_non_baseline.sort_values(
        "submission_time", ascending=False
    ).drop_duplicates(subset=["assay", "dataset", "user"], keep="first")

    df = pd.concat([df_baseline, df_non_baseline], ignore_index=True)

    df["property"] = df["assay"].map(ASSAY_RENAME)

    # Rename baseline usernames to just "Baseline".
    df.loc[df["user"].isin(BASELINE_USERNAMES), "user"] = "Baseline"
    # Note: an "is_baseline" column could be added instead if the table gets crowded.
    # Anonymize the user column at this point (note: users can submit anonymously or
    # non-anonymously, and we show their latest submission regardless).
    df.loc[df["anonymous"], "user"] = "anon-" + df.loc[df["anonymous"], "user"].apply(
        readable_hash
    )

    # Compare to the previously saved dataframe (debug logging only).
    if os.path.exists("debug-current-results.csv"):
        old_df = pd.read_csv("debug-current-results.csv")
        if len(df) != len(old_df):
            print(f"New results: Length {len(old_df)} -> {len(df)} ({get_time()})")
    df.to_csv("debug-current-results.csv", index=False)


def fetch_latest_data(stop_event):
    import time

    while not stop_event.is_set():
        try:
            fetch_hf_results()
        except Exception as e:
            print(f"Error fetching latest data: {e}")
        time.sleep(3)  # Poll every 3 seconds
    print("Exiting data fetch thread")


@contextlib.asynccontextmanager
async def periodic_data_fetch(app):
    import threading

    event = threading.Event()
    t = threading.Thread(target=fetch_latest_data, args=(event,), daemon=True)
    t.start()
    yield
    event.set()
    t.join(3)  # Give the fetch thread up to 3 seconds to exit cleanly


# Readable hashing function similar to coolname or codenamize
ADJECTIVES = [
    "ancient", "brave", "calm", "clever", "crimson", "curious", "dapper", "eager",
    "fuzzy", "gentle", "glowing", "golden", "happy", "icy", "jolly", "lucky",
    "magical", "mellow", "nimble", "peachy", "quick", "royal", "shiny", "silent",
    "sly", "sparkly", "spicy", "spry", "sturdy", "sunny", "swift", "tiny",
    "vivid", "witty",
]

ANIMALS = [
    "ant", "bat", "bear", "bee", "bison", "boar", "bug", "cat", "crab", "crow",
    "deer", "dog", "duck", "eel", "elk", "fox", "frog", "goat", "gull", "hare",
    "hawk", "hen", "horse", "ibis", "kid", "kiwi", "koala", "lamb", "lark", "lemur",
    "lion", "llama", "loon", "lynx", "mole", "moose", "mouse", "newt", "otter", "owl",
    "ox", "panda", "pig", "prawn", "puma", "quail", "quokka", "rabbit", "rat", "ray",
    "robin", "seal", "shark", "sheep", "shrew", "skunk", "slug", "snail", "snake",
    "swan", "toad", "trout", "turtle", "vole", "walrus", "wasp", "whale", "wolf",
    "worm", "yak", "zebra",
]

NOUNS = [
    "rock", "sand", "star", "tree", "leaf", "seed", "stone", "cloud", "rain", "snow",
    "wind", "fire", "ash", "dirt", "mud", "ice", "wave", "shell", "dust", "sun",
    "moon", "hill", "lake", "pond", "reef", "root", "twig", "wood",
]


def readable_hash(
    data: Union[str, bytes, Iterable[int]],
    *,
    salt: Union[str, bytes, None] = None,
    words: tuple[list[str], list[str]] = (ADJECTIVES, ANIMALS + NOUNS),
    sep: str = "-",
    checksum_len: int = 2,  # 0 to disable; 2–3 is plenty
    case: str = "lower",  # "lower" | "title" | "upper"
) -> str:
    r"""
    Deterministically map input data to 'adjective-animal[-checksum]'.
    Generated using ChatGPT.

    Examples
    --------
    Outputs below are illustrative; actual values depend on the digest.

    >>> readable_hash("hello world")
    'magical-panda-6h'
    >>> readable_hash("hello world", salt="my-app-v1", checksum_len=3)
    'royal-otter-1pz'
    >>> readable_hash(b"\x00\x01\x02\x03", case="title", checksum_len=0)
    'Fuzzy-Owl'

    Vocabulary
    ----------
    ADJECTIVES: 34 safe, descriptive words (e.g. "ancient", "brave", "silent", "swift")
    ANIMALS: 71 short, common animals (e.g. "dog", "owl", "whale", "panda")
    NOUNS: optional set of 28 neutral nouns (e.g. "rock", "star", "tree", "cloud")

    Combinations
    ------------
    - adjective + animal: ~2,400 unique names
    - adjective + noun: ~950 unique names
    - adjective + (animal or noun), the default: ~3,400 unique names

    Checksum
    --------
    An optional short base-36 suffix (e.g. "-6h" or "-1pz").
    The checksum acts as a disambiguator in case two different inputs
    map to the same word combination. With 2-3 characters, collisions
    become vanishingly rare.
    If you only need fun, human-readable names, you can disable it by
    setting ``checksum_len=0``. If you need unique, stable identifiers,
    keep it enabled.
    """
    if isinstance(data, str):
        data = data.encode()
    elif isinstance(data, Iterable) and not isinstance(data, (bytes, bytearray)):
        data = bytes(data)

    h = hashlib.blake2b(digest_size=8)  # fast, stable, short digest
    if salt:
        h.update(salt.encode() if isinstance(salt, str) else salt)
        h.update(b"\x00")  # domain-separate salt from data
    h.update(data)
    digest = h.digest()

    # Use the first 6 bytes to index words; the remaining bytes feed the checksum
    n1 = int.from_bytes(digest[0:3], "big")
    n2 = int.from_bytes(digest[3:6], "big")

    adj = words[0][n1 % len(words[0])]
    noun = words[1][n2 % len(words[1])]
    phrase = f"{adj}{sep}{noun}"

    if checksum_len > 0:
        # Short base36 checksum for collision visibility
        cs = int.from_bytes(digest[6:], "big")
        base36 = ""
        alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
        while cs:
            cs, r = divmod(cs, 36)
            base36 = alphabet[r] + base36
        base36 = (base36 or "0")[:checksum_len]
        phrase = f"{phrase}{sep}{base36}"

    if case == "title":
        phrase = sep.join(p.capitalize() for p in phrase.split(sep))
    elif case == "upper":
        phrase = phrase.upper()
    return phrase
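

# Minimal usage sketch (an assumption, not part of the app's entry points): running
# this module directly prints a few readable_hash outputs so the anonymization scheme
# can be eyeballed. Actual phrases depend on the BLAKE2b digest, so no expected values
# are hard-coded; the "leaderboard-v1" salt below is a hypothetical example.
if __name__ == "__main__":
    for name in ("alice", "bob", "alice"):
        # Identical inputs (with the same salt) always map to the same phrase.
        print(f"{name!r} -> {readable_hash(name, salt='leaderboard-v1')}")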