# abdev-leaderboard / utils.py

import contextlib
from datetime import datetime, timezone, timedelta
import hashlib
import os
from typing import Iterable, Union

from datasets import load_dataset
import gradio as gr
import pandas as pd

from constants import (
RESULTS_REPO,
ASSAY_RENAME,
LEADERBOARD_RESULTS_COLUMNS,
BASELINE_USERNAMES,
)

pd.set_option("display.max_columns", None)


def get_time(tz_name="EST") -> str:
    # Fixed UTC offsets only; no DST handling ("EST" is treated as UTC-5 year-round)
    offsets = {"EST": -5, "UTC": 0}
    if tz_name not in offsets:
        print(f"Unknown timezone {tz_name!r}, using EST")
        tz_name = "EST"
offset = offsets[tz_name]
return (
datetime.now(timezone(timedelta(hours=offset))).strftime("%Y-%m-%d %H:%M:%S")
+ f" ({tz_name})"
)


def show_output_box(message):
return gr.update(value=message, visible=True)


def anonymize_user(username: str) -> str:
    # Anonymize using a short, stable hash of the username
    return hashlib.sha256(username.encode()).hexdigest()[:8]


def fetch_hf_results():
    # load_dataset caches by default; pass download_mode="force_redownload" to
    # bypass the cache
df = load_dataset(
RESULTS_REPO,
data_files="auto_submissions/metrics_all.csv",
)["train"].to_pandas()
    missing = set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)
    assert not missing, f"Missing expected columns {missing} in {list(df.columns)}"
df_baseline = df[df["user"].isin(BASELINE_USERNAMES)]
df_non_baseline = df[~df["user"].isin(BASELINE_USERNAMES)]
# Show latest submission only
# For baselines: Keep unique model names
df_baseline = df_baseline.sort_values(
"submission_time", ascending=False
).drop_duplicates(subset=["model", "assay", "dataset", "user"], keep="first")
# For users: Just show latest submission
df_non_baseline = df_non_baseline.sort_values(
"submission_time", ascending=False
).drop_duplicates(subset=["assay", "dataset", "user"], keep="first")
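    # Illustrative effect of the two dedup rules above (hypothetical rows): if a
    # user submitted twice for the same (assay, dataset), only the newest row
    # survives; a baseline user keeps one row per distinct model name instead.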
df = pd.concat([df_baseline, df_non_baseline], ignore_index=True)
df["property"] = df["assay"].map(ASSAY_RENAME)
# Rename baseline username to just "Baseline"
df.loc[df["user"].isin(BASELINE_USERNAMES), "user"] = "Baseline"
    # Note: could add an "is_baseline" column to flag baseline models if the
    # board gets crowded.
    # Anonymize the user column at this point. Users may submit anonymously or
    # not; we show their latest submission either way.
df.loc[df["anonymous"], "user"] = "anon-" + df.loc[df["anonymous"], "user"].apply(
readable_hash
)
    # Compare to the previously saved dataframe so new results get logged
    if os.path.exists("debug-current-results.csv"):
        old_df = pd.read_csv("debug-current-results.csv")
        if len(df) != len(old_df):
            print(f"New results: Length {len(old_df)} -> {len(df)} ({get_time()})")
    df.to_csv("debug-current-results.csv", index=False)
    return df
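
# Illustrative use of the returned frame (hypothetical; the real rendering
# lives in app.py):
#     df = fetch_hf_results()
#     print(df[["user", "property", "submission_time"]].head())

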
def fetch_latest_data(stop_event):
import time
while not stop_event.is_set():
try:
fetch_hf_results()
except Exception as e:
print(f"Error fetching latest data: {e}")
        time.sleep(3)  # fetch every 3 seconds
print("Exiting data fetch thread")


@contextlib.asynccontextmanager
async def periodic_data_fetch(app):
    # FastAPI-style lifespan: start the background fetch thread on startup and
    # signal it to stop (joining briefly) on shutdown
    import threading
event = threading.Event()
t = threading.Thread(target=fetch_latest_data, args=(event,), daemon=True)
t.start()
yield
event.set()
t.join(3)
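
# Illustrative wiring for the lifespan above (hypothetical names; the real app
# object lives elsewhere, e.g. in app.py):
#
#     from fastapi import FastAPI
#     app = FastAPI(lifespan=periodic_data_fetch)
#     app = gr.mount_gradio_app(app, demo, path="/")
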
# Readable hashing function similar to coolname or codenamize
ADJECTIVES = [
"ancient",
"brave",
"calm",
"clever",
"crimson",
"curious",
"dapper",
"eager",
"fuzzy",
"gentle",
"glowing",
"golden",
"happy",
"icy",
"jolly",
"lucky",
"magical",
"mellow",
"nimble",
"peachy",
"quick",
"royal",
"shiny",
"silent",
"sly",
"sparkly",
"spicy",
"spry",
"sturdy",
"sunny",
"swift",
"tiny",
"vivid",
"witty",
]

ANIMALS = [
"ant",
"bat",
"bear",
"bee",
"bison",
"boar",
"bug",
"cat",
"crab",
"crow",
"deer",
"dog",
"duck",
"eel",
"elk",
"fox",
"frog",
"goat",
"gull",
"hare",
"hawk",
"hen",
"horse",
"ibis",
"kid",
"kiwi",
"koala",
"lamb",
"lark",
"lemur",
"lion",
"llama",
"loon",
"lynx",
"mole",
"moose",
"mouse",
"newt",
"otter",
"owl",
"ox",
"panda",
"pig",
"prawn",
"puma",
"quail",
"quokka",
"rabbit",
"rat",
"ray",
"robin",
"seal",
"shark",
"sheep",
"shrew",
"skunk",
"slug",
"snail",
"snake",
"swan",
"toad",
"trout",
"turtle",
"vole",
"walrus",
"wasp",
"whale",
"wolf",
"worm",
"yak",
"zebra",
]

NOUNS = [
"rock",
"sand",
"star",
"tree",
"leaf",
"seed",
"stone",
"cloud",
"rain",
"snow",
"wind",
"fire",
"ash",
"dirt",
"mud",
"ice",
"wave",
"shell",
"dust",
"sun",
"moon",
"hill",
"lake",
"pond",
"reef",
"root",
"twig",
"wood",
]


def readable_hash(
data: Union[str, bytes, Iterable[int]],
*,
salt: Union[str, bytes, None] = None,
words: tuple[list[str], list[str]] = (ADJECTIVES, ANIMALS + NOUNS),
sep: str = "-",
checksum_len: int = 2, # 0 to disable; 2–3 is plenty
case: str = "lower", # "lower" | "title" | "upper"
) -> str:
"""
Deterministically map input data to 'adjective-animal[-checksum]'. Generated using ChatGPT.
Examples
--------
>>> readable_hash("hello world")
'magical-panda-6h'
>>> readable_hash("hello world", salt="my-app-v1", checksum_len=3)
'royal-otter-1pz'
>>> readable_hash(b"\x00\x01\x02\x03", case="title", checksum_len=0)
'Fuzzy-Tiger'
Vocabulary
----------
ADJECTIVES: ~160 safe, descriptive words (e.g. "ancient", "brave", "silent", "swift")
ANIMALS: ~80 short, common animals (e.g. "dog", "owl", "whale", "tiger")
NOUNS: optional set of ~30 neutral nouns (e.g. "rock", "star", "tree", "cloud")
Combinations
------------
- adjective + animal: ~13,000 unique names
- adjective + noun: ~5,000 unique names
- adjective + animal + noun: ~390,000 unique names
Checksum
--------
An optional short base-36 suffix (e.g. "-6h" or "-1pz"). The checksum
acts as a disambiguator in case two different inputs map to the same
word combination. With 2-3 characters, collisions become vanishingly rare.
If you only need fun, human-readable names, you can disable it by setting
``checksum_len=0``. If you need unique, stable identifiers, keep it enabled.
"""
if isinstance(data, str):
data = data.encode()
elif isinstance(data, Iterable) and not isinstance(data, (bytes, bytearray)):
data = bytes(data)
h = hashlib.blake2b(digest_size=8) # fast, stable, short digest
if salt:
h.update(salt.encode() if isinstance(salt, str) else salt)
h.update(b"\x00") # domain-separate salt from data
h.update(data)
digest = h.digest()
# Use the first 6 bytes to index words; last bytes for checksum
n1 = int.from_bytes(digest[0:3], "big")
n2 = int.from_bytes(digest[3:6], "big")
adj = words[0][n1 % len(words[0])]
noun = words[1][n2 % len(words[1])]
phrase = f"{adj}{sep}{noun}"
    if checksum_len > 0:
        # Short base-36 checksum from the last 2 digest bytes (so at most 4
        # characters are available, regardless of checksum_len)
        cs = int.from_bytes(digest[6:], "big")
base36 = ""
alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
while cs:
cs, r = divmod(cs, 36)
base36 = alphabet[r] + base36
base36 = (base36 or "0")[:checksum_len]
phrase = f"{phrase}{sep}{base36}"
if case == "title":
phrase = sep.join(p.capitalize() for p in phrase.split(sep))
elif case == "upper":
phrase = phrase.upper()
return phrase
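

if __name__ == "__main__":
    # Minimal smoke test (illustrative: no specific phrases are asserted, since
    # each one depends on the BLAKE2b digest of its input)
    for name in ["alice", "bob"]:
        print(name, "->", "anon-" + readable_hash(name))
    # The mapping is deterministic for a given input and salt
    assert readable_hash("alice") == readable_hash("alice")
    print("salted:", readable_hash("alice", salt="my-app-v1"))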