Magistral_geogussr_challenge / data_manager.py
Jofthomas's picture
Upload 12 files
a303b0f verified
raw
history blame
3.59 kB
import os
import json
import tempfile
from datetime import datetime, timezone
from huggingface_hub import hf_hub_download, upload_file
from huggingface_hub.utils import HfHubHTTPError
# Hub dataset repository that stores the daily game records.
# Overridable through the HF_DATASET_REPO environment variable.
DATASET_REPO = os.environ.get("HF_DATASET_REPO", "jofthomas/geoguessr_game_of_the_day")
def get_todays_records_path() -> str:
    """Builds the in-repo path of today's (UTC) records file, e.g., 'records/2025-10-03.json'."""
    today_utc = datetime.now(timezone.utc).date()
    # date.isoformat() yields the same YYYY-MM-DD text as strftime('%Y-%m-%d').
    return "records/" + today_utc.isoformat() + ".json"
def get_todays_games(token: str) -> list:
    """
    Downloads and reads the game records for the current day from the HF Hub.

    This is a best-effort read: an empty list is returned both when the Hub
    answers 404 (no file for today yet) and when a non-HTTP error occurs
    while downloading or parsing the file.

    Args:
        token: Hugging Face token forwarded to the download call.

    Returns:
        The parsed JSON content of today's records file, or an empty list as
        described above.

    Raises:
        HfHubHTTPError: For Hub HTTP errors other than 404 (re-raised after logging).
    """
    filepath = get_todays_records_path()
    try:
        # Use the provided token for read access
        local_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename=filepath,
            repo_type="dataset",
            token=token,
        )
        with open(local_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except HfHubHTTPError as e:
        # The error may carry no response object; guard the lookup so a missing
        # response cannot raise AttributeError here and hide the original error.
        status_code = getattr(getattr(e, "response", None), "status_code", None)
        if status_code == 404:
            return []  # No games played today yet, which is normal.
        print(f"Error downloading daily records: {e}")
        raise  # Re-raise other HTTP errors
    except Exception as e:
        print(f"An unexpected error occurred while getting today's games: {e}")
        return []
def has_user_played_today(username: str, todays_games: list) -> bool:
    """Tells whether any record in today's games carries the given username."""
    for record in todays_games:
        if record.get("username") == username:
            return True
    return False
def _load_records_for_update(filepath_in_repo: str, token: str) -> list:
    """
    Loads the records file that is about to be rewritten.
    Only a 404 from the Hub is treated as "no records yet"; any other failure is
    raised so the caller never uploads a truncated list over existing data.
    """
    try:
        local_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename=filepath_in_repo,
            repo_type="dataset",
            token=token,
        )
    except HfHubHTTPError as e:
        # The error may carry no response object, so guard the lookup.
        status_code = getattr(getattr(e, "response", None), "status_code", None)
        if status_code == 404:
            return []
        raise
    with open(local_path, "r", encoding="utf-8") as f:
        records = json.load(f)
    if not isinstance(records, list):
        raise ValueError(
            f"Expected a list of records in {filepath_in_repo}, got {type(records).__name__}"
        )
    return records


def record_game(username: str, score: float):
    """
    Records a completed game to the daily records file on the HF Hub.

    The function reads the existing file, appends the new record, and uploads the
    result back using the server's write token from the HF_TOKEN environment
    variable. Nothing is uploaded when the token is missing, when the user already
    has a record for today, or when the existing records cannot be read reliably.
    All errors are logged and swallowed; the function always returns None.

    NOTE(review): the read-modify-write cycle is not atomic, so two uploads racing
    each other can still drop a record.

    Args:
        username: Name stored in the record and used for the duplicate check.
        score: Score stored in the record.
    """
    write_token = os.getenv("HF_TOKEN", "")
    if not write_token:
        print("Warning: Server HF_TOKEN not set. Cannot record game score.")
        return
    try:
        # Resolve the path once so the read and the write target the same file
        # even if the UTC date rolls over in between.
        filepath_in_repo = get_todays_records_path()
        # Strict read: a failed download must abort the write instead of being
        # mistaken for an empty day.
        todays_games = _load_records_for_update(filepath_in_repo, write_token)
        # Final check to prevent duplicate entries in case of concurrent games
        if has_user_played_today(username, todays_games):
            print(f"User {username} has already played today. Skipping record.")
            return
        todays_games.append({
            "username": username,
            "score": score,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        })
        # upload_file accepts raw bytes, so no temporary file is needed.
        payload = json.dumps(todays_games, indent=2).encode("utf-8")
        upload_file(
            path_or_fileobj=payload,
            path_in_repo=filepath_in_repo,
            repo_id=DATASET_REPO,
            repo_type="dataset",
            token=write_token,
            commit_message=f"Game result for {username}",
        )
        print(f"Successfully recorded game for {username} with score {score}")
    except Exception as e:
        print(f"Error recording game for {username}: {e}")