Spaces:
Running
Running
| """ | |
| Initialize HuggingFace dataset structure for Piclets Discovery game | |
| This script sets up the required directory structure and initial files | |
| in the Fraser/piclets dataset repository. | |
| Usage: | |
| python init_dataset.py | |
| Requires: | |
| - HF_TOKEN set as an environment variable or as a HuggingFace Space secret | |
| - Write access to Fraser/piclets dataset | |
| """ | |
| import json | |
| import os | |
| from datetime import datetime | |
| from huggingface_hub import HfApi, hf_hub_download | |
| from pathlib import Path | |
# Configuration, read from the process environment once at import time.
# Target dataset repository; falls back to "Fraser/piclets" when the
# DATASET_REPO variable is not set.
DATASET_REPO = os.environ.get("DATASET_REPO", "Fraser/piclets")
# Access token for the Hub; None when the HF_TOKEN variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
def init_metadata_files():
    """Build the initial contents of the global metadata files.

    Returns:
        A ``(stats, leaderboard)`` tuple of JSON-serializable dicts.
        ``stats`` holds zeroed counters and ``leaderboard`` holds empty
        lists; both carry the same ``lastUpdated`` ISO-8601 timestamp.
    """
    # Read the clock once so both files produced by a single initialization
    # agree on when they were created (naive local time, as before).
    timestamp = datetime.now().isoformat()

    # Initial stats: every counter starts at zero.
    stats = {
        "totalPiclets": 0,
        "totalVariations": 0,
        "totalScans": 0,
        "totalUsers": 0,
        "lastUpdated": timestamp,
    }

    # Initial leaderboard: nothing discovered yet.
    leaderboard = {
        "topDiscoverers": [],
        "recentActivity": [],
        "lastUpdated": timestamp,
    }

    return stats, leaderboard
def create_example_files():
    """Create example user and piclet records for testing.

    Returns:
        An ``(example_user, example_piclet)`` tuple of JSON-serializable
        dicts. The user has no discoveries and zeroed scores; the piclet has
        no canonical entry and no variations.
    """
    # Read the clock once: on a freshly created record "joined"/"last seen"
    # and "created"/"last updated" describe the same instant and must match.
    timestamp = datetime.now().isoformat()

    # Example user profile
    example_user = {
        "sub": "example_123456",
        "preferred_username": "example_user",
        "name": "Example User",
        "picture": None,
        "joinedAt": timestamp,
        "lastSeen": timestamp,
        "discoveries": [],
        "uniqueFinds": 0,
        "totalFinds": 0,
        "rarityScore": 0,
        "visibility": "public",
    }

    # Example piclet (empty canonical, ready for discoveries)
    example_piclet = {
        "canonical": None,
        "variations": [],
        "metadata": {
            "created": timestamp,
            "lastUpdated": timestamp,
        },
    }

    return example_user, example_piclet
# README uploaded to the root of the dataset repository. Kept as a plain
# (non-f) string so the ``{sub}`` / ``{normalized_name}`` placeholders are
# written literally.
_README_CONTENT = """\
# Piclets Discovery Dataset

This dataset stores the canonical Piclets, variations, and user profiles for the Piclets Discovery game.

## Structure

```
metadata/
  stats.json              # Global statistics
  leaderboard.json        # Top discoverers and recent activity
users/
  {sub}.json              # User profiles keyed by HuggingFace user ID
piclets/
  {normalized_name}.json  # Canonical piclet + variations
```

## Authentication

All endpoints require HuggingFace OAuth tokens:

- Frontend sends `Authorization: Bearer <token>` headers
- Server verifies via `https://huggingface.co/oauth/userinfo`
- User profiles use stable `sub` field as primary key

## API

Server endpoint: `Fraser/piclets-server` (HuggingFace Space)

See server repository for full API documentation.
"""


def _write_text(path, text):
    """Write *text* to *path* as UTF-8, replacing any existing file."""
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)


def _write_json(path, payload):
    """Serialize *payload* to *path* as 2-space-indented UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)


def upload_initial_structure():
    """Upload the initial dataset structure to HuggingFace.

    Stages ``metadata/stats.json``, ``metadata/leaderboard.json``,
    ``users/.gitkeep``, ``piclets/.gitkeep`` and ``README.md`` in a temporary
    directory, then uploads that directory to DATASET_REPO in one commit.

    Returns:
        True when the upload succeeded; False when HF_TOKEN is unset or any
        step raised (the error is printed, not re-raised).
    """
    # Function-scope import: only this function needs it.
    import tempfile

    if not HF_TOKEN:
        print("ERROR: HF_TOKEN environment variable not set")
        print("Please set HF_TOKEN with write access to the dataset")
        return False

    print(f"Initializing dataset: {DATASET_REPO}")

    api = HfApi()

    # NOTE(review): the status glyphs were mis-encoded ("β") in the reviewed
    # copy of this file; "✓"/"✗" are the presumed originals - confirm.
    try:
        # A unique, self-cleaning staging directory. A fixed directory name in
        # the current working directory could pick up (and afterwards delete)
        # unrelated files that already live under that name.
        with tempfile.TemporaryDirectory(prefix="piclets_dataset_init_") as tmp:
            staging_dir = Path(tmp)

            # 1. Create metadata directory and files
            metadata_dir = staging_dir / "metadata"
            metadata_dir.mkdir(exist_ok=True)

            stats, leaderboard = init_metadata_files()

            _write_json(metadata_dir / "stats.json", stats)
            print("✓ Created metadata/stats.json")

            _write_json(metadata_dir / "leaderboard.json", leaderboard)
            print("✓ Created metadata/leaderboard.json")

            # 2. Create users directory; the placeholder file keeps the
            #    otherwise empty directory present in the uploaded tree.
            users_dir = staging_dir / "users"
            users_dir.mkdir(exist_ok=True)
            _write_text(users_dir / ".gitkeep", "# User profiles stored here\n")
            print("✓ Created users/ directory")

            # 3. Create piclets directory, again with a placeholder file.
            piclets_dir = staging_dir / "piclets"
            piclets_dir.mkdir(exist_ok=True)
            _write_text(
                piclets_dir / ".gitkeep",
                "# Canonical piclets and variations stored here\n",
            )
            print("✓ Created piclets/ directory")

            # 4. Create README
            _write_text(staging_dir / "README.md", _README_CONTENT)
            print("✓ Created README.md")

            # 5. Upload all files to dataset
            print(f"\nUploading to {DATASET_REPO}...")
            api.upload_folder(
                folder_path=str(staging_dir),
                repo_id=DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN,
                commit_message="Initialize dataset structure for Piclets Discovery",
            )

        print("\n✓ Dataset initialized successfully!")
        print(f"View at: https://huggingface.co/datasets/{DATASET_REPO}")
        return True

    except Exception as e:
        # Top-level boundary of the script: report the failure and let the
        # caller turn the False result into a non-zero exit status.
        print(f"\n✗ Error initializing dataset: {e}")
        return False
def verify_dataset_structure():
    """Verify that the dataset structure exists.

    Downloads ``metadata/stats.json`` from DATASET_REPO, parses it, and
    reports its ``totalPiclets`` counter.

    Returns:
        True when the file could be downloaded, parsed and read; False when
        HF_TOKEN is unset or any step raised (the error is printed, not
        re-raised).
    """
    if not HF_TOKEN:
        print("ERROR: HF_TOKEN not set")
        return False

    print(f"Verifying dataset structure: {DATASET_REPO}")

    # NOTE(review): the status glyphs were mis-encoded ("β") in the reviewed
    # copy of this file; "✓"/"✗" are the presumed originals - confirm.
    try:
        # Try to download the metadata file; this fails if the repository or
        # the file does not exist or the token cannot read it.
        stats_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename="metadata/stats.json",
            repo_type="dataset",
            token=HF_TOKEN,
        )

        # JSON is UTF-8; do not depend on the platform's default encoding.
        with open(stats_path, encoding="utf-8") as f:
            stats = json.load(f)

        # A missing "totalPiclets" key raises KeyError and is reported below
        # as a verification failure.
        print(f"✓ Dataset exists with {stats['totalPiclets']} piclets")
        print("✓ Structure verified")
        return True

    except Exception as e:
        # Top-level boundary of the script: report and signal failure.
        print(f"✗ Dataset not initialized or error: {e}")
        return False
def main(argv):
    """Run the script and return its process exit status.

    Args:
        argv: Command-line arguments including the program name. When the
            first argument is ``--verify`` the dataset is only verified;
            otherwise it is initialized.

    Returns:
        0 when the selected operation reported success, 1 otherwise.
    """
    if len(argv) > 1 and argv[1] == "--verify":
        # Verify mode
        return 0 if verify_dataset_structure() else 1

    # Initialize mode
    print("=" * 60)
    print("Piclets Discovery Dataset Initialization")
    print("=" * 60)
    print()

    if not upload_initial_structure():
        return 1

    print("\nNext steps:")
    # Point at the repository that was actually initialized: DATASET_REPO can
    # be overridden through the environment, so the URL must not be
    # hard-coded to the default repository.
    print(f"1. Verify at https://huggingface.co/datasets/{DATASET_REPO}")
    print("2. Test with: python init_dataset.py --verify")
    print("3. Deploy piclets-server to HuggingFace Space")
    return 0


if __name__ == "__main__":
    import sys

    sys.exit(main(sys.argv))