Spaces:
Running
Running
File size: 6,723 Bytes
f9201f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 |
"""
Initialize HuggingFace dataset structure for Piclets Discovery game
This script sets up the required directory structure and initial files
in the Fraser/piclets dataset repository.
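Usage:
python init_dataset.py            # create and upload the initial structure
python init_dataset.py --verify   # check that the structure already exists
Requires:
- HF_TOKEN environment variable (set it in the shell or in the HuggingFace Space secrets)
- Write access to the target dataset (Fraser/piclets by default; override with DATASET_REPO)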
"""
import json
import os
from datetime import datetime
from huggingface_hub import HfApi, hf_hub_download
from pathlib import Path
# Configuration, read once from the process environment at import time.
# Target dataset repository; falls back to "Fraser/piclets" when unset.
DATASET_REPO = os.environ.get("DATASET_REPO", "Fraser/piclets")
# Access token for the Hub; None when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
def init_metadata_files():
    """Build the initial contents of the global metadata files.

    Returns:
        A ``(stats, leaderboard)`` tuple of JSON-serializable dicts.
        ``stats`` holds zeroed counters and ``leaderboard`` holds empty
        lists; both carry the same ``lastUpdated`` ISO-8601 timestamp.
    """
    # Take the timestamp once so both files agree on when they were created
    # (two separate now() calls can differ by a few microseconds).
    created_at = datetime.now().isoformat()

    # Initial stats: every counter starts at zero.
    stats = {
        "totalPiclets": 0,
        "totalVariations": 0,
        "totalScans": 0,
        "totalUsers": 0,
        "lastUpdated": created_at,
    }

    # Initial leaderboard: no discoverers and no activity yet.
    leaderboard = {
        "topDiscoverers": [],
        "recentActivity": [],
        "lastUpdated": created_at,
    }

    return stats, leaderboard
def create_example_files():
    """Create example user and piclet records for testing.

    Returns:
        An ``(example_user, example_piclet)`` tuple of JSON-serializable
        dicts. All timestamp fields in both records share one ISO-8601
        value taken at call time.
    """
    # Take the timestamp once so paired fields (joinedAt/lastSeen,
    # created/lastUpdated) are exactly equal for a freshly created record.
    created_at = datetime.now().isoformat()

    # Example user profile with no discoveries yet.
    example_user = {
        "sub": "example_123456",
        "preferred_username": "example_user",
        "name": "Example User",
        "picture": None,
        "joinedAt": created_at,
        "lastSeen": created_at,
        "discoveries": [],
        "uniqueFinds": 0,
        "totalFinds": 0,
        "rarityScore": 0,
        "visibility": "public",
    }

    # Example piclet (empty canonical, ready for discoveries).
    example_piclet = {
        "canonical": None,
        "variations": [],
        "metadata": {
            "created": created_at,
            "lastUpdated": created_at,
        },
    }

    return example_user, example_piclet
# README uploaded to the root of the dataset repository.
# NOTE(review): the tree listing below has no indentation for nested entries;
# confirm whether the published README is meant to indent them.
_README_CONTENT = """# Piclets Discovery Dataset
This dataset stores the canonical Piclets, variations, and user profiles for the Piclets Discovery game.
## Structure
```
metadata/
stats.json # Global statistics
leaderboard.json # Top discoverers and recent activity
users/
{sub}.json # User profiles keyed by HuggingFace user ID
piclets/
{normalized_name}.json # Canonical piclet + variations
```
## Authentication
All endpoints require HuggingFace OAuth tokens:
- Frontend sends `Authorization: Bearer <token>` headers
- Server verifies via `https://huggingface.co/oauth/userinfo`
- User profiles use stable `sub` field as primary key
## API
Server endpoint: `Fraser/piclets-server` (HuggingFace Space)
See server repository for full API documentation.
"""


def _write_text(path, text):
    """Write *text* to *path* as UTF-8, replacing any existing file."""
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)


def _write_json(path, payload):
    """Serialize *payload* to *path* as 2-space-indented UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)


def _write_dataset_files(root):
    """Populate directory *root* with the initial dataset tree.

    Creates ``metadata/stats.json``, ``metadata/leaderboard.json``,
    ``users/.gitkeep``, ``piclets/.gitkeep`` and ``README.md`` under *root*,
    printing one progress line per item.
    """
    # 1. Metadata directory and files
    metadata_dir = root / "metadata"
    metadata_dir.mkdir(exist_ok=True)
    stats, leaderboard = init_metadata_files()
    _write_json(metadata_dir / "stats.json", stats)
    print("✓ Created metadata/stats.json")
    _write_json(metadata_dir / "leaderboard.json", leaderboard)
    print("✓ Created metadata/leaderboard.json")

    # 2. Users directory; the placeholder file keeps the otherwise empty
    #    directory present in the uploaded folder.
    users_dir = root / "users"
    users_dir.mkdir(exist_ok=True)
    _write_text(users_dir / ".gitkeep", "# User profiles stored here\n")
    print("✓ Created users/ directory")

    # 3. Piclets directory, kept the same way.
    piclets_dir = root / "piclets"
    piclets_dir.mkdir(exist_ok=True)
    _write_text(
        piclets_dir / ".gitkeep",
        "# Canonical piclets and variations stored here\n",
    )
    print("✓ Created piclets/ directory")

    # 4. README
    _write_text(root / "README.md", _README_CONTENT)
    print("✓ Created README.md")


def upload_initial_structure():
    """Upload the initial dataset structure to HuggingFace.

    The files are staged in a freshly created temporary directory and then
    pushed to ``DATASET_REPO`` with a single ``upload_folder`` call. The
    staging directory is removed afterwards, whether or not the upload
    succeeded.

    Returns:
        True when the upload completed; False when ``HF_TOKEN`` is not set
        or when any exception was raised while staging or uploading (the
        error is printed, not re-raised).
    """
    # Local import, matching the file's existing habit of importing helper
    # modules inside the function that needs them.
    import tempfile

    if not HF_TOKEN:
        print("ERROR: HF_TOKEN environment variable not set")
        print("Please set HF_TOKEN with write access to the dataset")
        return False

    print(f"Initializing dataset: {DATASET_REPO}")
    api = HfApi()

    try:
        # A unique, private staging directory: nothing that already exists in
        # the working directory can be uploaded or deleted by accident.
        with tempfile.TemporaryDirectory(prefix="piclets_dataset_init_") as staging:
            _write_dataset_files(Path(staging))

            # 5. Upload all files to dataset
            print(f"\nUploading to {DATASET_REPO}...")
            api.upload_folder(
                folder_path=staging,
                repo_id=DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN,
                commit_message="Initialize dataset structure for Piclets Discovery",
            )
    except Exception as e:
        # Top-level boundary of the script: report and signal failure through
        # the return value so the caller can pick the exit status.
        print(f"\n✗ Error initializing dataset: {e}")
        return False

    print("\n✓ Dataset initialized successfully!")
    print(f"View at: https://huggingface.co/datasets/{DATASET_REPO}")
    return True
def verify_dataset_structure():
    """Verify that the dataset structure exists.

    Downloads ``metadata/stats.json`` from ``DATASET_REPO`` and reads its
    ``totalPiclets`` counter; only that one file is checked.

    Returns:
        True when the file could be downloaded, parsed, and contains
        ``totalPiclets``; False when ``HF_TOKEN`` is not set or when any
        exception was raised along the way (the error is printed, not
        re-raised).
    """
    if not HF_TOKEN:
        print("ERROR: HF_TOKEN not set")
        return False

    print(f"Verifying dataset structure: {DATASET_REPO}")

    try:
        # Try to download the metadata file; hf_hub_download returns the
        # local path of the fetched file.
        stats_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename="metadata/stats.json",
            repo_type="dataset",
            token=HF_TOKEN,
        )

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(stats_path, encoding="utf-8") as f:
            stats = json.load(f)

        print(f"✓ Dataset exists with {stats['totalPiclets']} piclets")
        print("✓ Structure verified")
        return True
    except Exception as e:
        # Covers download failures, malformed JSON and a missing counter key.
        print(f"✗ Dataset not initialized or error: {e}")
        return False
def main(argv=None):
    """Run the command-line entry point and return the process exit status.

    Args:
        argv: Command-line arguments without the program name. When None,
            ``sys.argv[1:]`` is used.

    Returns:
        0 on success and 1 on failure. With ``--verify`` as the first
        argument the dataset is only verified; otherwise it is initialized.
    """
    import sys

    args = sys.argv[1:] if argv is None else list(argv)

    if args and args[0] == "--verify":
        # Verify mode
        return 0 if verify_dataset_structure() else 1

    # Initialize mode
    print("=" * 60)
    print("Piclets Discovery Dataset Initialization")
    print("=" * 60)
    print()

    if not upload_initial_structure():
        return 1

    print("\nNext steps:")
    # Point at the repository that was actually initialized, which may have
    # been overridden through the DATASET_REPO environment variable.
    print(f"1. Verify at https://huggingface.co/datasets/{DATASET_REPO}")
    print("2. Test with: python init_dataset.py --verify")
    print("3. Deploy piclets-server to HuggingFace Space")
    return 0


if __name__ == "__main__":
    import sys

    sys.exit(main())
|