Spaces:

Fraser
/

piclets-server

Sleeping

App Files Files Community

Fraser committed on Oct 1

Commit

f9201f6

1 Parent(s): cc4ae68

UPDATE

Browse files

Files changed (5) hide show

API_DOCUMENTATION.md +94 -17
app.py +148 -26
auth.py +95 -0
init_dataset.py +243 -0
requirements.txt +2 -1

API_DOCUMENTATION.md CHANGED Viewed

@@ -91,7 +91,7 @@ const result = await client.predict("/search_piclet", {
 ### 2. Create Canonical
 **Endpoint**: `/create_canonical`
-**Purpose**: Register the first discovery of an object
 **Method**: Gradio function call
 **Input Parameters**:
@@ -99,7 +99,7 @@ const result = await client.predict("/search_piclet", {
 {
   "object_name": "pillow",
   "piclet_data": "{ /* JSON string of Piclet instance */ }",
-  "username": "discoverer123"
 }
 ```
@@ -111,7 +111,11 @@ const result = await client.predict("/search_piclet", {
   "piclet": {
     "objectName": "pillow",
     "typeId": "pillow_canonical",
-    "discoveredBy": "discoverer123",
     "discoveredAt": "2024-07-26T10:30:00",
     "scanCount": 1,
     "picletData": { /* full Piclet data */ }
@@ -119,7 +123,13 @@ const result = await client.predict("/search_piclet", {
 }
 ```
-**Error Response**:
 ```json
 {
   "success": false,
@@ -127,9 +137,15 @@ const result = await client.predict("/search_piclet", {
 }
 ```
 ### 3. Create Variation
 **Endpoint**: `/create_variation`
-**Purpose**: Add a variation to an existing canonical Piclet
 **Method**: Gradio function call
 **Input Parameters**:
@@ -138,7 +154,7 @@ const result = await client.predict("/search_piclet", {
   "canonical_id": "pillow_canonical",
   "attributes": ["velvet", "blue"],
   "piclet_data": "{ /* JSON string of variation data */ }",
-  "username": "player456",
   "object_name": "pillow"
 }
 ```
@@ -152,6 +168,10 @@ const result = await client.predict("/search_piclet", {
     "typeId": "pillow_001",
     "attributes": ["velvet", "blue"],
     "discoveredBy": "player456",
     "discoveredAt": "2024-07-26T11:00:00",
     "scanCount": 1,
     "picletData": { /* variation data */ }
@@ -159,6 +179,25 @@ const result = await client.predict("/search_piclet", {
 }
 ```
 ### 4. Increment Scan Count
 **Endpoint**: `/increment_scan_count`
 **Purpose**: Track how many times a Piclet has been discovered
@@ -261,7 +300,7 @@ const result = await client.predict("/search_piclet", {
 **Input Parameters**:
 ```json
 {
-  "username": "player123"
 }
 ```
@@ -270,16 +309,28 @@ const result = await client.predict("/search_piclet", {
 {
   "success": true,
   "profile": {
-    "username": "player123",
     "joinedAt": "2024-07-01T10:00:00",
     "discoveries": ["pillow_canonical", "chair_002", "lamp_canonical"],
     "uniqueFinds": 2,
     "totalFinds": 3,
-    "rarityScore": 250
   }
 }
 ```
 ## Object Normalization Rules
 The server normalizes object names for consistent storage:
@@ -340,15 +391,41 @@ Currently no rate limiting implemented. For production:
 ## Authentication
-**Current**: Username-based (no passwords)
-- Users provide username in requests
-- All data is publicly visible
-- No sensitive information stored
-**Future Options**:
-- HuggingFace OAuth integration
-- API keys for verified users
-- Session-based authentication
 ## Data Storage

 ### 2. Create Canonical
 **Endpoint**: `/create_canonical`
+**Purpose**: Register the first discovery of an object with OAuth verification
 **Method**: Gradio function call
 **Input Parameters**:
 {
   "object_name": "pillow",
   "piclet_data": "{ /* JSON string of Piclet instance */ }",
+  "token_or_username": "hf_xxxxxxxxxxxxx"  // OAuth token or username for testing
 }
 ```
   "piclet": {
     "objectName": "pillow",
     "typeId": "pillow_canonical",
+    "discoveredBy": "username123",
+    "discovererSub": "987654321",
+    "discovererUsername": "username123",
+    "discovererName": "Display Name",
+    "discovererPicture": "https://avatars.huggingface.co/...",
     "discoveredAt": "2024-07-26T10:30:00",
     "scanCount": 1,
     "picletData": { /* full Piclet data */ }
 }
 ```
+**Error Responses**:
+```json
+{
+  "success": false,
+  "error": "Invalid OAuth token"
+}
+```
 ```json
 {
   "success": false,
 }
 ```
+**Notes**:
+- If `token_or_username` starts with `hf_`, it's verified as an OAuth token
+- Token verification calls `https://huggingface.co/oauth/userinfo`
+- User profile is created/updated with cached OAuth fields
+- Legacy mode: Plain usernames create `legacy_{username}` profiles
 ### 3. Create Variation
 **Endpoint**: `/create_variation`
+**Purpose**: Add a variation to an existing canonical Piclet with OAuth verification
 **Method**: Gradio function call
 **Input Parameters**:
   "canonical_id": "pillow_canonical",
   "attributes": ["velvet", "blue"],
   "piclet_data": "{ /* JSON string of variation data */ }",
+  "token_or_username": "hf_xxxxxxxxxxxxx",  // OAuth token or username for testing
   "object_name": "pillow"
 }
 ```
     "typeId": "pillow_001",
     "attributes": ["velvet", "blue"],
     "discoveredBy": "player456",
+    "discovererSub": "123456789",
+    "discovererUsername": "player456",
+    "discovererName": "Player Name",
+    "discovererPicture": "https://avatars.huggingface.co/...",
     "discoveredAt": "2024-07-26T11:00:00",
     "scanCount": 1,
     "picletData": { /* variation data */ }
 }
 ```
+**Error Responses**:
+```json
+{
+  "success": false,
+  "error": "Invalid OAuth token"
+}
+```
+```json
+{
+  "success": false,
+  "error": "Canonical Piclet not found for 'pillow'"
+}
+```
+**Notes**:
+- Same OAuth verification as create_canonical
+- User profile updated with variation discovery (+50 rarity points)
+- Variation numbering is automatic (pillow_001, pillow_002, etc.)
 ### 4. Increment Scan Count
 **Endpoint**: `/increment_scan_count`
 **Purpose**: Track how many times a Piclet has been discovered
 **Input Parameters**:
 ```json
 {
+  "sub": "987654321"  // HuggingFace user ID (preferred) or username for legacy
 }
 ```
 {
   "success": true,
   "profile": {
+    "sub": "987654321",
+    "preferred_username": "player123",
+    "name": "Player Display Name",
+    "picture": "https://avatars.huggingface.co/...",
+    "email": "user@example.com",
     "joinedAt": "2024-07-01T10:00:00",
+    "lastSeen": "2024-07-26T12:00:00",
     "discoveries": ["pillow_canonical", "chair_002", "lamp_canonical"],
     "uniqueFinds": 2,
     "totalFinds": 3,
+    "rarityScore": 250,
+    "visibility": "public"
   }
 }
 ```
+**Notes**:
+- Profile keyed by `sub` (stable HF user ID), not username
+- OAuth fields (preferred_username, name, picture) cached and refreshed on each login
+- Legacy profiles have `sub = "legacy_{username}"`
+- Visibility can be "public" or "private" (future feature)
 ## Object Normalization Rules
 The server normalizes object names for consistent storage:
 ## Authentication
+**OAuth Token Verification** (Production Mode):
+- Frontend sends `Authorization: Bearer <hf_token>` headers
+- Server verifies tokens via `https://huggingface.co/oauth/userinfo`
+- Returns user info: `sub` (stable ID), `preferred_username`, `name`, `picture`, `email`
+- User profiles keyed by `sub` (HF user ID) instead of username
+- Usernames can change, but `sub` remains stable
+**Legacy Mode** (Testing Only):
+- For backward compatibility, endpoints accept plain usernames
+- If the supplied value doesn't start with `hf_`, it is treated as a plain username
+- Creates legacy user profile with `sub = "legacy_{username}"`
+**Example OAuth Flow**:
+```javascript
+// Frontend: Get OAuth token from HuggingFace Space
+import { HfInference } from "https://cdn.jsdelivr.net/npm/@huggingface/inference/+esm";
+const auth = await hfAuth.signIn();
+// Make authenticated request
+const response = await fetch('/api/endpoint', {
+  headers: {
+    'Authorization': `Bearer ${auth.accessToken}`,
+    'Content-Type': 'application/json'
+  },
+  body: JSON.stringify({ /* payload */ })
+});
+```
+**Token Verification Process**:
+1. Extract Bearer token from Authorization header
+2. Call `https://huggingface.co/oauth/userinfo` with token
+3. Verify response status 200
+4. Extract user info (sub, preferred_username, name, picture)
+5. Get or create user profile using `sub` as key
+6. Cache profile fields on each request
 ## Data Storage

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from typing import Dict, List, Optional, Tuple
 from huggingface_hub import HfApi, hf_hub_download, list_repo_files
 from pathlib import Path
 import tempfile
 # HuggingFace configuration
 HF_TOKEN = os.getenv("HF_TOKEN")  # Required for writing to dataset
@@ -109,10 +110,18 @@ class PicletDiscoveryService:
             return False
     @staticmethod
-    def load_user_data(username: str) -> dict:
-        """Load user profile from dataset"""
         try:
-            file_path = f"users/{username.lower()}.json"
             local_path = hf_hub_download(
                 repo_id=DATASET_REPO,
                 filename=file_path,
@@ -125,20 +134,38 @@ class PicletDiscoveryService:
                 return json.load(f)
         except:
             # Return default user profile if not found
             return {
-                "username": username,
                 "joinedAt": datetime.now().isoformat(),
                 "discoveries": [],
                 "uniqueFinds": 0,
                 "totalFinds": 0,
-                "rarityScore": 0
             }
     @staticmethod
-    def save_user_data(username: str, data: dict) -> bool:
-        """Save user profile to dataset"""
         try:
-            file_path = f"users/{username.lower()}.json"
             with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
                 json.dump(data, f, indent=2)
@@ -149,7 +176,7 @@ class PicletDiscoveryService:
                 path_in_repo=file_path,
                 repo_id=DATASET_REPO,
                 repo_type=DATASET_TYPE,
-                commit_message=f"Update user profile: {username}"
             )
             os.unlink(temp_path)
@@ -158,6 +185,36 @@ class PicletDiscoveryService:
             print(f"Failed to save user data: {e}")
             return False
     @staticmethod
     def update_global_stats() -> dict:
         """Update and return global statistics"""
@@ -234,19 +291,53 @@ def search_piclet(object_name: str, attributes: List[str]) -> dict:
         "piclet": None
     }
-def create_canonical(object_name: str, piclet_data: str, username: str) -> dict:
     """
     Create a new canonical Piclet
     """
     try:
         piclet_json = json.loads(piclet_data) if isinstance(piclet_data, str) else piclet_data
-        # Create canonical entry
         canonical_data = {
             "canonical": {
                 "objectName": object_name,
                 "typeId": f"{PicletDiscoveryService.normalize_object_name(object_name)}_canonical",
-                "discoveredBy": username,
                 "discoveredAt": datetime.now().isoformat(),
                 "scanCount": 1,
                 "picletData": piclet_json
@@ -257,12 +348,12 @@ def create_canonical(object_name: str, piclet_data: str, username: str) -> dict:
         # Save to dataset
         if PicletDiscoveryService.save_piclet_data(object_name, canonical_data):
             # Update user profile
-            user_data = PicletDiscoveryService.load_user_data(username)
-            user_data["discoveries"].append(canonical_data["canonical"]["typeId"])
-            user_data["uniqueFinds"] += 1
-            user_data["totalFinds"] += 1
-            user_data["rarityScore"] += 100  # Bonus for canonical discovery
-            PicletDiscoveryService.save_user_data(username, user_data)
             return {
                 "success": True,
@@ -280,13 +371,41 @@ def create_canonical(object_name: str, piclet_data: str, username: str) -> dict:
             "error": str(e)
         }
-def create_variation(canonical_id: str, attributes: List[str], piclet_data: str, username: str, object_name: str) -> dict:
     """
-    Create a variation of an existing canonical Piclet
     """
     try:
         piclet_json = json.loads(piclet_data) if isinstance(piclet_data, str) else piclet_data
         # Load existing data
         existing_data = PicletDiscoveryService.load_piclet_data(object_name)
         if not existing_data:
@@ -300,7 +419,11 @@ def create_variation(canonical_id: str, attributes: List[str], piclet_data: str,
         variation = {
             "typeId": variation_id,
             "attributes": attributes,
-            "discoveredBy": username,
             "discoveredAt": datetime.now().isoformat(),
             "scanCount": 1,
             "picletData": piclet_json
@@ -312,11 +435,10 @@ def create_variation(canonical_id: str, attributes: List[str], piclet_data: str,
         # Save updated data
         if PicletDiscoveryService.save_piclet_data(object_name, existing_data):
             # Update user profile
-            user_data = PicletDiscoveryService.load_user_data(username)
-            user_data["discoveries"].append(variation_id)
-            user_data["totalFinds"] += 1
-            user_data["rarityScore"] += 50  # Bonus for variation discovery
-            PicletDiscoveryService.save_user_data(username, user_data)
             return {
                 "success": True,

 from huggingface_hub import HfApi, hf_hub_download, list_repo_files
 from pathlib import Path
 import tempfile
+from auth import verify_hf_token, get_user_from_request_headers
 # HuggingFace configuration
 HF_TOKEN = os.getenv("HF_TOKEN")  # Required for writing to dataset
             return False
     @staticmethod
+    def load_user_data(sub: str) -> dict:
+        """
+        Load user profile from dataset by HF user ID (sub)
+        Args:
+            sub: HuggingFace user ID (stable identifier)
+        Returns:
+            User profile dict or default profile if not found
+        """
         try:
+            file_path = f"users/{sub}.json"
             local_path = hf_hub_download(
                 repo_id=DATASET_REPO,
                 filename=file_path,
                 return json.load(f)
         except:
             # Return default user profile if not found
+            # Will be populated with actual data on first save
             return {
+                "sub": sub,
+                "preferred_username": None,
+                "name": None,
+                "picture": None,
                 "joinedAt": datetime.now().isoformat(),
+                "lastSeen": datetime.now().isoformat(),
                 "discoveries": [],
                 "uniqueFinds": 0,
                 "totalFinds": 0,
+                "rarityScore": 0,
+                "visibility": "public"
             }
     @staticmethod
+    def save_user_data(sub: str, data: dict) -> bool:
+        """
+        Save user profile to dataset by HF user ID (sub)
+        Args:
+            sub: HuggingFace user ID (stable identifier)
+            data: User profile dict
+        Returns:
+            True if successful, False otherwise
+        """
         try:
+            file_path = f"users/{sub}.json"
+            # Update lastSeen timestamp
+            data["lastSeen"] = datetime.now().isoformat()
             with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
                 json.dump(data, f, indent=2)
                 path_in_repo=file_path,
                 repo_id=DATASET_REPO,
                 repo_type=DATASET_TYPE,
+                commit_message=f"Update user profile: {data.get('preferred_username', sub)}"
             )
             os.unlink(temp_path)
             print(f"Failed to save user data: {e}")
             return False
+    @staticmethod
+    def get_or_create_user_profile(user_info: dict) -> dict:
+        """
+        Get existing user profile or create new one from OAuth user_info
+        Refreshes cached profile data on each call
+        Args:
+            user_info: OAuth user info from HF (sub, preferred_username, name, picture)
+        Returns:
+            User profile dict
+        """
+        sub = user_info['sub']
+        # Load existing profile
+        profile = PicletDiscoveryService.load_user_data(sub)
+        # Update cached profile fields from OAuth
+        profile['sub'] = sub
+        profile['preferred_username'] = user_info.get('preferred_username')
+        profile['name'] = user_info.get('name')
+        profile['picture'] = user_info.get('picture')
+        profile['email'] = user_info.get('email')
+        # Set joinedAt only if this is a new profile
+        if 'joinedAt' not in profile or not profile['joinedAt']:
+            profile['joinedAt'] = datetime.now().isoformat()
+        return profile
     @staticmethod
     def update_global_stats() -> dict:
         """Update and return global statistics"""
         "piclet": None
     }
+def create_canonical(object_name: str, piclet_data: str, token_or_username: str) -> dict:
     """
     Create a new canonical Piclet
+    Args:
+        object_name: The normalized object name (e.g., "pillow")
+        piclet_data: JSON string of Piclet instance data
+        token_or_username: Either OAuth token (starts with "hf_") or username for testing
+    Returns:
+        Dict with success status and piclet data
     """
     try:
         piclet_json = json.loads(piclet_data) if isinstance(piclet_data, str) else piclet_data
+        # Determine if this is a token or username
+        user_info = None
+        if token_or_username and token_or_username.startswith('hf_'):
+            # OAuth token - verify it
+            user_info = verify_hf_token(token_or_username)
+            if not user_info:
+                return {
+                    "success": False,
+                    "error": "Invalid OAuth token"
+                }
+        else:
+            # Legacy username mode (for testing)
+            user_info = {
+                "sub": f"legacy_{token_or_username}",
+                "preferred_username": token_or_username,
+                "name": token_or_username,
+                "picture": None
+            }
+        # Get or create user profile
+        user_profile = PicletDiscoveryService.get_or_create_user_profile(user_info)
+        # Create canonical entry with full discoverer info
         canonical_data = {
             "canonical": {
                 "objectName": object_name,
                 "typeId": f"{PicletDiscoveryService.normalize_object_name(object_name)}_canonical",
+                "discoveredBy": user_info['preferred_username'],
+                "discovererSub": user_info['sub'],
+                "discovererUsername": user_info['preferred_username'],
+                "discovererName": user_info.get('name'),
+                "discovererPicture": user_info.get('picture'),
                 "discoveredAt": datetime.now().isoformat(),
                 "scanCount": 1,
                 "picletData": piclet_json
         # Save to dataset
         if PicletDiscoveryService.save_piclet_data(object_name, canonical_data):
             # Update user profile
+            user_profile["discoveries"].append(canonical_data["canonical"]["typeId"])
+            user_profile["uniqueFinds"] += 1
+            user_profile["totalFinds"] += 1
+            user_profile["rarityScore"] += 100  # Bonus for canonical discovery
+            PicletDiscoveryService.save_user_data(user_info['sub'], user_profile)
             return {
                 "success": True,
             "error": str(e)
         }
+def create_variation(canonical_id: str, attributes: List[str], piclet_data: str, token_or_username: str, object_name: str) -> dict:
     """
+    Create a variation of an existing canonical Piclet with OAuth verification
+    Args:
+        canonical_id: ID of the canonical Piclet
+        attributes: List of variation attributes
+        piclet_data: JSON data for the Piclet
+        token_or_username: Either OAuth token (starts with "hf_") or username for testing
+        object_name: Normalized object name
+    Returns:
+        Success/error dict with variation data
     """
     try:
         piclet_json = json.loads(piclet_data) if isinstance(piclet_data, str) else piclet_data
+        # Verify token or use legacy mode
+        user_info = None
+        if token_or_username and token_or_username.startswith('hf_'):
+            user_info = verify_hf_token(token_or_username)
+            if not user_info:
+                return {"success": False, "error": "Invalid OAuth token"}
+        else:
+            # Legacy mode for testing
+            user_info = {
+                "sub": f"legacy_{token_or_username}",
+                "preferred_username": token_or_username,
+                "name": token_or_username,
+                "picture": None
+            }
+        # Get or create user profile
+        user_profile = PicletDiscoveryService.get_or_create_user_profile(user_info)
         # Load existing data
         existing_data = PicletDiscoveryService.load_piclet_data(object_name)
         if not existing_data:
         variation = {
             "typeId": variation_id,
             "attributes": attributes,
+            "discoveredBy": user_info['preferred_username'],
+            "discovererSub": user_info['sub'],
+            "discovererUsername": user_info['preferred_username'],
+            "discovererName": user_info.get('name'),
+            "discovererPicture": user_info.get('picture'),
             "discoveredAt": datetime.now().isoformat(),
             "scanCount": 1,
             "picletData": piclet_json
         # Save updated data
         if PicletDiscoveryService.save_piclet_data(object_name, existing_data):
             # Update user profile
+            user_profile["discoveries"].append(variation_id)
+            user_profile["totalFinds"] += 1
+            user_profile["rarityScore"] += 50  # Bonus for variation discovery
+            PicletDiscoveryService.save_user_data(user_info['sub'], user_profile)
             return {
                 "success": True,

auth.py ADDED Viewed

	@@ -0,0 +1,95 @@

+"""
+OAuth authentication utilities for HuggingFace token verification
+"""
+import requests
+from typing import Optional, Dict
+# HuggingFace OAuth userinfo endpoint
+HF_USERINFO_URL = 'https://huggingface.co/oauth/userinfo'
+def verify_hf_token(token: str) -> Optional[Dict]:
+    """
+    Verify HF OAuth token and return user info
+    Args:
+        token: The HuggingFace OAuth access token
+    Returns:
+        User info dict with fields: sub, preferred_username, name, picture, email
+        None if token is invalid
+    Example:
+        {
+            "sub": "12345678",
+            "preferred_username": "username",
+            "name": "Display Name",
+            "picture": "https://avatars.huggingface.co/...",
+            "email": "user@example.com"
+        }
+    """
+    try:
+        response = requests.get(
+            HF_USERINFO_URL,
+            headers={'Authorization': f'Bearer {token}'},
+            timeout=10
+        )
+        if response.status_code == 200:
+            return response.json()
+        else:
+            print(f"Token verification failed with status {response.status_code}")
+            return None
+    except requests.RequestException as e:
+        print(f"Token verification error: {e}")
+        return None
+def extract_token_from_auth_header(auth_header: str) -> Optional[str]:
+    """
+    Extract Bearer token from Authorization header
+    Args:
+        auth_header: The Authorization header value (e.g., "Bearer abc123...")
+    Returns:
+        The token string, or None if invalid format
+    """
+    if not auth_header:
+        return None
+    if not auth_header.startswith('Bearer '):
+        return None
+    try:
+        return auth_header.split(' ', 1)[1]
+    except IndexError:
+        return None
+def get_user_from_request_headers(headers: Dict[str, str]) -> Optional[Dict]:
+    """
+    Extract and verify user info from request headers
+    Args:
+        headers: Dict of request headers (case-insensitive keys)
+    Returns:
+        User info dict if valid token, None otherwise
+    """
+    # Try to get Authorization header (case-insensitive)
+    auth_header = None
+    for key, value in headers.items():
+        if key.lower() == 'authorization':
+            auth_header = value
+            break
+    if not auth_header:
+        return None
+    token = extract_token_from_auth_header(auth_header)
+    if not token:
+        return None
+    return verify_hf_token(token)

init_dataset.py ADDED Viewed

	@@ -0,0 +1,243 @@

+"""
+Initialize HuggingFace dataset structure for Piclets Discovery game
+This script sets up the required directory structure and initial files
+in the Fraser/piclets dataset repository.
+Usage:
+    python init_dataset.py
+Requires:
+    - HF_TOKEN environment variable or set in HuggingFace Space secrets
+    - Write access to Fraser/piclets dataset
+"""
+import json
+import os
+from datetime import datetime
+from huggingface_hub import HfApi, hf_hub_download
+from pathlib import Path
+# Configuration
+DATASET_REPO = os.getenv("DATASET_REPO", "Fraser/piclets")
+HF_TOKEN = os.getenv("HF_TOKEN")
+def init_metadata_files():
+    """Initialize global metadata files"""
+    # Initial stats
+    stats = {
+        "totalPiclets": 0,
+        "totalVariations": 0,
+        "totalScans": 0,
+        "totalUsers": 0,
+        "lastUpdated": datetime.now().isoformat()
+    }
+    # Initial leaderboard (empty)
+    leaderboard = {
+        "topDiscoverers": [],
+        "recentActivity": [],
+        "lastUpdated": datetime.now().isoformat()
+    }
+    return stats, leaderboard
+def create_example_files():
+    """Create example user and piclet files for testing"""
+    # Example user profile
+    example_user = {
+        "sub": "example_123456",
+        "preferred_username": "example_user",
+        "name": "Example User",
+        "picture": None,
+        "joinedAt": datetime.now().isoformat(),
+        "lastSeen": datetime.now().isoformat(),
+        "discoveries": [],
+        "uniqueFinds": 0,
+        "totalFinds": 0,
+        "rarityScore": 0,
+        "visibility": "public"
+    }
+    # Example piclet (empty canonical, ready for discoveries)
+    example_piclet = {
+        "canonical": None,
+        "variations": [],
+        "metadata": {
+            "created": datetime.now().isoformat(),
+            "lastUpdated": datetime.now().isoformat()
+        }
+    }
+    return example_user, example_piclet
+def upload_initial_structure():
+    """Upload initial dataset structure to HuggingFace"""
+    if not HF_TOKEN:
+        print("ERROR: HF_TOKEN environment variable not set")
+        print("Please set HF_TOKEN with write access to the dataset")
+        return False
+    print(f"Initializing dataset: {DATASET_REPO}")
+    api = HfApi()
+    # Create temporary directory for files
+    temp_dir = Path("temp_dataset_init")
+    temp_dir.mkdir(exist_ok=True)
+    try:
+        # 1. Create metadata directory and files
+        metadata_dir = temp_dir / "metadata"
+        metadata_dir.mkdir(exist_ok=True)
+        stats, leaderboard = init_metadata_files()
+        with open(metadata_dir / "stats.json", "w") as f:
+            json.dump(stats, f, indent=2)
+        print("✓ Created metadata/stats.json")
+        with open(metadata_dir / "leaderboard.json", "w") as f:
+            json.dump(leaderboard, f, indent=2)
+        print("✓ Created metadata/leaderboard.json")
+        # 2. Create users directory with example
+        users_dir = temp_dir / "users"
+        users_dir.mkdir(exist_ok=True)
+        example_user, _ = create_example_files()
+        with open(users_dir / ".gitkeep", "w") as f:
+            f.write("# User profiles stored here\n")
+        print("✓ Created users/ directory")
+        # 3. Create piclets directory with example
+        piclets_dir = temp_dir / "piclets"
+        piclets_dir.mkdir(exist_ok=True)
+        with open(piclets_dir / ".gitkeep", "w") as f:
+            f.write("# Canonical piclets and variations stored here\n")
+        print("✓ Created piclets/ directory")
+        # 4. Create README
+        readme_content = """# Piclets Discovery Dataset
+This dataset stores the canonical Piclets, variations, and user profiles for the Piclets Discovery game.
+## Structure
+```
+metadata/
+  stats.json          # Global statistics
+  leaderboard.json    # Top discoverers and recent activity
+users/
+  {sub}.json          # User profiles keyed by HuggingFace user ID
+piclets/
+  {normalized_name}.json  # Canonical piclet + variations
+```
+## Authentication
+All endpoints require HuggingFace OAuth tokens:
+- Frontend sends `Authorization: Bearer <token>` headers
+- Server verifies via `https://huggingface.co/oauth/userinfo`
+- User profiles use stable `sub` field as primary key
+## API
+Server endpoint: `Fraser/piclets-server` (HuggingFace Space)
+See server repository for full API documentation.
+"""
+        with open(temp_dir / "README.md", "w") as f:
+            f.write(readme_content)
+        print("✓ Created README.md")
+        # 5. Upload all files to dataset
+        print(f"\nUploading to {DATASET_REPO}...")
+        api.upload_folder(
+            folder_path=str(temp_dir),
+            repo_id=DATASET_REPO,
+            repo_type="dataset",
+            token=HF_TOKEN,
+            commit_message="Initialize dataset structure for Piclets Discovery"
+        )
+        print(f"\n✓ Dataset initialized successfully!")
+        print(f"View at: https://huggingface.co/datasets/{DATASET_REPO}")
+        return True
+    except Exception as e:
+        print(f"\n✗ Error initializing dataset: {e}")
+        return False
+    finally:
+        # Cleanup temp directory
+        import shutil
+        if temp_dir.exists():
+            shutil.rmtree(temp_dir)
+def verify_dataset_structure():
+    """Verify that dataset structure exists"""
+    if not HF_TOKEN:
+        print("ERROR: HF_TOKEN not set")
+        return False
+    print(f"Verifying dataset structure: {DATASET_REPO}")
+    try:
+        # Try to download metadata files
+        stats_path = hf_hub_download(
+            repo_id=DATASET_REPO,
+            filename="metadata/stats.json",
+            repo_type="dataset",
+            token=HF_TOKEN
+        )
+        with open(stats_path) as f:
+            stats = json.load(f)
+        print(f"✓ Dataset exists with {stats['totalPiclets']} piclets")
+        print(f"✓ Structure verified")
+        return True
+    except Exception as e:
+        print(f"✗ Dataset not initialized or error: {e}")
+        return False
+if __name__ == "__main__":
+    import sys
+    if len(sys.argv) > 1 and sys.argv[1] == "--verify":
+        # Verify mode
+        if verify_dataset_structure():
+            sys.exit(0)
+        else:
+            sys.exit(1)
+    else:
+        # Initialize mode
+        print("=" * 60)
+        print("Piclets Discovery Dataset Initialization")
+        print("=" * 60)
+        print()
+        if upload_initial_structure():
+            print("\nNext steps:")
+            print("1. Verify at https://huggingface.co/datasets/Fraser/piclets")
+            print("2. Test with: python init_dataset.py --verify")
+            print("3. Deploy piclets-server to HuggingFace Space")
+            sys.exit(0)
+        else:
+            sys.exit(1)

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
 gradio==5.38.2
 Pillow>=9.0.0
 huggingface_hub>=0.20.0
-datasets>=2.15.0

 gradio==5.38.2
 Pillow>=9.0.0
 huggingface_hub>=0.20.0
+datasets>=2.15.0
+requests>=2.31.0