File size: 6,723 Bytes
f9201f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
"""
Initialize HuggingFace dataset structure for Piclets Discovery game

This script sets up the required directory structure and initial files
in the Fraser/piclets dataset repository.

Usage:
    python init_dataset.py

Requires:
    - HF_TOKEN environment variable or set in HuggingFace Space secrets
    - Write access to Fraser/piclets dataset
"""

import json
import os
from datetime import datetime
from huggingface_hub import HfApi, hf_hub_download
from pathlib import Path

# Configuration
DATASET_REPO = os.getenv("DATASET_REPO", "Fraser/piclets")
HF_TOKEN = os.getenv("HF_TOKEN")

def init_metadata_files():
    """Build the initial global metadata payloads.

    Returns:
        tuple: ``(stats, leaderboard)`` dicts ready to be serialized as
        ``metadata/stats.json`` and ``metadata/leaderboard.json``.
    """
    # Fresh stats: every counter starts at zero.
    stats = {
        "totalPiclets": 0,
        "totalVariations": 0,
        "totalScans": 0,
        "totalUsers": 0,
        "lastUpdated": datetime.now().isoformat(),
    }

    # Fresh leaderboard: no discoverers or activity yet.
    leaderboard = {
        "topDiscoverers": [],
        "recentActivity": [],
        "lastUpdated": datetime.now().isoformat(),
    }

    return stats, leaderboard


def create_example_files():
    """Build example user-profile and piclet payloads for testing.

    Returns:
        tuple: ``(example_user, example_piclet)`` dicts matching the
        schemas stored under ``users/`` and ``piclets/`` respectively.
    """
    joined = datetime.now().isoformat()

    # Example user profile keyed by the OAuth `sub` field.
    user = {
        "sub": "example_123456",
        "preferred_username": "example_user",
        "name": "Example User",
        "picture": None,
        "joinedAt": joined,
        "lastSeen": datetime.now().isoformat(),
        "discoveries": [],
        "uniqueFinds": 0,
        "totalFinds": 0,
        "rarityScore": 0,
        "visibility": "public",
    }

    # Example piclet with no canonical entry yet, awaiting discoveries.
    piclet = {
        "canonical": None,
        "variations": [],
        "metadata": {
            "created": datetime.now().isoformat(),
            "lastUpdated": datetime.now().isoformat(),
        },
    }

    return user, piclet


def upload_initial_structure():
    """Upload initial dataset structure to HuggingFace.

    Stages a local directory tree (metadata/, users/, piclets/, README.md)
    and pushes it to DATASET_REPO as a single dataset commit.

    Returns:
        bool: True on success, False if HF_TOKEN is unset or the upload fails.
    """
    import shutil  # hoisted from the finally block so cleanup can't hit an import at teardown time

    if not HF_TOKEN:
        print("ERROR: HF_TOKEN environment variable not set")
        print("Please set HF_TOKEN with write access to the dataset")
        return False

    print(f"Initializing dataset: {DATASET_REPO}")

    api = HfApi()

    # Stage everything locally first so the upload lands as one commit.
    temp_dir = Path("temp_dataset_init")
    temp_dir.mkdir(exist_ok=True)

    try:
        # 1. Create metadata directory and files
        metadata_dir = temp_dir / "metadata"
        metadata_dir.mkdir(exist_ok=True)

        stats, leaderboard = init_metadata_files()

        with open(metadata_dir / "stats.json", "w") as f:
            json.dump(stats, f, indent=2)
        print("βœ“ Created metadata/stats.json")

        with open(metadata_dir / "leaderboard.json", "w") as f:
            json.dump(leaderboard, f, indent=2)
        print("βœ“ Created metadata/leaderboard.json")

        # 2. Create users directory (empty; .gitkeep preserves it in git).
        # NOTE(review): the original called create_example_files() here but
        # never wrote the result anywhere, so the dead call was removed.
        users_dir = temp_dir / "users"
        users_dir.mkdir(exist_ok=True)

        with open(users_dir / ".gitkeep", "w") as f:
            f.write("# User profiles stored here\n")
        print("βœ“ Created users/ directory")

        # 3. Create piclets directory (empty; .gitkeep preserves it in git)
        piclets_dir = temp_dir / "piclets"
        piclets_dir.mkdir(exist_ok=True)

        with open(piclets_dir / ".gitkeep", "w") as f:
            f.write("# Canonical piclets and variations stored here\n")
        print("βœ“ Created piclets/ directory")

        # 4. Create README
        readme_content = """# Piclets Discovery Dataset

This dataset stores the canonical Piclets, variations, and user profiles for the Piclets Discovery game.

## Structure

```
metadata/
  stats.json          # Global statistics
  leaderboard.json    # Top discoverers and recent activity

users/
  {sub}.json          # User profiles keyed by HuggingFace user ID

piclets/
  {normalized_name}.json  # Canonical piclet + variations
```

## Authentication

All endpoints require HuggingFace OAuth tokens:
- Frontend sends `Authorization: Bearer <token>` headers
- Server verifies via `https://huggingface.co/oauth/userinfo`
- User profiles use stable `sub` field as primary key

## API

Server endpoint: `Fraser/piclets-server` (HuggingFace Space)

See server repository for full API documentation.
"""

        with open(temp_dir / "README.md", "w") as f:
            f.write(readme_content)
        print("βœ“ Created README.md")

        # 5. Upload the whole staged tree in a single commit
        print(f"\nUploading to {DATASET_REPO}...")

        api.upload_folder(
            folder_path=str(temp_dir),
            repo_id=DATASET_REPO,
            repo_type="dataset",
            token=HF_TOKEN,
            commit_message="Initialize dataset structure for Piclets Discovery"
        )

        print("\nβœ“ Dataset initialized successfully!")
        print(f"View at: https://huggingface.co/datasets/{DATASET_REPO}")

        return True

    except Exception as e:
        print(f"\nβœ— Error initializing dataset: {e}")
        return False

    finally:
        # Always remove the staging directory, success or failure.
        if temp_dir.exists():
            shutil.rmtree(temp_dir)


def verify_dataset_structure():
    """Check that the dataset repository has been initialized.

    Downloads ``metadata/stats.json`` from DATASET_REPO and reports the
    piclet count it contains.

    Returns:
        bool: True if the metadata file exists and parses, False otherwise.
    """
    if not HF_TOKEN:
        print("ERROR: HF_TOKEN not set")
        return False

    print(f"Verifying dataset structure: {DATASET_REPO}")

    try:
        # Fetching stats.json doubles as an existence check for the repo.
        local_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename="metadata/stats.json",
            repo_type="dataset",
            token=HF_TOKEN,
        )
        with open(local_path) as fh:
            stats = json.load(fh)
        print(f"βœ“ Dataset exists with {stats['totalPiclets']} piclets")
        print(f"βœ“ Structure verified")
        return True
    except Exception as err:
        print(f"βœ— Dataset not initialized or error: {err}")
        return False


if __name__ == "__main__":
    import sys

    # "--verify" switches from initialization to a read-only structure check.
    verify_mode = len(sys.argv) > 1 and sys.argv[1] == "--verify"

    if verify_mode:
        sys.exit(0 if verify_dataset_structure() else 1)

    banner = "=" * 60
    print(banner)
    print("Piclets Discovery Dataset Initialization")
    print(banner)
    print()

    if not upload_initial_structure():
        sys.exit(1)

    print("\nNext steps:")
    print("1. Verify at https://huggingface.co/datasets/Fraser/piclets")
    print("2. Test with: python init_dataset.py --verify")
    print("3. Deploy piclets-server to HuggingFace Space")
    sys.exit(0)