Fraser committed on
Commit
f9201f6
·
1 Parent(s): cc4ae68
Files changed (5) hide show
  1. API_DOCUMENTATION.md +94 -17
  2. app.py +148 -26
  3. auth.py +95 -0
  4. init_dataset.py +243 -0
  5. requirements.txt +2 -1
API_DOCUMENTATION.md CHANGED
@@ -91,7 +91,7 @@ const result = await client.predict("/search_piclet", {
91
 
92
  ### 2. Create Canonical
93
  **Endpoint**: `/create_canonical`
94
- **Purpose**: Register the first discovery of an object
95
  **Method**: Gradio function call
96
 
97
  **Input Parameters**:
@@ -99,7 +99,7 @@ const result = await client.predict("/search_piclet", {
99
  {
100
  "object_name": "pillow",
101
  "piclet_data": "{ /* JSON string of Piclet instance */ }",
102
- "username": "discoverer123"
103
  }
104
  ```
105
 
@@ -111,7 +111,11 @@ const result = await client.predict("/search_piclet", {
111
  "piclet": {
112
  "objectName": "pillow",
113
  "typeId": "pillow_canonical",
114
- "discoveredBy": "discoverer123",
 
 
 
 
115
  "discoveredAt": "2024-07-26T10:30:00",
116
  "scanCount": 1,
117
  "picletData": { /* full Piclet data */ }
@@ -119,7 +123,13 @@ const result = await client.predict("/search_piclet", {
119
  }
120
  ```
121
 
122
- **Error Response**:
 
 
 
 
 
 
123
  ```json
124
  {
125
  "success": false,
@@ -127,9 +137,15 @@ const result = await client.predict("/search_piclet", {
127
  }
128
  ```
129
 
 
 
 
 
 
 
130
  ### 3. Create Variation
131
  **Endpoint**: `/create_variation`
132
- **Purpose**: Add a variation to an existing canonical Piclet
133
  **Method**: Gradio function call
134
 
135
  **Input Parameters**:
@@ -138,7 +154,7 @@ const result = await client.predict("/search_piclet", {
138
  "canonical_id": "pillow_canonical",
139
  "attributes": ["velvet", "blue"],
140
  "piclet_data": "{ /* JSON string of variation data */ }",
141
- "username": "player456",
142
  "object_name": "pillow"
143
  }
144
  ```
@@ -152,6 +168,10 @@ const result = await client.predict("/search_piclet", {
152
  "typeId": "pillow_001",
153
  "attributes": ["velvet", "blue"],
154
  "discoveredBy": "player456",
 
 
 
 
155
  "discoveredAt": "2024-07-26T11:00:00",
156
  "scanCount": 1,
157
  "picletData": { /* variation data */ }
@@ -159,6 +179,25 @@ const result = await client.predict("/search_piclet", {
159
  }
160
  ```
161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  ### 4. Increment Scan Count
163
  **Endpoint**: `/increment_scan_count`
164
  **Purpose**: Track how many times a Piclet has been discovered
@@ -261,7 +300,7 @@ const result = await client.predict("/search_piclet", {
261
  **Input Parameters**:
262
  ```json
263
  {
264
- "username": "player123"
265
  }
266
  ```
267
 
@@ -270,16 +309,28 @@ const result = await client.predict("/search_piclet", {
270
  {
271
  "success": true,
272
  "profile": {
273
- "username": "player123",
 
 
 
 
274
  "joinedAt": "2024-07-01T10:00:00",
 
275
  "discoveries": ["pillow_canonical", "chair_002", "lamp_canonical"],
276
  "uniqueFinds": 2,
277
  "totalFinds": 3,
278
- "rarityScore": 250
 
279
  }
280
  }
281
  ```
282
 
 
 
 
 
 
 
283
  ## Object Normalization Rules
284
 
285
  The server normalizes object names for consistent storage:
@@ -340,15 +391,41 @@ Currently no rate limiting implemented. For production:
340
 
341
  ## Authentication
342
 
343
- **Current**: Username-based (no passwords)
344
- - Users provide username in requests
345
- - All data is publicly visible
346
- - No sensitive information stored
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
 
348
- **Future Options**:
349
- - HuggingFace OAuth integration
350
- - API keys for verified users
351
- - Session-based authentication
 
 
 
352
 
353
  ## Data Storage
354
 
 
91
 
92
  ### 2. Create Canonical
93
  **Endpoint**: `/create_canonical`
94
+ **Purpose**: Register the first discovery of an object with OAuth verification
95
  **Method**: Gradio function call
96
 
97
  **Input Parameters**:
 
99
  {
100
  "object_name": "pillow",
101
  "piclet_data": "{ /* JSON string of Piclet instance */ }",
102
+ "token_or_username": "hf_xxxxxxxxxxxxx" // OAuth token or username for testing
103
  }
104
  ```
105
 
 
111
  "piclet": {
112
  "objectName": "pillow",
113
  "typeId": "pillow_canonical",
114
+ "discoveredBy": "username123",
115
+ "discovererSub": "987654321",
116
+ "discovererUsername": "username123",
117
+ "discovererName": "Display Name",
118
+ "discovererPicture": "https://avatars.huggingface.co/...",
119
  "discoveredAt": "2024-07-26T10:30:00",
120
  "scanCount": 1,
121
  "picletData": { /* full Piclet data */ }
 
123
  }
124
  ```
125
 
126
+ **Error Responses**:
127
+ ```json
128
+ {
129
+ "success": false,
130
+ "error": "Invalid OAuth token"
131
+ }
132
+ ```
133
  ```json
134
  {
135
  "success": false,
 
137
  }
138
  ```
139
 
140
+ **Notes**:
141
+ - If `token_or_username` starts with `hf_`, it's verified as an OAuth token
142
+ - Token verification calls `https://huggingface.co/oauth/userinfo`
143
+ - User profile is created/updated with cached OAuth fields
144
+ - Legacy mode: Plain usernames create `legacy_{username}` profiles
145
+
146
  ### 3. Create Variation
147
  **Endpoint**: `/create_variation`
148
+ **Purpose**: Add a variation to an existing canonical Piclet with OAuth verification
149
  **Method**: Gradio function call
150
 
151
  **Input Parameters**:
 
154
  "canonical_id": "pillow_canonical",
155
  "attributes": ["velvet", "blue"],
156
  "piclet_data": "{ /* JSON string of variation data */ }",
157
+ "token_or_username": "hf_xxxxxxxxxxxxx", // OAuth token or username for testing
158
  "object_name": "pillow"
159
  }
160
  ```
 
168
  "typeId": "pillow_001",
169
  "attributes": ["velvet", "blue"],
170
  "discoveredBy": "player456",
171
+ "discovererSub": "123456789",
172
+ "discovererUsername": "player456",
173
+ "discovererName": "Player Name",
174
+ "discovererPicture": "https://avatars.huggingface.co/...",
175
  "discoveredAt": "2024-07-26T11:00:00",
176
  "scanCount": 1,
177
  "picletData": { /* variation data */ }
 
179
  }
180
  ```
181
 
182
+ **Error Responses**:
183
+ ```json
184
+ {
185
+ "success": false,
186
+ "error": "Invalid OAuth token"
187
+ }
188
+ ```
189
+ ```json
190
+ {
191
+ "success": false,
192
+ "error": "Canonical Piclet not found for 'pillow'"
193
+ }
194
+ ```
195
+
196
+ **Notes**:
197
+ - Same OAuth verification as create_canonical
198
+ - User profile updated with variation discovery (+50 rarity points)
199
+ - Variation numbering is automatic (pillow_001, pillow_002, etc.)
200
+
201
  ### 4. Increment Scan Count
202
  **Endpoint**: `/increment_scan_count`
203
  **Purpose**: Track how many times a Piclet has been discovered
 
300
  **Input Parameters**:
301
  ```json
302
  {
303
+ "sub": "987654321" // HuggingFace user ID (preferred) or username for legacy
304
  }
305
  ```
306
 
 
309
  {
310
  "success": true,
311
  "profile": {
312
+ "sub": "987654321",
313
+ "preferred_username": "player123",
314
+ "name": "Player Display Name",
315
+ "picture": "https://avatars.huggingface.co/...",
316
+ "email": "user@example.com",
317
  "joinedAt": "2024-07-01T10:00:00",
318
+ "lastSeen": "2024-07-26T12:00:00",
319
  "discoveries": ["pillow_canonical", "chair_002", "lamp_canonical"],
320
  "uniqueFinds": 2,
321
  "totalFinds": 3,
322
+ "rarityScore": 250,
323
+ "visibility": "public"
324
  }
325
  }
326
  ```
327
 
328
+ **Notes**:
329
+ - Profile keyed by `sub` (stable HF user ID), not username
330
+ - OAuth fields (preferred_username, name, picture) cached and refreshed on each login
331
+ - Legacy profiles have `sub = "legacy_{username}"`
332
+ - Visibility can be "public" or "private" (future feature)
333
+
334
  ## Object Normalization Rules
335
 
336
  The server normalizes object names for consistent storage:
 
391
 
392
  ## Authentication
393
 
394
+ **OAuth Token Verification** (Production Mode):
395
+ - Frontend sends `Authorization: Bearer <hf_token>` headers
396
+ - Server verifies tokens via `https://huggingface.co/oauth/userinfo`
397
+ - Returns user info: `sub` (stable ID), `preferred_username`, `name`, `picture`, `email`
398
+ - User profiles keyed by `sub` (HF user ID) instead of username
399
+ - Usernames can change, but `sub` remains stable
400
+
401
+ **Legacy Mode** (Testing Only):
402
+ - For backward compatibility, endpoints accept plain usernames
403
+ - If token doesn't start with `hf_`, treated as username
404
+ - Creates legacy user profile with `sub = "legacy_{username}"`
405
+
406
+ **Example OAuth Flow**:
407
+ ```javascript
408
+ // Frontend: Get OAuth token from HuggingFace Space
409
+ import { HfInference } from "https://cdn.jsdelivr.net/npm/@huggingface/inference/+esm";
410
+ const auth = await hfAuth.signIn();
411
+
412
+ // Make authenticated request
413
+ const response = await fetch('/api/endpoint', {
+ method: 'POST', // a request that carries a body cannot use the default GET
414
+ headers: {
415
+ 'Authorization': `Bearer ${auth.accessToken}`,
416
+ 'Content-Type': 'application/json'
417
+ },
418
+ body: JSON.stringify({ /* payload */ })
419
+ });
420
+ ```
421
 
422
+ **Token Verification Process**:
423
+ 1. Extract Bearer token from Authorization header
424
+ 2. Call `https://huggingface.co/oauth/userinfo` with token
425
+ 3. Verify response status 200
426
+ 4. Extract user info (sub, preferred_username, name, picture)
427
+ 5. Get or create user profile using `sub` as key
428
+ 6. Cache profile fields on each request
429
 
430
  ## Data Storage
431
 
app.py CHANGED
@@ -7,6 +7,7 @@ from typing import Dict, List, Optional, Tuple
7
  from huggingface_hub import HfApi, hf_hub_download, list_repo_files
8
  from pathlib import Path
9
  import tempfile
 
10
 
11
  # HuggingFace configuration
12
  HF_TOKEN = os.getenv("HF_TOKEN") # Required for writing to dataset
@@ -109,10 +110,18 @@ class PicletDiscoveryService:
109
  return False
110
 
111
  @staticmethod
112
- def load_user_data(username: str) -> dict:
113
- """Load user profile from dataset"""
 
 
 
 
 
 
 
 
114
  try:
115
- file_path = f"users/{username.lower()}.json"
116
  local_path = hf_hub_download(
117
  repo_id=DATASET_REPO,
118
  filename=file_path,
@@ -125,20 +134,38 @@ class PicletDiscoveryService:
125
  return json.load(f)
126
  except:
127
  # Return default user profile if not found
 
128
  return {
129
- "username": username,
 
 
 
130
  "joinedAt": datetime.now().isoformat(),
 
131
  "discoveries": [],
132
  "uniqueFinds": 0,
133
  "totalFinds": 0,
134
- "rarityScore": 0
 
135
  }
136
 
137
  @staticmethod
138
- def save_user_data(username: str, data: dict) -> bool:
139
- """Save user profile to dataset"""
 
 
 
 
 
 
 
 
 
140
  try:
141
- file_path = f"users/{username.lower()}.json"
 
 
 
142
 
143
  with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
144
  json.dump(data, f, indent=2)
@@ -149,7 +176,7 @@ class PicletDiscoveryService:
149
  path_in_repo=file_path,
150
  repo_id=DATASET_REPO,
151
  repo_type=DATASET_TYPE,
152
- commit_message=f"Update user profile: {username}"
153
  )
154
 
155
  os.unlink(temp_path)
@@ -158,6 +185,36 @@ class PicletDiscoveryService:
158
  print(f"Failed to save user data: {e}")
159
  return False
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  @staticmethod
162
  def update_global_stats() -> dict:
163
  """Update and return global statistics"""
@@ -234,19 +291,53 @@ def search_piclet(object_name: str, attributes: List[str]) -> dict:
234
  "piclet": None
235
  }
236
 
237
- def create_canonical(object_name: str, piclet_data: str, username: str) -> dict:
238
  """
239
  Create a new canonical Piclet
 
 
 
 
 
 
 
 
240
  """
241
  try:
242
  piclet_json = json.loads(piclet_data) if isinstance(piclet_data, str) else piclet_data
243
 
244
- # Create canonical entry
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  canonical_data = {
246
  "canonical": {
247
  "objectName": object_name,
248
  "typeId": f"{PicletDiscoveryService.normalize_object_name(object_name)}_canonical",
249
- "discoveredBy": username,
 
 
 
 
250
  "discoveredAt": datetime.now().isoformat(),
251
  "scanCount": 1,
252
  "picletData": piclet_json
@@ -257,12 +348,12 @@ def create_canonical(object_name: str, piclet_data: str, username: str) -> dict:
257
  # Save to dataset
258
  if PicletDiscoveryService.save_piclet_data(object_name, canonical_data):
259
  # Update user profile
260
- user_data = PicletDiscoveryService.load_user_data(username)
261
- user_data["discoveries"].append(canonical_data["canonical"]["typeId"])
262
- user_data["uniqueFinds"] += 1
263
- user_data["totalFinds"] += 1
264
- user_data["rarityScore"] += 100 # Bonus for canonical discovery
265
- PicletDiscoveryService.save_user_data(username, user_data)
266
 
267
  return {
268
  "success": True,
@@ -280,13 +371,41 @@ def create_canonical(object_name: str, piclet_data: str, username: str) -> dict:
280
  "error": str(e)
281
  }
282
 
283
- def create_variation(canonical_id: str, attributes: List[str], piclet_data: str, username: str, object_name: str) -> dict:
284
  """
285
- Create a variation of an existing canonical Piclet
 
 
 
 
 
 
 
 
 
 
286
  """
287
  try:
288
  piclet_json = json.loads(piclet_data) if isinstance(piclet_data, str) else piclet_data
289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  # Load existing data
291
  existing_data = PicletDiscoveryService.load_piclet_data(object_name)
292
  if not existing_data:
@@ -300,7 +419,11 @@ def create_variation(canonical_id: str, attributes: List[str], piclet_data: str,
300
  variation = {
301
  "typeId": variation_id,
302
  "attributes": attributes,
303
- "discoveredBy": username,
 
 
 
 
304
  "discoveredAt": datetime.now().isoformat(),
305
  "scanCount": 1,
306
  "picletData": piclet_json
@@ -312,11 +435,10 @@ def create_variation(canonical_id: str, attributes: List[str], piclet_data: str,
312
  # Save updated data
313
  if PicletDiscoveryService.save_piclet_data(object_name, existing_data):
314
  # Update user profile
315
- user_data = PicletDiscoveryService.load_user_data(username)
316
- user_data["discoveries"].append(variation_id)
317
- user_data["totalFinds"] += 1
318
- user_data["rarityScore"] += 50 # Bonus for variation discovery
319
- PicletDiscoveryService.save_user_data(username, user_data)
320
 
321
  return {
322
  "success": True,
 
7
  from huggingface_hub import HfApi, hf_hub_download, list_repo_files
8
  from pathlib import Path
9
  import tempfile
10
+ from auth import verify_hf_token, get_user_from_request_headers
11
 
12
  # HuggingFace configuration
13
  HF_TOKEN = os.getenv("HF_TOKEN") # Required for writing to dataset
 
110
  return False
111
 
112
  @staticmethod
113
+ def load_user_data(sub: str) -> dict:
114
+ """
115
+ Load user profile from dataset by HF user ID (sub)
116
+
117
+ Args:
118
+ sub: HuggingFace user ID (stable identifier)
119
+
120
+ Returns:
121
+ User profile dict or default profile if not found
122
+ """
123
  try:
124
+ file_path = f"users/{sub}.json"
125
  local_path = hf_hub_download(
126
  repo_id=DATASET_REPO,
127
  filename=file_path,
 
134
  return json.load(f)
135
  except:
136
  # Return default user profile if not found
137
+ # Will be populated with actual data on first save
138
  return {
139
+ "sub": sub,
140
+ "preferred_username": None,
141
+ "name": None,
142
+ "picture": None,
143
  "joinedAt": datetime.now().isoformat(),
144
+ "lastSeen": datetime.now().isoformat(),
145
  "discoveries": [],
146
  "uniqueFinds": 0,
147
  "totalFinds": 0,
148
+ "rarityScore": 0,
149
+ "visibility": "public"
150
  }
151
 
152
  @staticmethod
153
+ def save_user_data(sub: str, data: dict) -> bool:
154
+ """
155
+ Save user profile to dataset by HF user ID (sub)
156
+
157
+ Args:
158
+ sub: HuggingFace user ID (stable identifier)
159
+ data: User profile dict
160
+
161
+ Returns:
162
+ True if successful, False otherwise
163
+ """
164
  try:
165
+ file_path = f"users/{sub}.json"
166
+
167
+ # Update lastSeen timestamp
168
+ data["lastSeen"] = datetime.now().isoformat()
169
 
170
  with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
171
  json.dump(data, f, indent=2)
 
176
  path_in_repo=file_path,
177
  repo_id=DATASET_REPO,
178
  repo_type=DATASET_TYPE,
179
+ commit_message=f"Update user profile: {data.get('preferred_username', sub)}"
180
  )
181
 
182
  os.unlink(temp_path)
 
185
  print(f"Failed to save user data: {e}")
186
  return False
187
 
188
@staticmethod
def get_or_create_user_profile(user_info: dict) -> dict:
    """
    Build the up-to-date profile for the user described by ``user_info``.

    The stored profile is loaded through
    ``PicletDiscoveryService.load_user_data`` and its cached identity
    fields are overwritten with whatever ``user_info`` carries on this call.
    Nothing is saved here; the returned dict is only modified in memory.

    Args:
        user_info: OAuth user info dict. ``sub`` is required; the
            ``preferred_username``, ``name``, ``picture`` and ``email``
            entries are optional and are stored as ``None`` when absent.

    Returns:
        The refreshed user profile dict.

    Raises:
        KeyError: If ``user_info`` has no ``sub`` entry.
    """
    user_sub = user_info['sub']
    user_profile = PicletDiscoveryService.load_user_data(user_sub)

    # Refresh the cached identity fields; a field missing from user_info
    # is written as None.
    user_profile['sub'] = user_sub
    for field in ('preferred_username', 'name', 'picture', 'email'):
        user_profile[field] = user_info.get(field)

    # Only stamp joinedAt when the loaded profile has no usable value.
    if not user_profile.get('joinedAt'):
        user_profile['joinedAt'] = datetime.now().isoformat()

    return user_profile
217
+
218
  @staticmethod
219
  def update_global_stats() -> dict:
220
  """Update and return global statistics"""
 
291
  "piclet": None
292
  }
293
 
294
+ def create_canonical(object_name: str, piclet_data: str, token_or_username: str) -> dict:
295
  """
296
  Create a new canonical Piclet
297
+
298
+ Args:
299
+ object_name: The normalized object name (e.g., "pillow")
300
+ piclet_data: JSON string of Piclet instance data
301
+ token_or_username: Either OAuth token (starts with "hf_") or username for testing
302
+
303
+ Returns:
304
+ Dict with success status and piclet data
305
  """
306
  try:
307
  piclet_json = json.loads(piclet_data) if isinstance(piclet_data, str) else piclet_data
308
 
309
+ # Determine if this is a token or username
310
+ user_info = None
311
+ if token_or_username and token_or_username.startswith('hf_'):
312
+ # OAuth token - verify it
313
+ user_info = verify_hf_token(token_or_username)
314
+ if not user_info:
315
+ return {
316
+ "success": False,
317
+ "error": "Invalid OAuth token"
318
+ }
319
+ else:
320
+ # Legacy username mode (for testing)
321
+ user_info = {
322
+ "sub": f"legacy_{token_or_username}",
323
+ "preferred_username": token_or_username,
324
+ "name": token_or_username,
325
+ "picture": None
326
+ }
327
+
328
+ # Get or create user profile
329
+ user_profile = PicletDiscoveryService.get_or_create_user_profile(user_info)
330
+
331
+ # Create canonical entry with full discoverer info
332
  canonical_data = {
333
  "canonical": {
334
  "objectName": object_name,
335
  "typeId": f"{PicletDiscoveryService.normalize_object_name(object_name)}_canonical",
336
+ "discoveredBy": user_info['preferred_username'],
337
+ "discovererSub": user_info['sub'],
338
+ "discovererUsername": user_info['preferred_username'],
339
+ "discovererName": user_info.get('name'),
340
+ "discovererPicture": user_info.get('picture'),
341
  "discoveredAt": datetime.now().isoformat(),
342
  "scanCount": 1,
343
  "picletData": piclet_json
 
348
  # Save to dataset
349
  if PicletDiscoveryService.save_piclet_data(object_name, canonical_data):
350
  # Update user profile
351
+ user_profile["discoveries"].append(canonical_data["canonical"]["typeId"])
352
+ user_profile["uniqueFinds"] += 1
353
+ user_profile["totalFinds"] += 1
354
+ user_profile["rarityScore"] += 100 # Bonus for canonical discovery
355
+
356
+ PicletDiscoveryService.save_user_data(user_info['sub'], user_profile)
357
 
358
  return {
359
  "success": True,
 
371
  "error": str(e)
372
  }
373
 
374
+ def create_variation(canonical_id: str, attributes: List[str], piclet_data: str, token_or_username: str, object_name: str) -> dict:
375
  """
376
+ Create a variation of an existing canonical Piclet with OAuth verification
377
+
378
+ Args:
379
+ canonical_id: ID of the canonical Piclet
380
+ attributes: List of variation attributes
381
+ piclet_data: JSON data for the Piclet
382
+ token_or_username: Either OAuth token (starts with "hf_") or username for testing
383
+ object_name: Normalized object name
384
+
385
+ Returns:
386
+ Success/error dict with variation data
387
  """
388
  try:
389
  piclet_json = json.loads(piclet_data) if isinstance(piclet_data, str) else piclet_data
390
 
391
+ # Verify token or use legacy mode
392
+ user_info = None
393
+ if token_or_username and token_or_username.startswith('hf_'):
394
+ user_info = verify_hf_token(token_or_username)
395
+ if not user_info:
396
+ return {"success": False, "error": "Invalid OAuth token"}
397
+ else:
398
+ # Legacy mode for testing
399
+ user_info = {
400
+ "sub": f"legacy_{token_or_username}",
401
+ "preferred_username": token_or_username,
402
+ "name": token_or_username,
403
+ "picture": None
404
+ }
405
+
406
+ # Get or create user profile
407
+ user_profile = PicletDiscoveryService.get_or_create_user_profile(user_info)
408
+
409
  # Load existing data
410
  existing_data = PicletDiscoveryService.load_piclet_data(object_name)
411
  if not existing_data:
 
419
  variation = {
420
  "typeId": variation_id,
421
  "attributes": attributes,
422
+ "discoveredBy": user_info['preferred_username'],
423
+ "discovererSub": user_info['sub'],
424
+ "discovererUsername": user_info['preferred_username'],
425
+ "discovererName": user_info.get('name'),
426
+ "discovererPicture": user_info.get('picture'),
427
  "discoveredAt": datetime.now().isoformat(),
428
  "scanCount": 1,
429
  "picletData": piclet_json
 
435
  # Save updated data
436
  if PicletDiscoveryService.save_piclet_data(object_name, existing_data):
437
  # Update user profile
438
+ user_profile["discoveries"].append(variation_id)
439
+ user_profile["totalFinds"] += 1
440
+ user_profile["rarityScore"] += 50 # Bonus for variation discovery
441
+ PicletDiscoveryService.save_user_data(user_info['sub'], user_profile)
 
442
 
443
  return {
444
  "success": True,
auth.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OAuth authentication utilities for HuggingFace token verification
3
+ """
4
+ import requests
5
+ from typing import Optional, Dict
6
+
7
+ # HuggingFace OAuth userinfo endpoint
8
+ HF_USERINFO_URL = 'https://huggingface.co/oauth/userinfo'
9
+
10
+
11
def verify_hf_token(token: str) -> Optional[Dict]:
    """
    Verify a HuggingFace OAuth access token and return the user's info.

    The token is sent as a Bearer credential to ``HF_USERINFO_URL``. The
    response is accepted only when the status is 200 and the body is a JSON
    object containing a non-empty ``sub``.

    Args:
        token: The HuggingFace OAuth access token.

    Returns:
        User info dict (expected fields: sub, preferred_username, name,
        picture, email), or None if the token is empty, rejected, or the
        response cannot be used.

    Example:
        {
            "sub": "12345678",
            "preferred_username": "username",
            "name": "Display Name",
            "picture": "https://avatars.huggingface.co/...",
            "email": "user@example.com"
        }
    """
    # A missing or blank token can never be valid; skip the network call.
    if not isinstance(token, str) or not token.strip():
        return None

    try:
        response = requests.get(
            HF_USERINFO_URL,
            headers={'Authorization': f'Bearer {token}'},
            timeout=10
        )
    except requests.RequestException as e:
        print(f"Token verification error: {e}")
        return None

    if response.status_code != 200:
        print(f"Token verification failed with status {response.status_code}")
        return None

    try:
        user_info = response.json()
    except ValueError as e:
        # Body was not valid JSON even though the status was 200.
        print(f"Token verification error: {e}")
        return None

    # Callers index user_info['sub'] directly, so reject payloads without it
    # here instead of letting them fail later with a KeyError.
    if not isinstance(user_info, dict) or not user_info.get('sub'):
        print("Token verification failed: userinfo response has no 'sub'")
        return None

    return user_info
47
+
48
+
49
def extract_token_from_auth_header(auth_header: str) -> Optional[str]:
    """
    Extract the Bearer token from an Authorization header value.

    The scheme name is matched case-insensitively, and surrounding
    whitespace is ignored.

    Args:
        auth_header: The Authorization header value (e.g., "Bearer abc123...")

    Returns:
        The token string, or None if the header is empty, uses another
        scheme, or carries no token after the scheme.
    """
    if not auth_header or not isinstance(auth_header, str):
        return None

    # Split "<scheme> <credentials>" on the first space only, so a token is
    # never cut short.
    scheme, _, credentials = auth_header.strip().partition(' ')
    if scheme.lower() != 'bearer':
        return None

    # "Bearer " with nothing after it is an invalid header, not an empty token.
    token = credentials.strip()
    return token or None
69
+
70
+
71
def get_user_from_request_headers(headers: Dict[str, str]) -> Optional[Dict]:
    """
    Extract and verify user info from request headers.

    Finds the Authorization header regardless of key casing, pulls the
    Bearer token out of it with ``extract_token_from_auth_header`` and
    verifies it with ``verify_hf_token``.

    Args:
        headers: Dict of request headers. May be empty or None.

    Returns:
        User info dict if a valid token is present, None otherwise.
    """
    if not headers:
        return None

    # The first key that equals "authorization" ignoring case wins.
    auth_header = next(
        (value for key, value in headers.items()
         if key.lower() == 'authorization'),
        None,
    )
    if not auth_header:
        return None

    token = extract_token_from_auth_header(auth_header)
    if not token:
        return None

    return verify_hf_token(token)
init_dataset.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Initialize HuggingFace dataset structure for Piclets Discovery game
3
+
4
+ This script sets up the required directory structure and initial files
5
+ in the Fraser/piclets dataset repository.
6
+
7
+ Usage:
8
+ python init_dataset.py
9
+
10
+ Requires:
11
+ - HF_TOKEN environment variable or set in HuggingFace Space secrets
12
+ - Write access to Fraser/piclets dataset
13
+ """
14
+
15
+ import json
16
+ import os
17
+ from datetime import datetime
18
+ from huggingface_hub import HfApi, hf_hub_download
19
+ from pathlib import Path
20
+
21
+ # Configuration
22
+ DATASET_REPO = os.getenv("DATASET_REPO", "Fraser/piclets")
23
+ HF_TOKEN = os.getenv("HF_TOKEN")
24
+
25
def init_metadata_files():
    """
    Build the initial global metadata documents.

    Returns:
        Tuple ``(stats, leaderboard)`` of dicts: zeroed counters and an
        empty leaderboard. Both carry the same ``lastUpdated`` timestamp.
    """
    # Take the timestamp once so both documents agree on when they were made.
    now = datetime.now().isoformat()

    # Initial stats
    stats = {
        "totalPiclets": 0,
        "totalVariations": 0,
        "totalScans": 0,
        "totalUsers": 0,
        "lastUpdated": now,
    }

    # Initial leaderboard (empty)
    leaderboard = {
        "topDiscoverers": [],
        "recentActivity": [],
        "lastUpdated": now,
    }

    return stats, leaderboard
45
+
46
+
47
def create_example_files():
    """
    Build an example user profile and an example piclet record for testing.

    Returns:
        Tuple ``(example_user, example_piclet)`` of dicts. Every timestamp
        in both dicts is the same value, taken once per call.
    """
    # Take the timestamp once so joinedAt/lastSeen/created/lastUpdated match.
    now = datetime.now().isoformat()

    # Example user profile
    example_user = {
        "sub": "example_123456",
        "preferred_username": "example_user",
        "name": "Example User",
        "picture": None,
        "joinedAt": now,
        "lastSeen": now,
        "discoveries": [],
        "uniqueFinds": 0,
        "totalFinds": 0,
        "rarityScore": 0,
        "visibility": "public",
    }

    # Example piclet (empty canonical, ready for discoveries)
    example_piclet = {
        "canonical": None,
        "variations": [],
        "metadata": {
            "created": now,
            "lastUpdated": now,
        },
    }

    return example_user, example_piclet
76
+
77
+
78
def upload_initial_structure():
    """
    Upload the initial dataset structure to HuggingFace.

    Writes ``metadata/stats.json``, ``metadata/leaderboard.json``,
    ``users/.gitkeep``, ``piclets/.gitkeep`` and ``README.md`` into a
    temporary directory, then pushes that directory to ``DATASET_REPO`` in
    a single ``upload_folder`` commit.

    Returns:
        True if the upload succeeded, False if ``HF_TOKEN`` is not set or
        any step raised.
    """
    import tempfile

    if not HF_TOKEN:
        print("ERROR: HF_TOKEN environment variable not set")
        print("Please set HF_TOKEN with write access to the dataset")
        return False

    print(f"Initializing dataset: {DATASET_REPO}")

    api = HfApi()

    readme_content = """# Piclets Discovery Dataset

This dataset stores the canonical Piclets, variations, and user profiles for the Piclets Discovery game.

## Structure

```
metadata/
  stats.json # Global statistics
  leaderboard.json # Top discoverers and recent activity

users/
  {sub}.json # User profiles keyed by HuggingFace user ID

piclets/
  {normalized_name}.json # Canonical piclet + variations
```

## Authentication

All endpoints require HuggingFace OAuth tokens:
- Frontend sends `Authorization: Bearer <token>` headers
- Server verifies via `https://huggingface.co/oauth/userinfo`
- User profiles use stable `sub` field as primary key

## API

Server endpoint: `Fraser/piclets-server` (HuggingFace Space)

See server repository for full API documentation.
"""

    try:
        # A unique temporary directory avoids clobbering (and later deleting)
        # an unrelated "temp_dataset_init" folder in the working directory,
        # and is removed automatically even when the upload fails.
        with tempfile.TemporaryDirectory(prefix="piclets_dataset_init_") as tmp:
            temp_dir = Path(tmp)

            # 1. Create metadata directory and files
            metadata_dir = temp_dir / "metadata"
            metadata_dir.mkdir(exist_ok=True)

            stats, leaderboard = init_metadata_files()

            with open(metadata_dir / "stats.json", "w", encoding="utf-8") as f:
                json.dump(stats, f, indent=2)
            print("✓ Created metadata/stats.json")

            with open(metadata_dir / "leaderboard.json", "w", encoding="utf-8") as f:
                json.dump(leaderboard, f, indent=2)
            print("✓ Created metadata/leaderboard.json")

            # 2. Create users directory (placeholder file keeps it in the repo)
            users_dir = temp_dir / "users"
            users_dir.mkdir(exist_ok=True)

            with open(users_dir / ".gitkeep", "w", encoding="utf-8") as f:
                f.write("# User profiles stored here\n")
            print("✓ Created users/ directory")

            # 3. Create piclets directory (placeholder file keeps it in the repo)
            piclets_dir = temp_dir / "piclets"
            piclets_dir.mkdir(exist_ok=True)

            with open(piclets_dir / ".gitkeep", "w", encoding="utf-8") as f:
                f.write("# Canonical piclets and variations stored here\n")
            print("✓ Created piclets/ directory")

            # 4. Create README
            with open(temp_dir / "README.md", "w", encoding="utf-8") as f:
                f.write(readme_content)
            print("✓ Created README.md")

            # 5. Upload all files to dataset
            print(f"\nUploading to {DATASET_REPO}...")

            api.upload_folder(
                folder_path=str(temp_dir),
                repo_id=DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN,
                commit_message="Initialize dataset structure for Piclets Discovery"
            )

    except Exception as e:
        # Top-level boundary of the script: report and signal failure.
        print(f"\n✗ Error initializing dataset: {e}")
        return False

    print("\n✓ Dataset initialized successfully!")
    print(f"View at: https://huggingface.co/datasets/{DATASET_REPO}")

    return True
188
+
189
+
190
def verify_dataset_structure():
    """
    Verify that the dataset structure exists.

    Downloads ``metadata/stats.json`` from ``DATASET_REPO`` and reads its
    ``totalPiclets`` counter as proof that the dataset was initialized.

    Returns:
        True if the stats file could be downloaded and read, False if
        ``HF_TOKEN`` is not set or any step raised.
    """
    if not HF_TOKEN:
        print("ERROR: HF_TOKEN not set")
        return False

    print(f"Verifying dataset structure: {DATASET_REPO}")

    try:
        # Try to download metadata files
        stats_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename="metadata/stats.json",
            repo_type="dataset",
            token=HF_TOKEN
        )

        with open(stats_path, encoding="utf-8") as f:
            stats = json.load(f)

        print(f"✓ Dataset exists with {stats['totalPiclets']} piclets")
        print("✓ Structure verified")
        return True

    except Exception as e:
        # Covers a missing repo/file as well as a malformed stats document.
        print(f"✗ Dataset not initialized or error: {e}")
        return False
218
+
219
+
220
if __name__ == "__main__":
    import sys

    # Verify mode: exit status mirrors the verification result.
    if len(sys.argv) > 1 and sys.argv[1] == "--verify":
        sys.exit(0 if verify_dataset_structure() else 1)

    # Initialize mode
    print("=" * 60)
    print("Piclets Discovery Dataset Initialization")
    print("=" * 60)
    print()

    if not upload_initial_structure():
        sys.exit(1)

    print("\nNext steps:")
    # Point at the repo that was actually initialized; DATASET_REPO can be
    # overridden through the environment.
    print(f"1. Verify at https://huggingface.co/datasets/{DATASET_REPO}")
    print("2. Test with: python init_dataset.py --verify")
    print("3. Deploy piclets-server to HuggingFace Space")
    sys.exit(0)
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  gradio==5.38.2
2
  Pillow>=9.0.0
3
  huggingface_hub>=0.20.0
4
- datasets>=2.15.0
 
 
1
  gradio==5.38.2
2
  Pillow>=9.0.0
3
  huggingface_hub>=0.20.0
4
+ datasets>=2.15.0
5
+ requests>=2.31.0