# piclets-server / app.py
# Hugging Face file-viewer header: uploaded by Fraser, commit "server" (cc4ae68), 21.7 kB
# (viewer links: raw, history, blame)
import gradio as gr
import json
import os
import re
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from huggingface_hub import HfApi, hf_hub_download, list_repo_files
from pathlib import Path
import tempfile
# --- HuggingFace configuration ---
# Token read from the environment; required for writing to the dataset.
HF_TOKEN = os.getenv("HF_TOKEN")
# Public dataset repository holding the piclet and user JSON files.
DATASET_REPO = "Fraser/piclets"
DATASET_TYPE = "dataset"

# Build the API client, passing the token only when one is configured.
if HF_TOKEN:
    api = HfApi(token=HF_TOKEN)
else:
    api = HfApi()

# Directory passed to hf_hub_download as its cache; created at import time.
CACHE_DIR = Path("cache")
CACHE_DIR.mkdir(exist_ok=True)
class PicletDiscoveryService:
    """Manages Piclet discovery using HuggingFace datasets.

    Every method is a static helper that reads or writes JSON files in the
    dataset repository named by ``DATASET_REPO``: ``piclets/<name>.json`` for
    piclet records, ``users/<name>.json`` for user profiles and
    ``metadata/stats.json`` for global statistics.
    """

    @staticmethod
    def normalize_object_name(name: str) -> str:
        """
        Normalize object names for consistent storage and lookup.

        Steps: lowercase, drop one leading article, drop characters other
        than letters/digits/whitespace, singularize the final word with a few
        simple English rules, then join words with underscores.

        Examples:
            "The Blue Pillow" -> "blue_pillow"
            "wooden chairs"   -> "wooden_chair"
            "boxes"           -> "box"
            "apples"          -> "apple"
            "wooden_chair"    -> "wooden_chair"

        Returns "unknown" when the input is empty or nothing usable remains.
        """
        if not name:
            return "unknown"

        # Convert to lowercase and strip
        name = name.lower().strip()

        # Remove a leading article (the, a, an)
        name = re.sub(r'^(the|a|an)\s+', '', name)

        # Underscores are the word separator this function emits; turn them
        # back into spaces so the next step does not delete them.
        name = name.replace('_', ' ')

        # Remove special characters except spaces; strip again so trailing
        # whitespace cannot hide a plural ending from the checks below.
        name = re.sub(r'[^a-z0-9\s]', '', name).strip()
        if not name:
            return "unknown"

        # Handle common plurals (basic pluralization rules)
        if name.endswith('ies') and len(name) > 4:
            name = name[:-3] + 'y'  # berries -> berry
        elif name.endswith('ves') and len(name) > 4:
            name = name[:-3] + 'f'  # leaves -> leaf
        elif name.endswith(('sses', 'xes', 'ches', 'shes')) and len(name) > 3:
            name = name[:-2]  # boxes -> box, glasses -> glass
        elif name.endswith('s') and len(name) > 2 and not name.endswith('ss'):
            name = name[:-1]  # chairs -> chair, apples -> apple (keep glass)

        # Replace runs of whitespace with single underscores
        return re.sub(r'\s+', '_', name)

    @staticmethod
    def _download_json(file_path: str) -> dict:
        """Download ``file_path`` from the dataset repo and return the parsed JSON."""
        local_path = hf_hub_download(
            repo_id=DATASET_REPO,
            filename=file_path,
            repo_type=DATASET_TYPE,
            token=HF_TOKEN,
            cache_dir=str(CACHE_DIR)
        )
        with open(local_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    @staticmethod
    def _upload_json(file_path: str, data: dict, commit_message: str) -> None:
        """Serialize ``data`` to a temp file and upload it as ``file_path``.

        The temp file is removed even when serialization or the upload fails.
        """
        temp_path = None
        try:
            with tempfile.NamedTemporaryFile(
                mode='w', suffix='.json', delete=False, encoding='utf-8'
            ) as f:
                temp_path = f.name
                json.dump(data, f, indent=2)
            api.upload_file(
                path_or_fileobj=temp_path,
                path_in_repo=file_path,
                repo_id=DATASET_REPO,
                repo_type=DATASET_TYPE,
                commit_message=commit_message
            )
        finally:
            if temp_path is not None and os.path.exists(temp_path):
                os.unlink(temp_path)

    @staticmethod
    def _user_file_path(username: str) -> str:
        """Return the repo path of a user's profile file.

        Raises:
            ValueError: if the username is blank or contains path components,
                since it is interpolated directly into a repository path.
        """
        key = username.lower()
        if not key.strip() or '/' in key or '\\' in key or key in ('.', '..'):
            raise ValueError(f"Invalid username: {username!r}")
        return f"users/{key}.json"

    @staticmethod
    def load_piclet_data(object_name: str) -> Optional[dict]:
        """Load Piclet data from HuggingFace dataset.

        The name is normalized first. Returns the parsed JSON, or None when
        the file cannot be downloaded or parsed (the error is printed).
        """
        try:
            normalized_name = PicletDiscoveryService.normalize_object_name(object_name)
            return PicletDiscoveryService._download_json(f"piclets/{normalized_name}.json")
        except Exception as e:
            print(f"Could not load piclet data for {object_name}: {e}")
            return None

    @staticmethod
    def save_piclet_data(object_name: str, data: dict) -> bool:
        """Save Piclet data to HuggingFace dataset.

        Returns True on success, False (after printing the error) otherwise.
        """
        try:
            normalized_name = PicletDiscoveryService.normalize_object_name(object_name)
            PicletDiscoveryService._upload_json(
                f"piclets/{normalized_name}.json",
                data,
                f"Update piclet: {normalized_name}"
            )
            return True
        except Exception as e:
            print(f"Failed to save piclet data: {e}")
            return False

    @staticmethod
    def load_user_data(username: str) -> dict:
        """Load user profile from dataset.

        Returns the stored profile, or a fresh default profile when the file
        cannot be loaded for any reason (the reason is printed).
        """
        try:
            file_path = PicletDiscoveryService._user_file_path(username)
            return PicletDiscoveryService._download_json(file_path)
        except Exception as e:
            print(f"Could not load user data for {username}: {e}")
            # Return default user profile if not found
            return {
                "username": username,
                "joinedAt": datetime.now().isoformat(),
                "discoveries": [],
                "uniqueFinds": 0,
                "totalFinds": 0,
                "rarityScore": 0
            }

    @staticmethod
    def save_user_data(username: str, data: dict) -> bool:
        """Save user profile to dataset.

        Returns True on success, False (after printing the error) otherwise,
        including when the username is rejected as a file name.
        """
        try:
            PicletDiscoveryService._upload_json(
                PicletDiscoveryService._user_file_path(username),
                data,
                f"Update user profile: {username}"
            )
            return True
        except Exception as e:
            print(f"Failed to save user data: {e}")
            return False

    @staticmethod
    def update_global_stats() -> dict:
        """Return global statistics.

        Reads ``metadata/stats.json``; when it cannot be loaded, returns a
        zeroed default. NOTE: despite the name, nothing is recomputed or
        written back here.
        """
        try:
            return PicletDiscoveryService._download_json("metadata/stats.json")
        except Exception as e:
            print(f"Failed to update global stats: {e}")
            return {
                "totalDiscoveries": 0,
                "uniqueObjects": 0,
                "totalVariations": 0,
                "lastUpdated": datetime.now().isoformat()
            }
# API Endpoints
def search_piclet(object_name: str, attributes: List[str]) -> dict:
    """
    Search for canonical Piclet or variations.

    Args:
        object_name: Object to look up (normalized by the discovery service).
        attributes: Attributes describing a variation; blank entries are
            ignored. With no usable attributes the canonical Piclet is returned.

    Returns:
        A dict with "status" set to one of:
        "new" (no record), "existing" (canonical returned), "variation"
        (best-matching variation returned) or "new_variation" (record exists
        but no variation shares at least half of the requested attributes).
    """
    piclet_data = PicletDiscoveryService.load_piclet_data(object_name)
    if not piclet_data:
        return {
            "status": "new",
            "message": f"No Piclet found for '{object_name}'",
            "piclet": None
        }

    canonical = piclet_data.get("canonical")

    # Blank entries (e.g. from a trailing comma) must not count towards the
    # overlap threshold.
    search_attrs = {a.strip() for a in (attributes or []) if a and a.strip()}

    # Check if searching for canonical (no attributes)
    if not search_attrs:
        return {
            "status": "existing",
            "message": f"Found canonical Piclet for '{object_name}'",
            "piclet": canonical
        }

    canonical_id = (canonical or {}).get("typeId")

    # Pick the variation with the largest overlap among those sharing at
    # least 50% of the requested attributes; ties keep the earliest one.
    threshold = len(search_attrs) * 0.5
    best_variation = None
    best_overlap = 0
    for variation in piclet_data.get("variations", []):
        overlap = len(search_attrs.intersection(variation.get("attributes", [])))
        if overlap >= threshold and overlap > best_overlap:
            best_variation = variation
            best_overlap = overlap

    if best_variation is not None:
        return {
            "status": "variation",
            "message": f"Found variation of '{object_name}'",
            "piclet": best_variation,
            "canonicalId": canonical_id
        }

    # No variation found, suggest creating one
    return {
        "status": "new_variation",
        "message": f"No variation found for '{object_name}' with attributes {attributes}",
        "canonicalId": canonical_id,
        "piclet": None
    }
def create_canonical(object_name: str, piclet_data: str, username: str) -> dict:
    """
    Create a new canonical Piclet.

    Args:
        object_name: Object the Piclet represents.
        piclet_data: Piclet payload, either a JSON string or an already
            parsed object.
        username: Discoverer credited in the record and in their profile.

    Returns:
        {"success": True, "message": ..., "piclet": <canonical entry>} on
        success, otherwise {"success": False, "error": <reason>}. Creation is
        refused when a canonical entry already exists for the object.
    """
    try:
        piclet_json = json.loads(piclet_data) if isinstance(piclet_data, str) else piclet_data

        # Never replace an existing canonical: doing so would also discard
        # every variation stored alongside it. (A failed load is reported as
        # None by the service, so this guard is best-effort.)
        existing = PicletDiscoveryService.load_piclet_data(object_name)
        if existing and existing.get("canonical"):
            return {
                "success": False,
                "error": f"Canonical Piclet already exists for '{object_name}'"
            }

        type_id = f"{PicletDiscoveryService.normalize_object_name(object_name)}_canonical"

        # Create canonical entry
        canonical_data = {
            "canonical": {
                "objectName": object_name,
                "typeId": type_id,
                "discoveredBy": username,
                "discoveredAt": datetime.now().isoformat(),
                "scanCount": 1,
                "picletData": piclet_json
            },
            "variations": existing.get("variations", []) if existing else []
        }

        # Save to dataset
        if not PicletDiscoveryService.save_piclet_data(object_name, canonical_data):
            return {
                "success": False,
                "error": "Failed to save canonical Piclet"
            }

        # Update user profile; tolerate stored profiles that lack a field so
        # a saved Piclet is not reported as a failure.
        user_data = PicletDiscoveryService.load_user_data(username)
        user_data.setdefault("discoveries", []).append(type_id)
        user_data["uniqueFinds"] = user_data.get("uniqueFinds", 0) + 1
        user_data["totalFinds"] = user_data.get("totalFinds", 0) + 1
        user_data["rarityScore"] = user_data.get("rarityScore", 0) + 100  # Bonus for canonical discovery
        if not PicletDiscoveryService.save_user_data(username, user_data):
            print(f"Canonical Piclet saved but profile update failed for {username}")

        return {
            "success": True,
            "message": f"Created canonical Piclet for '{object_name}'",
            "piclet": canonical_data["canonical"]
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }
def create_variation(canonical_id: str, attributes: List[str], piclet_data: str, username: str, object_name: str) -> dict:
    """
    Create a variation of an existing canonical Piclet.

    Args:
        canonical_id: ID of the canonical Piclet. Currently not used: the
            record is located through ``object_name``.
        attributes: Attributes that distinguish this variation.
        piclet_data: Piclet payload, either a JSON string or a parsed object.
        username: Discoverer credited in the record and in their profile.
        object_name: Object whose record receives the variation.

    Returns:
        {"success": True, "message": ..., "piclet": <variation entry>} on
        success, otherwise {"success": False, "error": <reason>}.
    """
    try:
        piclet_json = json.loads(piclet_data) if isinstance(piclet_data, str) else piclet_data

        # Load existing data
        existing_data = PicletDiscoveryService.load_piclet_data(object_name)
        if not existing_data:
            return {
                "success": False,
                "error": f"Canonical Piclet not found for '{object_name}'"
            }

        # Records without a "variations" list are treated as having none.
        variations = existing_data.setdefault("variations", [])

        # Create variation entry; IDs are numbered from the current count.
        normalized = PicletDiscoveryService.normalize_object_name(object_name)
        variation_id = f"{normalized}_{len(variations) + 1:03d}"
        variation = {
            "typeId": variation_id,
            "attributes": attributes,
            "discoveredBy": username,
            "discoveredAt": datetime.now().isoformat(),
            "scanCount": 1,
            "picletData": piclet_json
        }

        # Add to variations
        variations.append(variation)

        # Save updated data
        if not PicletDiscoveryService.save_piclet_data(object_name, existing_data):
            return {
                "success": False,
                "error": "Failed to save variation"
            }

        # Update user profile; tolerate stored profiles that lack a field so
        # a saved variation is not reported as a failure.
        user_data = PicletDiscoveryService.load_user_data(username)
        user_data.setdefault("discoveries", []).append(variation_id)
        user_data["totalFinds"] = user_data.get("totalFinds", 0) + 1
        user_data["rarityScore"] = user_data.get("rarityScore", 0) + 50  # Bonus for variation discovery
        if not PicletDiscoveryService.save_user_data(username, user_data):
            print(f"Variation saved but profile update failed for {username}")

        return {
            "success": True,
            "message": f"Created variation of '{object_name}'",
            "piclet": variation
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }
def increment_scan_count(piclet_id: str, object_name: str) -> dict:
    """
    Bump the scan counter of one Piclet (canonical or variation) by one.

    The record for ``object_name`` is loaded, the entry whose "typeId"
    equals ``piclet_id`` is located (canonical first, then variations in
    order), its "scanCount" is incremented and the record is saved.

    Returns {"success": True, "scanCount": <new value>} on success, otherwise
    {"success": False, "error": <reason>}.
    """
    try:
        record = PicletDiscoveryService.load_piclet_data(object_name)
        if not record:
            return {
                "success": False,
                "error": "Piclet not found"
            }

        canonical = record["canonical"]
        if canonical["typeId"] == piclet_id:
            target = canonical
        else:
            # First variation carrying the requested ID, if any.
            target = next(
                (entry for entry in record["variations"] if entry["typeId"] == piclet_id),
                None,
            )
            if target is None:
                return {
                    "success": False,
                    "error": "Piclet ID not found"
                }

        # A missing counter is treated as zero.
        target["scanCount"] = target.get("scanCount", 0) + 1
        new_count = target["scanCount"]

        # Persist the updated record.
        if not PicletDiscoveryService.save_piclet_data(object_name, record):
            return {
                "success": False,
                "error": "Failed to update scan count"
            }
        return {
            "success": True,
            "scanCount": new_count
        }
    except Exception as exc:
        return {
            "success": False,
            "error": str(exc)
        }
def get_recent_activity(limit: int = 20) -> dict:
    """
    Get recent discoveries across all users.

    Reads the last ``limit`` piclet files in repository listing order
    (a simplification, not a true activity log), collects each canonical
    entry plus up to five of its latest variations, and returns the newest
    ``limit`` entries by "discoveredAt".

    Args:
        limit: Maximum number of files read and activities returned. Whole
            floats (as a UI slider may supply) are accepted; values below
            zero are treated as zero.

    Returns:
        {"success": True, "activities": [...]} or, on an unexpected error,
        {"success": False, "error": <reason>, "activities": []}.
    """
    try:
        # Slicing needs an int; a float raises TypeError and -0 would select
        # every file.
        limit = max(int(limit), 0)
        activities = []

        # List all piclet files
        try:
            files = list_repo_files(
                repo_id=DATASET_REPO,
                repo_type=DATASET_TYPE,
                token=HF_TOKEN
            )
            piclet_files = [f for f in files if f.startswith("piclets/") and f.endswith(".json")]
        except Exception as e:
            print(f"Could not list piclet files: {e}")
            piclet_files = []

        if limit == 0:
            piclet_files = []

        # Load recent piclets (simplified - in production, maintain a separate activity log)
        for file_path in piclet_files[-limit:]:
            try:
                object_name = file_path[len("piclets/"):-len(".json")]

                # Download by exact path. Passing the file stem through name
                # normalization again would strip its underscores and miss
                # every multi-word piclet.
                local_path = hf_hub_download(
                    repo_id=DATASET_REPO,
                    filename=file_path,
                    repo_type=DATASET_TYPE,
                    token=HF_TOKEN,
                    cache_dir=str(CACHE_DIR)
                )
                with open(local_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                if not data:
                    continue

                # Add canonical discovery
                canonical = data["canonical"]
                activities.append({
                    "type": "discovery",
                    "objectName": object_name,
                    "typeId": canonical["typeId"],
                    "discoveredBy": canonical["discoveredBy"],
                    "discoveredAt": canonical["discoveredAt"],
                    "scanCount": canonical.get("scanCount", 1)
                })

                # Add recent variations
                for variation in data.get("variations", [])[-5:]:
                    activities.append({
                        "type": "variation",
                        "objectName": object_name,
                        "typeId": variation["typeId"],
                        "attributes": variation["attributes"],
                        "discoveredBy": variation["discoveredBy"],
                        "discoveredAt": variation["discoveredAt"],
                        "scanCount": variation.get("scanCount", 1)
                    })
            except Exception as e:
                # One unreadable or malformed file must not hide the rest.
                print(f"Skipping {file_path}: {e}")
                continue

        # Sort by discovery date, newest first
        activities.sort(key=lambda x: x.get("discoveredAt", ""), reverse=True)
        return {
            "success": True,
            "activities": activities[:limit]
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "activities": []
        }
def get_leaderboard(limit: int = 10) -> dict:
    """
    Get top discoverers.

    Loads every ``users/*.json`` profile, orders them by "rarityScore"
    (highest first) and returns the first ``limit`` with a 1-based "rank".

    Args:
        limit: Maximum number of entries. Whole floats (as a UI slider may
            supply) are accepted; values below zero are treated as zero.

    Returns:
        {"success": True, "leaderboard": [...]} or, on an unexpected error,
        {"success": False, "error": <reason>, "leaderboard": []}.
    """
    try:
        # Slicing needs an int; a float would raise TypeError.
        limit = max(int(limit), 0)

        # List all user files
        try:
            files = list_repo_files(
                repo_id=DATASET_REPO,
                repo_type=DATASET_TYPE,
                token=HF_TOKEN
            )
            user_files = [f for f in files if f.startswith("users/") and f.endswith(".json")]
        except Exception as e:
            print(f"Could not list user files: {e}")
            user_files = []

        # Load user data
        entries = []
        for file_path in user_files:
            try:
                username = file_path[len("users/"):-len(".json")]
                user_data = PicletDiscoveryService.load_user_data(username)
                entries.append({
                    "username": username,
                    "totalFinds": user_data.get("totalFinds", 0),
                    "uniqueFinds": user_data.get("uniqueFinds", 0),
                    "rarityScore": user_data.get("rarityScore", 0)
                })
            except Exception as e:
                # One bad profile must not hide the rest.
                print(f"Skipping {file_path}: {e}")
                continue

        # Sort by rarity score and keep the top entries
        top = sorted(entries, key=lambda x: x["rarityScore"], reverse=True)[:limit]

        # Add ranks
        for rank, entry in enumerate(top, start=1):
            entry["rank"] = rank

        return {
            "success": True,
            "leaderboard": top
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "leaderboard": []
        }
def get_user_profile(username: str) -> dict:
    """
    Fetch the discovery profile stored for ``username``.

    Returns {"success": True, "profile": <profile dict>} when the service
    call completes, or {"success": False, "error": <reason>, "profile": None}
    when it raises.
    """
    try:
        profile = PicletDiscoveryService.load_user_data(username)
    except Exception as exc:
        return {
            "success": False,
            "error": str(exc),
            "profile": None
        }
    return {
        "success": True,
        "profile": profile
    }
# Create Gradio interface
def _parse_attributes(raw: str) -> list:
    """Split a comma-separated textbox value into trimmed, non-empty attributes."""
    if not raw:
        return []
    return [part.strip() for part in raw.split(",") if part.strip()]


# Every event below gets an explicit api_name so the endpoint names match the
# list in the "API Endpoints" section (lambda handlers would otherwise get
# auto-generated names). New events are appended after the existing ones so
# the order of the original events is unchanged.
with gr.Blocks(title="Piclets Discovery Server") as app:
    gr.Markdown("""
    # 🔍 Piclets Discovery Server
    Backend service for the Piclets discovery game. Each real-world object has ONE canonical Piclet!
    """)

    with gr.Tab("Search Piclet"):
        with gr.Row():
            with gr.Column():
                search_object = gr.Textbox(label="Object Name", placeholder="e.g., pillow")
                search_attrs = gr.Textbox(label="Attributes (comma-separated)", placeholder="e.g., velvet, blue")
                search_btn = gr.Button("Search", variant="primary")
            with gr.Column():
                search_result = gr.JSON(label="Search Result")
        search_btn.click(
            fn=lambda obj, attrs: search_piclet(obj, _parse_attributes(attrs)),
            inputs=[search_object, search_attrs],
            outputs=search_result,
            api_name="search_piclet"
        )

    with gr.Tab("Create Canonical"):
        with gr.Row():
            with gr.Column():
                canonical_object = gr.Textbox(label="Object Name")
                canonical_data = gr.Textbox(label="Piclet Data (JSON)", lines=10)
                canonical_user = gr.Textbox(label="Username")
                canonical_btn = gr.Button("Create Canonical", variant="primary")
            with gr.Column():
                canonical_result = gr.JSON(label="Creation Result")
        canonical_btn.click(
            fn=create_canonical,
            inputs=[canonical_object, canonical_data, canonical_user],
            outputs=canonical_result,
            api_name="create_canonical"
        )

    with gr.Tab("Create Variation"):
        with gr.Row():
            with gr.Column():
                var_object = gr.Textbox(label="Object Name")
                var_canonical = gr.Textbox(label="Canonical ID")
                var_attrs = gr.Textbox(label="Variation Attributes (comma-separated)")
                var_data = gr.Textbox(label="Piclet Data (JSON)", lines=10)
                var_user = gr.Textbox(label="Username")
                var_btn = gr.Button("Create Variation", variant="primary")
            with gr.Column():
                var_result = gr.JSON(label="Creation Result")
        var_btn.click(
            fn=lambda obj, cid, attrs, data, user: create_variation(
                cid, _parse_attributes(attrs), data, user, obj
            ),
            inputs=[var_object, var_canonical, var_attrs, var_data, var_user],
            outputs=var_result,
            api_name="create_variation"
        )

    with gr.Tab("Activity Feed"):
        # step=1 keeps the slider on whole numbers.
        activity_limit = gr.Slider(5, 50, value=20, step=1, label="Number of Activities")
        activity_btn = gr.Button("Get Recent Activity")
        activity_result = gr.JSON(label="Recent Discoveries")
        activity_btn.click(
            fn=get_recent_activity,
            inputs=activity_limit,
            outputs=activity_result,
            api_name="get_recent_activity"
        )

    with gr.Tab("Leaderboard"):
        leader_limit = gr.Slider(5, 20, value=10, step=1, label="Top N Discoverers")
        leader_btn = gr.Button("Get Leaderboard")
        leader_result = gr.JSON(label="Top Discoverers")
        leader_btn.click(
            fn=get_leaderboard,
            inputs=leader_limit,
            outputs=leader_result,
            api_name="get_leaderboard"
        )

    with gr.Tab("User Profile"):
        profile_user = gr.Textbox(label="Username")
        profile_btn = gr.Button("Get Profile")
        profile_result = gr.JSON(label="User Profile")
        profile_btn.click(
            fn=get_user_profile,
            inputs=profile_user,
            outputs=profile_result,
            api_name="get_user_profile"
        )

    # increment_scan_count is listed as an endpoint below; it needs an event
    # of its own to be reachable.
    with gr.Tab("Increment Scan Count"):
        scan_piclet_id = gr.Textbox(label="Piclet ID", placeholder="e.g., pillow_canonical")
        scan_object = gr.Textbox(label="Object Name", placeholder="e.g., pillow")
        scan_btn = gr.Button("Increment Scan Count")
        scan_result = gr.JSON(label="Scan Count Result")
        scan_btn.click(
            fn=increment_scan_count,
            inputs=[scan_piclet_id, scan_object],
            outputs=scan_result,
            api_name="increment_scan_count"
        )

    # API Documentation
    gr.Markdown("""
    ## API Endpoints
    All endpoints accept JSON and return JSON responses.
    - **search_piclet**: Search for canonical or variation Piclets
    - **create_canonical**: Register a new canonical Piclet
    - **create_variation**: Add a variation to existing canonical
    - **increment_scan_count**: Track discovery popularity
    - **get_recent_activity**: Global discovery feed
    - **get_leaderboard**: Top discoverers
    - **get_user_profile**: Individual discovery stats
    See API_DOCUMENTATION.md for detailed usage.
    """)

if __name__ == "__main__":
    app.launch()