Spaces:

nbroad
/

compare-hf-inference-providers

Running

App Files Files Community

nbroad committed on Jun 26

Commit

82d9f36

verified ·

1 Parent(s): bd08f1f

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +50 -5

app.py CHANGED Viewed

@@ -191,26 +191,49 @@ async def get_provider_data(provider: str):
 async def get_historical_data():
     """API endpoint to get historical data for line chart"""
     if not HF_TOKEN:
-        return {"error": "Historical data not available", "data": []}
     try:
         # Load historical dataset
         dataset = load_dataset(DATASET_REPO_NAME, split="train")
         df = dataset.to_pandas()
         # Group by timestamp and provider, get the latest entry for each timestamp-provider combo
         df['timestamp'] = pd.to_datetime(df['timestamp'])
         df = df.sort_values('timestamp')
         # Get last 48 hours of data (48 data points max for performance)
         cutoff_time = datetime.now(timezone.utc) - pd.Timedelta(hours=48)
-        df = df[df['timestamp'] >= cutoff_time]
         # Prepare data for Chart.js line chart
         historical_data = {}
         for provider in PROVIDERS:
-            provider_data = df[df['provider'] == provider].copy()
             if not provider_data.empty:
                 # Format for Chart.js: {x: timestamp, y: value}
                 historical_data[provider] = [
@@ -220,17 +243,39 @@ async def get_historical_data():
                     }
                     for _, row in provider_data.iterrows()
                 ]
             else:
                 historical_data[provider] = []
         return {
             "historical_data": historical_data,
-            "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
         }
     except Exception as e:
         logger.error(f"Error fetching historical data: {e}")
-        return {"error": "Failed to fetch historical data", "data": []}
 @app.post("/api/collect-now")
 async def trigger_data_collection(background_tasks: BackgroundTasks):

 async def get_historical_data():
     """API endpoint to get historical data for line chart"""
     if not HF_TOKEN:
+        logger.warning("No HF_TOKEN available for historical data")
+        return {
+            "error": "Historical data not available - no HF token",
+            "historical_data": {},
+            "message": "Historical data collection requires HuggingFace token"
+        }
     try:
         # Load historical dataset
         dataset = load_dataset(DATASET_REPO_NAME, split="train")
         df = dataset.to_pandas()
+        logger.info(f"Loaded dataset with {len(df)} total records")
+        if df.empty:
+            logger.info("Dataset is empty - no historical data available yet")
+            return {
+                "historical_data": {},
+                "message": "No historical data available yet. Data collection is running - check back in 30 minutes.",
+                "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+            }
         # Group by timestamp and provider, get the latest entry for each timestamp-provider combo
         df['timestamp'] = pd.to_datetime(df['timestamp'])
         df = df.sort_values('timestamp')
         # Get last 48 hours of data (48 data points max for performance)
         cutoff_time = datetime.now(timezone.utc) - pd.Timedelta(hours=48)
+        df_filtered = df[df['timestamp'] >= cutoff_time]
+        logger.info(f"Filtered to {len(df_filtered)} records in last 48 hours")
+        # If no recent data, use all available data for initial display
+        if df_filtered.empty:
+            logger.info("No data in last 48 hours, using all available data")
+            df_filtered = df.tail(100)  # Use last 100 records
         # Prepare data for Chart.js line chart
         historical_data = {}
+        total_data_points = 0
         for provider in PROVIDERS:
+            provider_data = df_filtered[df_filtered['provider'] == provider].copy()
             if not provider_data.empty:
                 # Format for Chart.js: {x: timestamp, y: value}
                 historical_data[provider] = [
                     }
                     for _, row in provider_data.iterrows()
                 ]
+                total_data_points += len(historical_data[provider])
             else:
                 historical_data[provider] = []
+        logger.info(f"Returning {total_data_points} total data points across {len([p for p in historical_data.values() if p])} providers")
         return {
             "historical_data": historical_data,
+            "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            "total_data_points": total_data_points,
+            "data_range": f"Last {len(df_filtered)} records" if not df_filtered.empty else "No data"
         }
     except Exception as e:
         logger.error(f"Error fetching historical data: {e}")
+        # Try to create initial data if dataset doesn't exist
+        if "does not exist" in str(e).lower() or "not found" in str(e).lower():
+            logger.info("Dataset doesn't exist yet, triggering initial data collection")
+            try:
+                await collect_and_store_data()
+                return {
+                    "historical_data": {},
+                    "message": "Dataset created! Historical data will appear after a few data collection cycles.",
+                    "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+                }
+            except Exception as create_error:
+                logger.error(f"Failed to create initial dataset: {create_error}")
+        return {
+            "error": f"Failed to fetch historical data: {str(e)}",
+            "historical_data": {},
+            "message": "Historical data temporarily unavailable"
+        }
 @app.post("/api/collect-now")
 async def trigger_data_collection(background_tasks: BackgroundTasks):