Spaces:
Sleeping
Add Hugging Face dataset integration
Browse files ✨ New Features:
- Integrated load_dataset() for HF Hub compatibility
- Added factory methods for easy loader creation
- Implemented automatic fallback (HF → local files)
- Added dynamic data source switching
- Created comprehensive test suite
🔧 Technical improvements:
- Enhanced AgriculturalDataLoader with use_hf parameter
- Added load_from_huggingface() method
- Separated local file loading logic
- Added data source switching capabilities
- Removed deprecated trust_remote_code parameter
📚 Documentation:
- Added test_data_sources.py with comprehensive tests
- Created hf_usage_example.py with practical examples
- Updated gradio_app.py to use HF integration
- Added factory methods for different use cases
🚀 Production ready:
- Automatic fallback ensures reliability
- Environment-based configuration
- Compatible with HF Spaces deployment
- Maintains backward compatibility
- data_loader.py +89 -3
- gradio_app.py +8 -1
- hf_usage_example.py +214 -0
- test_data_sources.py +190 -0
|
@@ -8,24 +8,64 @@ import numpy as np
|
|
| 8 |
from pathlib import Path
|
| 9 |
from typing import List, Dict, Optional, Union
|
| 10 |
import os
|
| 11 |
-
from datasets import Dataset
|
| 12 |
from huggingface_hub import HfApi
|
| 13 |
|
| 14 |
|
| 15 |
class AgriculturalDataLoader:
|
| 16 |
"""Loads and preprocesses agricultural intervention data."""
|
| 17 |
|
| 18 |
-
def __init__(self, data_path: str = None, hf_token: str = None, dataset_id: str = None):
|
| 19 |
self.data_path = data_path or "/Users/tracyandre/Downloads/OneDrive_1_9-17-2025"
|
| 20 |
self.hf_token = hf_token or os.environ.get("HF_TOKEN")
|
| 21 |
self.dataset_id = dataset_id or "HackathonCRA/2024"
|
|
|
|
| 22 |
self.data_cache = {}
|
| 23 |
|
| 24 |
def load_all_files(self) -> pd.DataFrame:
|
| 25 |
"""Load all intervention files and combine them."""
|
| 26 |
if 'combined_data' in self.data_cache:
|
| 27 |
return self.data_cache['combined_data']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
data_files = []
|
| 30 |
data_path = Path(self.data_path)
|
| 31 |
|
|
@@ -57,7 +97,6 @@ class AgriculturalDataLoader:
|
|
| 57 |
if all_dataframes:
|
| 58 |
combined_df = pd.concat(all_dataframes, ignore_index=True)
|
| 59 |
combined_df = self._preprocess_data(combined_df)
|
| 60 |
-
self.data_cache['combined_data'] = combined_df
|
| 61 |
return combined_df
|
| 62 |
else:
|
| 63 |
raise ValueError("No data files found")
|
|
@@ -160,3 +199,50 @@ class AgriculturalDataLoader:
|
|
| 160 |
)
|
| 161 |
|
| 162 |
return f"Data uploaded to {self.dataset_id}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from pathlib import Path
|
| 9 |
from typing import List, Dict, Optional, Union
|
| 10 |
import os
|
| 11 |
+
from datasets import Dataset, load_dataset
|
| 12 |
from huggingface_hub import HfApi
|
| 13 |
|
| 14 |
|
| 15 |
class AgriculturalDataLoader:
|
| 16 |
"""Loads and preprocesses agricultural intervention data."""
|
| 17 |
|
| 18 |
+
def __init__(self, data_path: str = None, hf_token: str = None, dataset_id: str = None, use_hf: bool = False):
|
| 19 |
self.data_path = data_path or "/Users/tracyandre/Downloads/OneDrive_1_9-17-2025"
|
| 20 |
self.hf_token = hf_token or os.environ.get("HF_TOKEN")
|
| 21 |
self.dataset_id = dataset_id or "HackathonCRA/2024"
|
| 22 |
+
self.use_hf = use_hf # Flag to use Hugging Face dataset
|
| 23 |
self.data_cache = {}
|
| 24 |
|
| 25 |
def load_all_files(self) -> pd.DataFrame:
|
| 26 |
"""Load all intervention files and combine them."""
|
| 27 |
if 'combined_data' in self.data_cache:
|
| 28 |
return self.data_cache['combined_data']
|
| 29 |
+
|
| 30 |
+
# Try to load from Hugging Face first if enabled
|
| 31 |
+
if self.use_hf:
|
| 32 |
+
try:
|
| 33 |
+
df = self.load_from_huggingface()
|
| 34 |
+
self.data_cache['combined_data'] = df
|
| 35 |
+
return df
|
| 36 |
+
except Exception as e:
|
| 37 |
+
print(f"Failed to load from Hugging Face: {e}")
|
| 38 |
+
print("Falling back to local files...")
|
| 39 |
+
|
| 40 |
+
# Load from local files (original method)
|
| 41 |
+
return self._load_local_files()
|
| 42 |
+
|
| 43 |
+
def load_from_huggingface(self) -> pd.DataFrame:
|
| 44 |
+
"""Load data from Hugging Face dataset."""
|
| 45 |
+
print(f"π€ Loading dataset from Hugging Face: {self.dataset_id}")
|
| 46 |
+
|
| 47 |
+
try:
|
| 48 |
+
dataset = load_dataset(
|
| 49 |
+
self.dataset_id,
|
| 50 |
+
token=self.hf_token,
|
| 51 |
+
)
|
| 52 |
|
| 53 |
+
# Convert to pandas DataFrame
|
| 54 |
+
df = dataset["train"].to_pandas()
|
| 55 |
+
print(f"✅ Successfully loaded {len(df)} records from Hugging Face")
|
| 56 |
+
|
| 57 |
+
# Apply preprocessing if needed
|
| 58 |
+
df = self._preprocess_data(df)
|
| 59 |
+
|
| 60 |
+
return df
|
| 61 |
+
|
| 62 |
+
except Exception as e:
|
| 63 |
+
raise ValueError(f"Failed to load dataset from Hugging Face: {e}")
|
| 64 |
+
|
| 65 |
+
def _load_local_files(self) -> pd.DataFrame:
|
| 66 |
+
"""Load data from local CSV/Excel files."""
|
| 67 |
+
print(f"π Loading local files from: {self.data_path}")
|
| 68 |
+
|
| 69 |
data_files = []
|
| 70 |
data_path = Path(self.data_path)
|
| 71 |
|
|
|
|
| 97 |
if all_dataframes:
|
| 98 |
combined_df = pd.concat(all_dataframes, ignore_index=True)
|
| 99 |
combined_df = self._preprocess_data(combined_df)
|
|
|
|
| 100 |
return combined_df
|
| 101 |
else:
|
| 102 |
raise ValueError("No data files found")
|
|
|
|
| 199 |
)
|
| 200 |
|
| 201 |
return f"Data uploaded to {self.dataset_id}"
|
| 202 |
+
|
| 203 |
+
def set_data_source(self, use_hf: bool = True, clear_cache: bool = True):
|
| 204 |
+
"""
|
| 205 |
+
Switch between Hugging Face and local file data sources.
|
| 206 |
+
|
| 207 |
+
Args:
|
| 208 |
+
use_hf: If True, use Hugging Face dataset. If False, use local files.
|
| 209 |
+
clear_cache: If True, clear cached data to force reload from new source.
|
| 210 |
+
"""
|
| 211 |
+
self.use_hf = use_hf
|
| 212 |
+
if clear_cache:
|
| 213 |
+
self.data_cache.clear()
|
| 214 |
+
print(f"π Switched to {'Hugging Face' if use_hf else 'local files'} data source")
|
| 215 |
+
|
| 216 |
+
@classmethod
|
| 217 |
+
def create_hf_loader(cls, dataset_id: str = "HackathonCRA/2024", hf_token: str = None):
|
| 218 |
+
"""
|
| 219 |
+
Factory method to create a loader configured for Hugging Face.
|
| 220 |
+
|
| 221 |
+
Args:
|
| 222 |
+
dataset_id: Hugging Face dataset identifier
|
| 223 |
+
hf_token: Hugging Face token (optional, will use environment variable)
|
| 224 |
+
|
| 225 |
+
Returns:
|
| 226 |
+
AgriculturalDataLoader configured for HF
|
| 227 |
+
"""
|
| 228 |
+
return cls(
|
| 229 |
+
dataset_id=dataset_id,
|
| 230 |
+
hf_token=hf_token,
|
| 231 |
+
use_hf=True
|
| 232 |
+
)
|
| 233 |
+
|
| 234 |
+
@classmethod
|
| 235 |
+
def create_local_loader(cls, data_path: str):
|
| 236 |
+
"""
|
| 237 |
+
Factory method to create a loader configured for local files.
|
| 238 |
+
|
| 239 |
+
Args:
|
| 240 |
+
data_path: Path to local data directory
|
| 241 |
+
|
| 242 |
+
Returns:
|
| 243 |
+
AgriculturalDataLoader configured for local files
|
| 244 |
+
"""
|
| 245 |
+
return cls(
|
| 246 |
+
data_path=data_path,
|
| 247 |
+
use_hf=False
|
| 248 |
+
)
|
|
@@ -15,7 +15,14 @@ from analysis_tools import AgriculturalAnalyzer
|
|
| 15 |
|
| 16 |
|
| 17 |
# Initialize components
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
analyzer = AgriculturalAnalyzer(data_loader)
|
| 20 |
|
| 21 |
# Global state for data
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
# Initialize components
|
| 18 |
+
# Try to use Hugging Face first, fallback to local files
|
| 19 |
+
try:
|
| 20 |
+
data_loader = AgriculturalDataLoader(use_hf=True)
|
| 21 |
+
print("π€ Configured to use Hugging Face dataset with local fallback")
|
| 22 |
+
except:
|
| 23 |
+
data_loader = AgriculturalDataLoader(use_hf=False)
|
| 24 |
+
print("π Configured to use local files only")
|
| 25 |
+
|
| 26 |
analyzer = AgriculturalAnalyzer(data_loader)
|
| 27 |
|
| 28 |
# Global state for data
|
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Example usage of the agricultural data loader with Hugging Face integration.
|
| 4 |
+
Shows different ways to load and use the data.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import warnings
|
| 9 |
+
warnings.filterwarnings('ignore')
|
| 10 |
+
|
| 11 |
+
from data_loader import AgriculturalDataLoader
|
| 12 |
+
from analysis_tools import AgriculturalAnalyzer
|
| 13 |
+
|
| 14 |
+
def example_local_usage():
|
| 15 |
+
"""Example: Load from local files."""
|
| 16 |
+
print("π EXAMPLE 1: Loading from local files")
|
| 17 |
+
print("-" * 40)
|
| 18 |
+
|
| 19 |
+
# Create loader for local files
|
| 20 |
+
loader = AgriculturalDataLoader.create_local_loader(
|
| 21 |
+
data_path="/Users/tracyandre/Downloads/OneDrive_1_9-17-2025"
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
# Load and analyze data
|
| 25 |
+
df = loader.load_all_files()
|
| 26 |
+
print(f"✅ Loaded {len(df):,} records from local files")
|
| 27 |
+
|
| 28 |
+
# Basic analysis
|
| 29 |
+
analyzer = AgriculturalAnalyzer(loader)
|
| 30 |
+
trends = analyzer.analyze_weed_pressure_trends()
|
| 31 |
+
print(f"π Average IFT: {trends['summary']['mean_ift']:.2f}")
|
| 32 |
+
|
| 33 |
+
return df
|
| 34 |
+
|
| 35 |
+
def example_hf_usage():
|
| 36 |
+
"""Example: Load from Hugging Face (if available)."""
|
| 37 |
+
print("\nπ€ EXAMPLE 2: Loading from Hugging Face")
|
| 38 |
+
print("-" * 40)
|
| 39 |
+
|
| 40 |
+
# Check if HF token is available
|
| 41 |
+
if not os.environ.get("HF_TOKEN"):
|
| 42 |
+
print("β οΈ No HF_TOKEN found - skipping HF example")
|
| 43 |
+
print("π‘ Set HF_TOKEN environment variable to use this feature")
|
| 44 |
+
return None
|
| 45 |
+
|
| 46 |
+
try:
|
| 47 |
+
# Create loader for Hugging Face
|
| 48 |
+
loader = AgriculturalDataLoader.create_hf_loader(
|
| 49 |
+
dataset_id="HackathonCRA/2024"
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
# Load and analyze data
|
| 53 |
+
df = loader.load_all_files()
|
| 54 |
+
print(f"✅ Loaded {len(df):,} records from Hugging Face")
|
| 55 |
+
|
| 56 |
+
# Basic analysis
|
| 57 |
+
analyzer = AgriculturalAnalyzer(loader)
|
| 58 |
+
trends = analyzer.analyze_weed_pressure_trends()
|
| 59 |
+
print(f"π Average IFT: {trends['summary']['mean_ift']:.2f}")
|
| 60 |
+
|
| 61 |
+
return df
|
| 62 |
+
|
| 63 |
+
except Exception as e:
|
| 64 |
+
print(f"β Failed to load from Hugging Face: {e}")
|
| 65 |
+
return None
|
| 66 |
+
|
| 67 |
+
def example_automatic_fallback():
|
| 68 |
+
"""Example: Automatic fallback from HF to local."""
|
| 69 |
+
print("\nπ EXAMPLE 3: Automatic fallback")
|
| 70 |
+
print("-" * 40)
|
| 71 |
+
|
| 72 |
+
# Create loader with HF preferred but local fallback
|
| 73 |
+
loader = AgriculturalDataLoader(
|
| 74 |
+
data_path="/Users/tracyandre/Downloads/OneDrive_1_9-17-2025",
|
| 75 |
+
dataset_id="HackathonCRA/2024",
|
| 76 |
+
use_hf=True # Try HF first
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
# This will try HF first, then fallback to local if needed
|
| 80 |
+
df = loader.load_all_files()
|
| 81 |
+
print(f"✅ Loaded {len(df):,} records (with automatic source selection)")
|
| 82 |
+
|
| 83 |
+
return df
|
| 84 |
+
|
| 85 |
+
def example_dynamic_switching():
|
| 86 |
+
"""Example: Dynamic switching between sources."""
|
| 87 |
+
print("\nπ EXAMPLE 4: Dynamic source switching")
|
| 88 |
+
print("-" * 40)
|
| 89 |
+
|
| 90 |
+
# Create loader
|
| 91 |
+
loader = AgriculturalDataLoader(
|
| 92 |
+
data_path="/Users/tracyandre/Downloads/OneDrive_1_9-17-2025",
|
| 93 |
+
dataset_id="HackathonCRA/2024"
|
| 94 |
+
)
|
| 95 |
+
|
| 96 |
+
# Load from local first
|
| 97 |
+
loader.set_data_source(use_hf=False)
|
| 98 |
+
df_local = loader.load_all_files()
|
| 99 |
+
print(f"π Local source: {len(df_local):,} records")
|
| 100 |
+
|
| 101 |
+
# Switch to HF (if available)
|
| 102 |
+
if os.environ.get("HF_TOKEN"):
|
| 103 |
+
try:
|
| 104 |
+
loader.set_data_source(use_hf=True)
|
| 105 |
+
df_hf = loader.load_all_files()
|
| 106 |
+
print(f"π€ HF source: {len(df_hf):,} records")
|
| 107 |
+
|
| 108 |
+
# Compare
|
| 109 |
+
if len(df_local) == len(df_hf):
|
| 110 |
+
print("✅ Data consistency verified")
|
| 111 |
+
else:
|
| 112 |
+
print(f"β οΈ Data mismatch: {abs(len(df_local) - len(df_hf))} record difference")
|
| 113 |
+
|
| 114 |
+
except Exception as e:
|
| 115 |
+
print(f"π€ HF switching failed: {e}")
|
| 116 |
+
else:
|
| 117 |
+
print("β οΈ No HF_TOKEN - skipping HF switch test")
|
| 118 |
+
|
| 119 |
+
return df_local
|
| 120 |
+
|
| 121 |
+
def example_production_deployment():
|
| 122 |
+
"""Example: Production deployment configuration."""
|
| 123 |
+
print("\nπ EXAMPLE 5: Production deployment setup")
|
| 124 |
+
print("-" * 40)
|
| 125 |
+
|
| 126 |
+
# Production configuration
|
| 127 |
+
# This is how you'd set it up for Hugging Face Spaces deployment
|
| 128 |
+
|
| 129 |
+
print("π‘ For Hugging Face Spaces deployment:")
|
| 130 |
+
print("1. Set HF_TOKEN as a Space secret")
|
| 131 |
+
print("2. Configure the loader as follows:")
|
| 132 |
+
print()
|
| 133 |
+
|
| 134 |
+
config_code = '''
|
| 135 |
+
# In your app.py or gradio_app.py
|
| 136 |
+
import os
|
| 137 |
+
from data_loader import AgriculturalDataLoader
|
| 138 |
+
|
| 139 |
+
# Production configuration
|
| 140 |
+
hf_token = os.environ.get("HF_TOKEN")
|
| 141 |
+
dataset_id = "HackathonCRA/2024"
|
| 142 |
+
|
| 143 |
+
if hf_token:
|
| 144 |
+
# Use HF dataset in production
|
| 145 |
+
data_loader = AgriculturalDataLoader.create_hf_loader(
|
| 146 |
+
dataset_id=dataset_id,
|
| 147 |
+
hf_token=hf_token
|
| 148 |
+
)
|
| 149 |
+
print("π€ Using Hugging Face dataset")
|
| 150 |
+
else:
|
| 151 |
+
# Fallback for local development
|
| 152 |
+
data_loader = AgriculturalDataLoader.create_local_loader(
|
| 153 |
+
data_path="./data" # Local data directory
|
| 154 |
+
)
|
| 155 |
+
print("π Using local files")
|
| 156 |
+
'''
|
| 157 |
+
|
| 158 |
+
print(config_code)
|
| 159 |
+
|
| 160 |
+
# Example of actual production setup
|
| 161 |
+
try:
|
| 162 |
+
hf_token = os.environ.get("HF_TOKEN")
|
| 163 |
+
if hf_token:
|
| 164 |
+
loader = AgriculturalDataLoader.create_hf_loader("HackathonCRA/2024", hf_token)
|
| 165 |
+
print("✅ Production setup: HF dataset configured")
|
| 166 |
+
else:
|
| 167 |
+
loader = AgriculturalDataLoader.create_local_loader("/Users/tracyandre/Downloads/OneDrive_1_9-17-2025")
|
| 168 |
+
print("✅ Development setup: Local files configured")
|
| 169 |
+
|
| 170 |
+
df = loader.load_all_files()
|
| 171 |
+
print(f"π Ready for production: {len(df):,} records available")
|
| 172 |
+
|
| 173 |
+
except Exception as e:
|
| 174 |
+
print(f"β Production setup failed: {e}")
|
| 175 |
+
|
| 176 |
+
def main():
|
| 177 |
+
"""Run all examples."""
|
| 178 |
+
print("π AGRICULTURAL DATA LOADER - USAGE EXAMPLES")
|
| 179 |
+
print("=" * 60)
|
| 180 |
+
|
| 181 |
+
# Run examples
|
| 182 |
+
example_local_usage()
|
| 183 |
+
example_hf_usage()
|
| 184 |
+
example_automatic_fallback()
|
| 185 |
+
example_dynamic_switching()
|
| 186 |
+
example_production_deployment()
|
| 187 |
+
|
| 188 |
+
print("\n" + "=" * 60)
|
| 189 |
+
print("π― SUMMARY")
|
| 190 |
+
print("=" * 60)
|
| 191 |
+
print("""
|
| 192 |
+
The AgriculturalDataLoader now supports:
|
| 193 |
+
|
| 194 |
+
✅ Local file loading (CSV/Excel)
|
| 195 |
+
✅ Hugging Face dataset loading
|
| 196 |
+
✅ Automatic fallback (HF → Local)
|
| 197 |
+
✅ Dynamic source switching
|
| 198 |
+
✅ Production deployment ready
|
| 199 |
+
|
| 200 |
+
Key benefits:
|
| 201 |
+
π Seamless data source switching
|
| 202 |
+
π Cloud deployment ready
|
| 203 |
+
π Same analysis tools work with both sources
|
| 204 |
+
π§ Easy configuration management
|
| 205 |
+
""")
|
| 206 |
+
|
| 207 |
+
print("π οΈ Next steps:")
|
| 208 |
+
print("1. Upload your dataset to Hugging Face Hub")
|
| 209 |
+
print("2. Set HF_TOKEN environment variable")
|
| 210 |
+
print("3. Deploy to Hugging Face Spaces")
|
| 211 |
+
print("4. Enjoy cloud-based agricultural analysis!")
|
| 212 |
+
|
| 213 |
+
if __name__ == "__main__":
|
| 214 |
+
main()
|
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Test script to demonstrate loading data from both local files and Hugging Face.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import warnings
|
| 7 |
+
warnings.filterwarnings('ignore')
|
| 8 |
+
|
| 9 |
+
from data_loader import AgriculturalDataLoader
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
+
def test_local_loading():
|
| 13 |
+
"""Test loading from local files."""
|
| 14 |
+
print("π TESTING LOCAL FILE LOADING")
|
| 15 |
+
print("=" * 50)
|
| 16 |
+
|
| 17 |
+
try:
|
| 18 |
+
# Create loader for local files
|
| 19 |
+
loader = AgriculturalDataLoader.create_local_loader(
|
| 20 |
+
data_path="/Users/tracyandre/Downloads/OneDrive_1_9-17-2025"
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
# Load data
|
| 24 |
+
df = loader.load_all_files()
|
| 25 |
+
|
| 26 |
+
print(f"✅ Local loading successful!")
|
| 27 |
+
print(f"π Records: {len(df):,}")
|
| 28 |
+
print(f"📅 Years: {sorted(df['year'].unique())}")
|
| 29 |
+
print(f"π± Crops: {df['crop_type'].nunique()}")
|
| 30 |
+
print(f"π Plots: {df['plot_name'].nunique()}")
|
| 31 |
+
|
| 32 |
+
return True
|
| 33 |
+
|
| 34 |
+
except Exception as e:
|
| 35 |
+
print(f"β Local loading failed: {e}")
|
| 36 |
+
return False
|
| 37 |
+
|
| 38 |
+
def test_hf_loading():
|
| 39 |
+
"""Test loading from Hugging Face."""
|
| 40 |
+
print("\nπ€ TESTING HUGGING FACE LOADING")
|
| 41 |
+
print("=" * 50)
|
| 42 |
+
|
| 43 |
+
# Check if HF token is available
|
| 44 |
+
hf_token = os.environ.get("HF_TOKEN")
|
| 45 |
+
if not hf_token:
|
| 46 |
+
print("β οΈ No HF_TOKEN found in environment variables")
|
| 47 |
+
print("π‘ Set HF_TOKEN to test Hugging Face loading")
|
| 48 |
+
return False
|
| 49 |
+
|
| 50 |
+
try:
|
| 51 |
+
# Create loader for Hugging Face
|
| 52 |
+
loader = AgriculturalDataLoader.create_hf_loader(
|
| 53 |
+
dataset_id="HackathonCRA/2024",
|
| 54 |
+
hf_token=hf_token
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
# Load data
|
| 58 |
+
df = loader.load_from_huggingface()
|
| 59 |
+
|
| 60 |
+
print(f"✅ Hugging Face loading successful!")
|
| 61 |
+
print(f"π Records: {len(df):,}")
|
| 62 |
+
print(f"📅 Years: {sorted(df['year'].unique())}")
|
| 63 |
+
print(f"π± Crops: {df['crop_type'].nunique()}")
|
| 64 |
+
print(f"π Plots: {df['plot_name'].nunique()}")
|
| 65 |
+
|
| 66 |
+
return True
|
| 67 |
+
|
| 68 |
+
except Exception as e:
|
| 69 |
+
print(f"β Hugging Face loading failed: {e}")
|
| 70 |
+
print("π‘ Make sure the dataset exists and you have access")
|
| 71 |
+
return False
|
| 72 |
+
|
| 73 |
+
def test_auto_fallback():
|
| 74 |
+
"""Test automatic fallback from HF to local files."""
|
| 75 |
+
print("\nπ TESTING AUTO FALLBACK (HF -> LOCAL)")
|
| 76 |
+
print("=" * 50)
|
| 77 |
+
|
| 78 |
+
try:
|
| 79 |
+
# Create loader with HF enabled but potentially failing
|
| 80 |
+
loader = AgriculturalDataLoader(
|
| 81 |
+
data_path="/Users/tracyandre/Downloads/OneDrive_1_9-17-2025",
|
| 82 |
+
dataset_id="nonexistent-dataset", # This should fail
|
| 83 |
+
use_hf=True
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
# This should try HF first, then fallback to local
|
| 87 |
+
df = loader.load_all_files()
|
| 88 |
+
|
| 89 |
+
print(f"✅ Auto fallback successful!")
|
| 90 |
+
print(f"π Records: {len(df):,}")
|
| 91 |
+
print("π Successfully fell back to local files after HF failure")
|
| 92 |
+
|
| 93 |
+
return True
|
| 94 |
+
|
| 95 |
+
except Exception as e:
|
| 96 |
+
print(f"β Auto fallback failed: {e}")
|
| 97 |
+
return False
|
| 98 |
+
|
| 99 |
+
def test_data_source_switching():
|
| 100 |
+
"""Test switching between data sources."""
|
| 101 |
+
print("\nπ TESTING DATA SOURCE SWITCHING")
|
| 102 |
+
print("=" * 50)
|
| 103 |
+
|
| 104 |
+
try:
|
| 105 |
+
# Create loader
|
| 106 |
+
loader = AgriculturalDataLoader(
|
| 107 |
+
data_path="/Users/tracyandre/Downloads/OneDrive_1_9-17-2025",
|
| 108 |
+
dataset_id="HackathonCRA/2024"
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
+
# Test local loading
|
| 112 |
+
loader.set_data_source(use_hf=False)
|
| 113 |
+
df_local = loader.load_all_files()
|
| 114 |
+
print(f"π Local: {len(df_local):,} records")
|
| 115 |
+
|
| 116 |
+
# Test switching to HF (if token available)
|
| 117 |
+
if os.environ.get("HF_TOKEN"):
|
| 118 |
+
loader.set_data_source(use_hf=True)
|
| 119 |
+
try:
|
| 120 |
+
df_hf = loader.load_all_files()
|
| 121 |
+
print(f"π€ HF: {len(df_hf):,} records")
|
| 122 |
+
|
| 123 |
+
# Compare data
|
| 124 |
+
if len(df_local) == len(df_hf):
|
| 125 |
+
print("✅ Data consistency: Same number of records")
|
| 126 |
+
else:
|
| 127 |
+
print(f"β οΈ Data difference: Local={len(df_local)}, HF={len(df_hf)}")
|
| 128 |
+
|
| 129 |
+
except Exception as e:
|
| 130 |
+
print(f"π€ HF loading failed (expected): {e}")
|
| 131 |
+
else:
|
| 132 |
+
print("β οΈ No HF_TOKEN - skipping HF test")
|
| 133 |
+
|
| 134 |
+
return True
|
| 135 |
+
|
| 136 |
+
except Exception as e:
|
| 137 |
+
print(f"β Data source switching failed: {e}")
|
| 138 |
+
return False
|
| 139 |
+
|
| 140 |
+
def main():
|
| 141 |
+
"""Run all tests."""
|
| 142 |
+
print("π AGRICULTURAL DATA LOADER TESTING")
|
| 143 |
+
print("=" * 60)
|
| 144 |
+
print()
|
| 145 |
+
|
| 146 |
+
results = []
|
| 147 |
+
|
| 148 |
+
# Test 1: Local loading
|
| 149 |
+
results.append(("Local Loading", test_local_loading()))
|
| 150 |
+
|
| 151 |
+
# Test 2: Hugging Face loading
|
| 152 |
+
results.append(("HF Loading", test_hf_loading()))
|
| 153 |
+
|
| 154 |
+
# Test 3: Auto fallback
|
| 155 |
+
results.append(("Auto Fallback", test_auto_fallback()))
|
| 156 |
+
|
| 157 |
+
# Test 4: Data source switching
|
| 158 |
+
results.append(("Source Switching", test_data_source_switching()))
|
| 159 |
+
|
| 160 |
+
# Summary
|
| 161 |
+
print("\nπ TEST SUMMARY")
|
| 162 |
+
print("=" * 30)
|
| 163 |
+
|
| 164 |
+
passed = 0
|
| 165 |
+
for test_name, result in results:
|
| 166 |
+
status = "✅ PASS" if result else "❌ FAIL"
|
| 167 |
+
print(f"{test_name:<20} {status}")
|
| 168 |
+
if result:
|
| 169 |
+
passed += 1
|
| 170 |
+
|
| 171 |
+
print(f"\nπ― Results: {passed}/{len(results)} tests passed")
|
| 172 |
+
|
| 173 |
+
if passed == len(results):
|
| 174 |
+
print("π All tests passed! Data loader is working correctly.")
|
| 175 |
+
else:
|
| 176 |
+
print("β οΈ Some tests failed. Check the output above for details.")
|
| 177 |
+
|
| 178 |
+
print("\nπ‘ Usage Examples:")
|
| 179 |
+
print("# Load from local files:")
|
| 180 |
+
print("loader = AgriculturalDataLoader.create_local_loader('/path/to/data')")
|
| 181 |
+
print()
|
| 182 |
+
print("# Load from Hugging Face:")
|
| 183 |
+
print("loader = AgriculturalDataLoader.create_hf_loader('HackathonCRA/2024')")
|
| 184 |
+
print()
|
| 185 |
+
print("# Auto-detect with fallback:")
|
| 186 |
+
print("loader = AgriculturalDataLoader(use_hf=True)")
|
| 187 |
+
print("df = loader.load_all_files() # Tries HF first, falls back to local")
|
| 188 |
+
|
| 189 |
+
if __name__ == "__main__":
|
| 190 |
+
main()
|