mcp / test_data_sources.py
Tracy André
Add Hugging Face dataset integration
5977115
raw
history blame
6.1 kB
#!/usr/bin/env python3
"""
Test script to demonstrate loading data from both local files and Hugging Face.
"""
import warnings
warnings.filterwarnings('ignore')
from data_loader import AgriculturalDataLoader
import os
def test_local_loading():
    """Exercise the loader against files on the local disk.

    Builds a loader with ``AgriculturalDataLoader.create_local_loader``,
    calls ``load_all_files()`` on it and prints a short summary of the
    result: record count, distinct ``year`` values, and the number of
    distinct ``crop_type`` and ``plot_name`` values.

    Returns:
        bool: ``True`` when loading and summarising complete without an
        exception, ``False`` otherwise (the error message is printed).
    """
    print("🔍 TESTING LOCAL FILE LOADING")
    print("=" * 50)

    try:
        # NOTE(review): this is an absolute path on one developer's machine;
        # on any other machine the outcome depends on how the loader treats
        # a missing directory.
        loader = AgriculturalDataLoader.create_local_loader(
            data_path="/Users/tracyandre/Downloads/OneDrive_1_9-17-2025"
        )

        # Presumably a pandas DataFrame (uses ``.unique()`` / ``.nunique()``)
        # -- confirm against data_loader.
        df = loader.load_all_files()

        print("✅ Local loading successful!")
        print(f"📊 Records: {len(df):,}")
        print(f"📅 Years: {sorted(df['year'].unique())}")
        print(f"🌱 Crops: {df['crop_type'].nunique()}")
        print(f"📍 Plots: {df['plot_name'].nunique()}")
        return True
    except Exception as e:
        # Deliberately broad: this is a demo runner, so every failure is
        # reported as a failed check instead of aborting the whole script.
        print(f"❌ Local loading failed: {e}")
        return False
def test_hf_loading():
    """Exercise the loader against the Hugging Face dataset.

    Reads the access token from the ``HF_TOKEN`` environment variable. When
    the variable is unset or empty, a hint is printed and the check is
    reported as failed without contacting Hugging Face. Otherwise a loader
    is built with ``AgriculturalDataLoader.create_hf_loader`` for the
    dataset ``HackathonCRA/2024``, ``load_from_huggingface()`` is called and
    a short summary of the result is printed.

    Returns:
        bool: ``True`` when loading and summarising complete without an
        exception; ``False`` when no token is available or any exception is
        raised (the error message is printed).
    """
    print("\n🤗 TESTING HUGGING FACE LOADING")
    print("=" * 50)

    # Guard clause: without a token there is nothing to test.
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("⚠️ No HF_TOKEN found in environment variables")
        print("💡 Set HF_TOKEN to test Hugging Face loading")
        return False

    try:
        loader = AgriculturalDataLoader.create_hf_loader(
            dataset_id="HackathonCRA/2024",
            hf_token=hf_token
        )

        # Presumably a pandas DataFrame (uses ``.unique()`` / ``.nunique()``)
        # -- confirm against data_loader.
        df = loader.load_from_huggingface()

        print("✅ Hugging Face loading successful!")
        print(f"📊 Records: {len(df):,}")
        print(f"📅 Years: {sorted(df['year'].unique())}")
        print(f"🌱 Crops: {df['crop_type'].nunique()}")
        print(f"📍 Plots: {df['plot_name'].nunique()}")
        return True
    except Exception as e:
        # Deliberately broad: any failure (network, auth, schema, ...) is
        # reported as a failed check instead of aborting the script.
        print(f"❌ Hugging Face loading failed: {e}")
        print("💡 Make sure the dataset exists and you have access")
        return False
def test_auto_fallback():
    """Exercise loading with Hugging Face enabled but a bogus dataset id.

    Constructs ``AgriculturalDataLoader`` with ``use_hf=True`` and
    ``dataset_id="nonexistent-dataset"`` alongside a local ``data_path``,
    then calls ``load_all_files()`` and prints the record count.

    Note that this function only checks that ``load_all_files()`` returns
    without raising; it does not itself verify that Hugging Face was tried
    first or that the data really came from the local files.

    Returns:
        bool: ``True`` when ``load_all_files()`` completes without an
        exception, ``False`` otherwise (the error message is printed).
    """
    print("\n🔄 TESTING AUTO FALLBACK (HF -> LOCAL)")
    print("=" * 50)

    try:
        # The dataset id is intentionally invalid so that the Hugging Face
        # path is expected to fail and the loader is expected to fall back.
        loader = AgriculturalDataLoader(
            data_path="/Users/tracyandre/Downloads/OneDrive_1_9-17-2025",
            dataset_id="nonexistent-dataset",
            use_hf=True
        )

        df = loader.load_all_files()

        print("✅ Auto fallback successful!")
        print(f"📊 Records: {len(df):,}")
        print("🔄 Successfully fell back to local files after HF failure")
        return True
    except Exception as e:
        # Deliberately broad: any failure is reported as a failed check
        # instead of aborting the script.
        print(f"❌ Auto fallback failed: {e}")
        return False
def test_data_source_switching():
    """Exercise switching one loader between local and Hugging Face data.

    Constructs a single ``AgriculturalDataLoader``, selects the local source
    with ``set_data_source(use_hf=False)`` and loads it. If the ``HF_TOKEN``
    environment variable is set and non-empty, the same loader is switched
    to Hugging Face with ``set_data_source(use_hf=True)``, loaded again, and
    the two record counts are compared. A Hugging Face failure at that stage
    is printed but does not fail the check.

    Returns:
        bool: ``True`` when the local load succeeds (whatever the outcome of
        the optional Hugging Face step), ``False`` when an exception is
        raised outside the Hugging Face step (the error message is printed).
    """
    print("\n🔀 TESTING DATA SOURCE SWITCHING")
    print("=" * 50)

    try:
        loader = AgriculturalDataLoader(
            data_path="/Users/tracyandre/Downloads/OneDrive_1_9-17-2025",
            dataset_id="HackathonCRA/2024"
        )

        # Step 1: local source.
        loader.set_data_source(use_hf=False)
        df_local = loader.load_all_files()
        print(f"📁 Local: {len(df_local):,} records")

        # Step 2: Hugging Face source, only when a token is available.
        if os.environ.get("HF_TOKEN"):
            loader.set_data_source(use_hf=True)
            try:
                df_hf = loader.load_all_files()
                print(f"🤗 HF: {len(df_hf):,} records")

                # Only the row counts are compared, not the contents.
                if len(df_local) == len(df_hf):
                    print("✅ Data consistency: Same number of records")
                else:
                    print(f"⚠️ Data difference: Local={len(df_local)}, HF={len(df_hf)}")
            except Exception as e:
                # Tolerated on purpose: the HF step is best-effort here.
                print(f"🤗 HF loading failed (expected): {e}")
        else:
            print("⚠️ No HF_TOKEN - skipping HF test")

        return True
    except Exception as e:
        # Deliberately broad: any failure is reported as a failed check
        # instead of aborting the script.
        print(f"❌ Data source switching failed: {e}")
        return False
def main():
    """Run every check in order, then print a summary and usage examples.

    The four checks are executed in a fixed order (local loading, Hugging
    Face loading, auto fallback, source switching). Each returns a truthy
    value on success. After the checks, a PASS/FAIL table, the pass count
    and a few usage snippets are printed. Nothing is returned.
    """
    print("🚜 AGRICULTURAL DATA LOADER TESTING")
    print("=" * 60)
    print()

    # List elements are evaluated left to right, so the checks run (and
    # print their own output) in exactly this order.
    results = [
        ("Local Loading", test_local_loading()),
        ("HF Loading", test_hf_loading()),
        ("Auto Fallback", test_auto_fallback()),
        ("Source Switching", test_data_source_switching()),
    ]

    print("\n📋 TEST SUMMARY")
    print("=" * 30)

    for test_name, result in results:
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{test_name:<20} {status}")

    passed = sum(1 for _, result in results if result)
    print(f"\n🎯 Results: {passed}/{len(results)} tests passed")

    if passed == len(results):
        print("🎉 All tests passed! Data loader is working correctly.")
    else:
        print("⚠️ Some tests failed. Check the output above for details.")

    # The snippets below are printed verbatim as guidance; they are not run.
    print("\n💡 Usage Examples:")
    print("# Load from local files:")
    print("loader = AgriculturalDataLoader.create_local_loader('/path/to/data')")
    print()
    print("# Load from Hugging Face:")
    print("loader = AgriculturalDataLoader.create_hf_loader('HackathonCRA/2024')")
    print()
    print("# Auto-detect with fallback:")
    print("loader = AgriculturalDataLoader(use_hf=True)")
    print("df = loader.load_all_files() # Tries HF first, falls back to local")
# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()