# NOTE: header residue from the web file viewer this script was copied from
# (Spaces status: Sleeping; file size: 6,096 Bytes; id 5977115).
# The viewer's line-number gutter (1-191) is omitted.
#!/usr/bin/env python3
"""
Test script to demonstrate loading data from both local files and Hugging Face.
"""
import warnings
warnings.filterwarnings('ignore')
from data_loader import AgriculturalDataLoader
import os
def test_local_loading(
    data_path: str = "/Users/tracyandre/Downloads/OneDrive_1_9-17-2025",
) -> bool:
    """Load all files through a local-only loader and print a short summary.

    Args:
        data_path: Directory passed to
            ``AgriculturalDataLoader.create_local_loader``. Defaults to the
            path this script was originally written against.

    Returns:
        True when loading and the summary printout succeed, False when
        anything in that sequence raises.
    """
    # NOTE(review): a bare leading "π" in the messages below is a mis-decoded
    # emoji whose original glyph cannot be recovered from this copy; it is
    # kept unchanged rather than guessed.
    print("π TESTING LOCAL FILE LOADING")
    print("=" * 50)

    try:
        loader = AgriculturalDataLoader.create_local_loader(data_path=data_path)
        df = loader.load_all_files()

        print("✅ Local loading successful!")
        print(f"π Records: {len(df):,}")
        # presumably df is a pandas DataFrame with these columns -- confirm
        # against data_loader.
        print(f"📅 Years: {sorted(df['year'].unique())}")
        print(f"🌱 Crops: {df['crop_type'].nunique()}")
        print(f"π Plots: {df['plot_name'].nunique()}")
        return True
    except Exception as e:
        # Deliberately broad: this is a smoke check that reports any failure
        # as a False result instead of aborting the whole script.
        print(f"❌ Local loading failed: {e}")
        return False
def test_hf_loading(dataset_id: str = "HackathonCRA/2024") -> bool:
    """Load the dataset from Hugging Face and print a short summary.

    The check needs an access token in the ``HF_TOKEN`` environment variable;
    without one it prints a hint and reports False without touching the
    loader.

    Args:
        dataset_id: Dataset identifier passed to
            ``AgriculturalDataLoader.create_hf_loader``.

    Returns:
        True when loading and the summary printout succeed, False when the
        token is missing or anything raises.
    """
    print("\n🤗 TESTING HUGGING FACE LOADING")
    print("=" * 50)

    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        # An unset and an empty token are treated the same way.
        print("⚠️ No HF_TOKEN found in environment variables")
        print("💡 Set HF_TOKEN to test Hugging Face loading")
        return False

    try:
        loader = AgriculturalDataLoader.create_hf_loader(
            dataset_id=dataset_id,
            hf_token=hf_token,
        )
        df = loader.load_from_huggingface()

        print("✅ Hugging Face loading successful!")
        # NOTE(review): the bare "π" prefixes are mis-decoded emoji whose
        # original glyphs cannot be recovered from this copy; kept unchanged.
        print(f"π Records: {len(df):,}")
        print(f"📅 Years: {sorted(df['year'].unique())}")
        print(f"🌱 Crops: {df['crop_type'].nunique()}")
        print(f"π Plots: {df['plot_name'].nunique()}")
        return True
    except Exception as e:
        # Deliberately broad: report any failure as False so the remaining
        # checks in the script still run.
        print(f"❌ Hugging Face loading failed: {e}")
        print("💡 Make sure the dataset exists and you have access")
        return False
def test_auto_fallback(
    data_path: str = "/Users/tracyandre/Downloads/OneDrive_1_9-17-2025",
) -> bool:
    """Check that ``load_all_files`` still returns data when the HF dataset is bogus.

    The loader is built with ``use_hf=True`` and a dataset id that is not
    expected to exist, so a successful load implies some other source served
    the data.

    Args:
        data_path: Local directory given to the loader as its file source.

    Returns:
        True when ``load_all_files`` returns without raising, False otherwise.
    """
    # NOTE(review): a bare leading "π" in the messages below is a mis-decoded
    # emoji whose original glyph cannot be recovered from this copy; it is
    # kept unchanged rather than guessed.
    print("\nπ TESTING AUTO FALLBACK (HF -> LOCAL)")
    print("=" * 50)

    try:
        loader = AgriculturalDataLoader(
            data_path=data_path,
            dataset_id="nonexistent-dataset",  # the HF attempt is meant to fail
            use_hf=True,
        )
        df = loader.load_all_files()

        print("✅ Auto fallback successful!")
        print(f"π Records: {len(df):,}")
        # This only shows that the call returned; it does not inspect which
        # source actually produced the data.
        print("π Successfully fell back to local files after HF failure")
        return True
    except Exception as e:
        # Deliberately broad: report any failure as False so the remaining
        # checks in the script still run.
        print(f"❌ Auto fallback failed: {e}")
        return False
def test_data_source_switching(
    data_path: str = "/Users/tracyandre/Downloads/OneDrive_1_9-17-2025",
) -> bool:
    """Switch one loader between local and Hugging Face sources and compare sizes.

    The local load always runs. The Hugging Face load runs only when
    ``HF_TOKEN`` is set, and a failure there is reported but does not fail
    the check.

    Args:
        data_path: Local directory given to the loader as its file source.

    Returns:
        True when the loader is created and the local load succeeds, False
        when either of those raises.
    """
    print("\nπ TESTING DATA SOURCE SWITCHING")
    print("=" * 50)

    try:
        loader = AgriculturalDataLoader(
            data_path=data_path,
            dataset_id="HackathonCRA/2024",
        )

        loader.set_data_source(use_hf=False)
        df_local = loader.load_all_files()
        # NOTE(review): the bare "π" prefixes are mis-decoded emoji whose
        # original glyphs cannot be recovered from this copy; kept unchanged.
        print(f"π Local: {len(df_local):,} records")

        if os.environ.get("HF_TOKEN"):
            loader.set_data_source(use_hf=True)
            try:
                df_hf = loader.load_all_files()
                print(f"🤗 HF: {len(df_hf):,} records")

                # Only row counts are compared, not the contents.
                if len(df_local) == len(df_hf):
                    print("✅ Data consistency: Same number of records")
                else:
                    print(f"⚠️ Data difference: Local={len(df_local)}, HF={len(df_hf)}")
            except Exception as e:
                # An HF failure is tolerated here; the check still passes.
                print(f"🤗 HF loading failed (expected): {e}")
        else:
            print("⚠️ No HF_TOKEN - skipping HF test")

        return True
    except Exception as e:
        # Deliberately broad: report any failure as False so the remaining
        # checks in the script still run.
        print(f"❌ Data source switching failed: {e}")
        return False
def main() -> None:
    """Run the four loader checks in order, then print a summary and usage hints."""
    # NOTE(review): a bare leading "π" in the messages below is a mis-decoded
    # emoji whose original glyph cannot be recovered from this copy; it is
    # kept unchanged rather than guessed.
    print("π AGRICULTURAL DATA LOADER TESTING")
    print("=" * 60)
    print()

    # List elements are evaluated left to right, so the checks run in this order.
    results = [
        ("Local Loading", test_local_loading()),
        ("HF Loading", test_hf_loading()),
        ("Auto Fallback", test_auto_fallback()),
        ("Source Switching", test_data_source_switching()),
    ]

    print("\nπ TEST SUMMARY")
    print("=" * 30)

    for test_name, result in results:
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{test_name:<20} {status}")

    passed = sum(1 for _, result in results if result)
    print(f"\n🎯 Results: {passed}/{len(results)} tests passed")

    if passed == len(results):
        print("π All tests passed! Data loader is working correctly.")
    else:
        print("⚠️ Some tests failed. Check the output above for details.")

    print("\n💡 Usage Examples:")
    print("# Load from local files:")
    print("loader = AgriculturalDataLoader.create_local_loader('/path/to/data')")
    print()
    print("# Load from Hugging Face:")
    print("loader = AgriculturalDataLoader.create_hf_loader('HackathonCRA/2024')")
    print()
    print("# Auto-detect with fallback:")
    print("loader = AgriculturalDataLoader(use_hf=True)")
    print("df = loader.load_all_files() # Tries HF first, falls back to local")


# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|