Spaces:
Sleeping
Sleeping
File size: 4,732 Bytes
8ecf5f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
#!/usr/bin/env python3
"""
Test script to validate Hugging Face only loading.
"""
import os
import warnings
warnings.filterwarnings('ignore')
def test_hf_only_loading():
    """Check that the loader can fetch data from Hugging Face.

    Builds an ``AgriculturalDataLoader`` for the ``HackathonCRA/2024``
    dataset, passing along the ``HF_TOKEN`` environment variable (which may
    be unset), calls ``load_all_files()`` and prints a short summary of the
    result.

    Returns:
        bool: ``True`` when loading and summarising succeed, ``False`` when
        any exception is raised while doing so.
    """
    print("π€ TESTING HUGGING FACE ONLY LOADING")
    print("=" * 50)

    # Imported lazily and outside the ``try`` so that a missing module is
    # reported as an import error rather than as a loading failure.
    from data_loader import AgriculturalDataLoader

    # Check if HF token is available
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("β οΈ No HF_TOKEN found in environment variables")
        print("π‘ Set HF_TOKEN to test Hugging Face loading")
        print("π§ For this test, we'll try without token (may fail)")

    try:
        # Create loader (HF only)
        loader = AgriculturalDataLoader(
            dataset_id="HackathonCRA/2024",
            hf_token=hf_token,
        )
        print(f"π€ Attempting to load from dataset: {loader.dataset_id}")

        # Load data
        df = loader.load_all_files()

        # The success marker was previously a mis-encoded check mark whose
        # byte sequence contained a line break, splitting the literal in two.
        print(f"✅ Success! Loaded {len(df):,} records from Hugging Face")
        print(f"π Years: {sorted(df['year'].unique())}")
        print(f"π± Crops: {df['crop_type'].nunique()}")
        print(f"π Plots: {df['plot_name'].nunique()}")
        print(f"π Herbicide applications: {df['is_herbicide'].sum()}")
        return True
    except Exception as e:
        # Top-level test boundary: report any failure instead of crashing.
        print(f"β Failed to load from Hugging Face: {e}")
        print("π‘ This is expected if the dataset doesn't exist yet")
        print("π§ Make sure to upload your dataset to HF Hub first")
        return False
def test_no_local_fallback():
    """Check that loading a non-existent dataset fails.

    Creates an ``AgriculturalDataLoader`` for ``nonexistent/dataset`` and
    calls ``load_all_files()``. A successful load is treated as evidence
    that a local fallback is still in place.

    Returns:
        bool: ``True`` when the load raises an exception (the expected
        outcome), ``False`` when it unexpectedly returns data.
    """
    print("\nπ« TESTING NO LOCAL FALLBACK")
    print("=" * 50)

    # Kept outside the ``try``: an ImportError must not be mistaken for the
    # "expected failure" this test is looking for.
    from data_loader import AgriculturalDataLoader

    try:
        # Create loader with non-existent dataset
        loader = AgriculturalDataLoader(
            dataset_id="nonexistent/dataset",
        )

        # This should fail without falling back to local
        df = loader.load_all_files()

        print(f"β Unexpected success - loaded {len(df)} records")
        print("β οΈ This suggests local fallback is still active")
        return False
    except Exception as e:
        # NOTE(review): any exception type counts as the expected failure;
        # narrow this once the loader's error types are known.
        print(f"✅ Expected failure: {e}")
        print("✅ Confirmed: No local fallback, HF only")
        return True
def test_simple_usage():
    """Print the recommended usage snippet and check the loader can be built.

    Only constructs an ``AgriculturalDataLoader`` for ``HackathonCRA/2024``
    and reads its ``dataset_id`` and ``hf_token`` attributes; it does not
    call ``load_all_files()``.

    Returns:
        bool: ``True`` when the loader is created and its attributes are
        printed, ``False`` when importing or constructing it raises.
    """
    print("\nπ SIMPLE USAGE EXAMPLE")
    print("=" * 50)
    print("π‘ Recommended usage pattern:")
    print()

    # Printed verbatim as documentation; this snippet is not executed.
    usage_code = '''
from data_loader import AgriculturalDataLoader
# Simple HF-only loader
loader = AgriculturalDataLoader(dataset_id="HackathonCRA/2024")
# Load data (will use HF_TOKEN from environment)
df = loader.load_all_files()
# Analyze data
print(f"Loaded {len(df)} records from Hugging Face")
'''
    print(usage_code)

    try:
        from data_loader import AgriculturalDataLoader

        loader = AgriculturalDataLoader(dataset_id="HackathonCRA/2024")
        # The success marker was previously a mis-encoded check mark whose
        # byte sequence contained a line break, splitting the literal in two.
        print("✅ Loader created successfully")
        print(f"π― Target dataset: {loader.dataset_id}")
        print(f"π Using token: {'Yes' if loader.hf_token else 'No (from env)'}")
        return True
    except Exception as e:
        print(f"β Failed to create loader: {e}")
        return False
def main():
    """Run all validation tests and print a summary and deployment checklist.

    Runs ``test_hf_only_loading``, ``test_no_local_fallback`` and
    ``test_simple_usage`` in that order, prints a PASS/FAIL line for each,
    and then an overall verdict. The "HF Only Loading" test is allowed to
    fail (the dataset may not be uploaded yet); every other test must pass
    for the run to be reported as successful.
    """
    print("π HUGGING FACE ONLY - VALIDATION TESTS")
    print("=" * 60)
    print()

    # Tests whose failure does not invalidate the run.
    optional_tests = {"HF Only Loading"}

    results = [
        # Test 1: HF loading
        ("HF Only Loading", test_hf_only_loading()),
        # Test 2: No local fallback
        ("No Local Fallback", test_no_local_fallback()),
        # Test 3: Simple usage
        ("Simple Usage", test_simple_usage()),
    ]

    # Summary
    print("\nπ TEST SUMMARY")
    print("=" * 30)

    passed = 0
    for test_name, result in results:
        status = "✅ PASS" if result else "β FAIL"
        print(f"{test_name:<20} {status}")
        if result:
            passed += 1

    print(f"\nπ― Results: {passed}/{len(results)} tests passed")

    # Allow HF loading to fail if dataset doesn't exist, but nothing else.
    # A bare pass count would also accept a failing "No Local Fallback"
    # test, which is exactly the property this script is meant to verify.
    required_ok = all(
        result for test_name, result in results
        if test_name not in optional_tests
    )
    if required_ok:
        print("π Validation successful! Loader is HF-only.")
    else:
        print("β οΈ Validation issues detected.")

    print("\nπ DEPLOYMENT CHECKLIST:")
    print("✅ Remove local file dependencies")
    print("✅ HF-only data loading")
    print("✅ No fallback mechanisms")
    print("π² Upload dataset to HF Hub")
    print("π² Set HF_TOKEN in production")
    print("π² Test with real HF dataset")
# Script entry point: run the validation suite only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()
|