mcp / test_hf_only.py
Tracy André
Remove local file loading - HF dataset only
8ecf5f5
raw
history blame
4.73 kB
#!/usr/bin/env python3
"""
Test script to validate Hugging Face only loading.

Runs a handful of console checks against ``AgriculturalDataLoader`` from the
``data_loader`` module: loading the ``HackathonCRA/2024`` dataset, confirming
that a non-existent dataset id fails, and a basic construction smoke test.
Each check prints its progress and returns a boolean; ``main()`` prints a
summary of the results.
"""
import os
import warnings

# Suppress every warning for the whole process -- presumably to keep the
# printed report uncluttered. NOTE(review): this also hides deprecation
# warnings raised by the loader's dependencies.
warnings.filterwarnings('ignore')
def test_hf_only_loading():
    """Check that the loader can fetch the dataset from Hugging Face.

    Builds an ``AgriculturalDataLoader`` for ``HackathonCRA/2024``, passing
    the value of the ``HF_TOKEN`` environment variable (``None`` when unset),
    calls ``load_all_files()`` and prints a short summary of the result
    (record count, years, crops, plots, herbicide applications).

    Returns:
        bool: True when loading and summarising succeeded; False when the
        ``data_loader`` module could not be imported or any step raised.
    """
    print("πŸ€— TESTING HUGGING FACE ONLY LOADING")
    print("=" * 50)

    # Guard the import: a missing or broken loader module should be recorded
    # as a failed check rather than aborting the whole validation run.
    try:
        from data_loader import AgriculturalDataLoader
    except ImportError as e:
        print(f"❌ Failed to import data_loader: {e}")
        return False

    # Check if HF token is available
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("⚠️ No HF_TOKEN found in environment variables")
        print("πŸ’‘ Set HF_TOKEN to test Hugging Face loading")
        print("πŸ”§ For this test, we'll try without token (may fail)")

    try:
        # Create loader (HF only)
        loader = AgriculturalDataLoader(
            dataset_id="HackathonCRA/2024",
            hf_token=hf_token
        )
        print(f"πŸ€— Attempting to load from dataset: {loader.dataset_id}")

        # Load data
        df = loader.load_all_files()
        print(f"βœ… Success! Loaded {len(df):,} records from Hugging Face")
        print(f"πŸ“Š Years: {sorted(df['year'].unique())}")
        print(f"🌱 Crops: {df['crop_type'].nunique()}")
        print(f"πŸ“ Plots: {df['plot_name'].nunique()}")
        print(f"πŸ’Š Herbicide applications: {df['is_herbicide'].sum()}")
        return True
    except Exception as e:
        # Broad on purpose: this is a report-style check, so any failure is
        # shown to the user and turned into a False result.
        print(f"❌ Failed to load from Hugging Face: {e}")
        print("πŸ’‘ This is expected if the dataset doesn't exist yet")
        print("πŸ”§ Make sure to upload your dataset to HF Hub first")
        return False
def test_no_local_fallback():
    """Check that loading a non-existent dataset fails instead of succeeding.

    Builds an ``AgriculturalDataLoader`` for the id ``nonexistent/dataset``
    and calls ``load_all_files()``. An exception from either step is treated
    as the expected outcome; a successful load is reported as a sign that a
    local fallback is still active.

    Returns:
        bool: True when construction or loading raised; False when the load
        unexpectedly succeeded or the ``data_loader`` module could not be
        imported.
    """
    print("\n🚫 TESTING NO LOCAL FALLBACK")
    print("=" * 50)

    # Keep the import outside the main try block: an ImportError says nothing
    # about fallback behaviour and must not be counted as the expected
    # failure. It is reported as a failed check instead of crashing the run.
    try:
        from data_loader import AgriculturalDataLoader
    except ImportError as e:
        print(f"❌ Failed to import data_loader: {e}")
        return False

    try:
        # Create loader with non-existent dataset
        loader = AgriculturalDataLoader(
            dataset_id="nonexistent/dataset"
        )

        # This should fail without falling back to local
        df = loader.load_all_files()
        print(f"❌ Unexpected success - loaded {len(df)} records")
        print("⚠️ This suggests local fallback is still active")
        return False
    except Exception as e:
        print(f"βœ… Expected failure: {e}")
        print("βœ… Confirmed: No local fallback, HF only")
        return True
def test_simple_usage():
    """Print the recommended usage snippet and smoke-test loader creation.

    After showing an example of how the loader is meant to be used, this
    imports ``AgriculturalDataLoader``, instantiates it for
    ``HackathonCRA/2024`` and echoes its ``dataset_id`` and whether its
    ``hf_token`` attribute is truthy.

    Returns:
        bool: True if the loader was created and described without error,
        False if anything in that sequence raised.
    """
    # Section banner followed by a blank separator line.
    for banner_line in (
        "\nπŸ“ SIMPLE USAGE EXAMPLE",
        "=" * 50,
        "πŸ’‘ Recommended usage pattern:",
    ):
        print(banner_line)
    print()

    example_snippet = '''
from data_loader import AgriculturalDataLoader
# Simple HF-only loader
loader = AgriculturalDataLoader(dataset_id="HackathonCRA/2024")
# Load data (will use HF_TOKEN from environment)
df = loader.load_all_files()
# Analyze data
print(f"Loaded {len(df)} records from Hugging Face")
'''
    print(example_snippet)

    # Import, construction and the descriptive prints all stay inside the
    # try so that a failure in any of them yields a False result.
    try:
        from data_loader import AgriculturalDataLoader

        loader = AgriculturalDataLoader(dataset_id="HackathonCRA/2024")
        print("βœ… Loader created successfully")
        print(f"🎯 Target dataset: {loader.dataset_id}")
        print(f"πŸ”‘ Using token: {'Yes' if loader.hf_token else 'No (from env)'}")
    except Exception as e:
        print(f"❌ Failed to create loader: {e}")
        return False
    else:
        return True
def main():
    """Run all validation checks and print a pass/fail report.

    Executes, in order, ``test_hf_only_loading``, ``test_no_local_fallback``
    and ``test_simple_usage``, prints one summary line per check and the
    total number passed, then prints the overall verdict and a static
    deployment checklist.

    The overall verdict is successful only when every required check passes.
    "HF Only Loading" is the single optional check, because the dataset may
    not have been uploaded yet. A plain ``passed >= 2`` count would also
    report success when "No Local Fallback" failed, which is the check that
    actually demonstrates HF-only behaviour.
    """
    print("🚜 HUGGING FACE ONLY - VALIDATION TESTS")
    print("=" * 60)
    print()

    # (label, check function, required for overall success)
    checks = [
        # Test 1: HF loading -- allowed to fail if the dataset doesn't exist
        ("HF Only Loading", test_hf_only_loading, False),
        # Test 2: No local fallback
        ("No Local Fallback", test_no_local_fallback, True),
        # Test 3: Simple usage
        ("Simple Usage", test_simple_usage, True),
    ]

    # Run the checks in declaration order, keeping each outcome with its label.
    results = [(label, check(), required) for label, check, required in checks]

    # Summary
    print("\nπŸ“‹ TEST SUMMARY")
    print("=" * 30)

    for test_name, result, _ in results:
        status = "βœ… PASS" if result else "❌ FAIL"
        print(f"{test_name:<20} {status}")

    passed = sum(1 for _, result, _ in results if result)
    print(f"\n🎯 Results: {passed}/{len(results)} tests passed")

    # Only the required checks decide the verdict.
    required_ok = all(result for _, result, required in results if required)
    if required_ok:
        print("πŸŽ‰ Validation successful! Loader is HF-only.")
    else:
        print("⚠️ Validation issues detected.")

    print("\nπŸš€ DEPLOYMENT CHECKLIST:")
    print("βœ… Remove local file dependencies")
    print("βœ… HF-only data loading")
    print("βœ… No fallback mechanisms")
    print("πŸ”² Upload dataset to HF Hub")
    print("πŸ”² Set HF_TOKEN in production")
    print("πŸ”² Test with real HF dataset")
# Run the validation checks only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()