#!/usr/bin/env python3
"""
Test script to validate Hugging Face only loading.

Runs three console checks against ``data_loader.AgriculturalDataLoader`` and
prints a summary plus a deployment checklist. Each check returns a bool
instead of raising, so the script is meant to be executed directly rather
than collected by a test runner.
"""

import os
import warnings

# Silence all warnings so the console report stays readable.
warnings.filterwarnings('ignore')


def test_hf_only_loading() -> bool:
    """Test that the loader only works with Hugging Face.

    Builds a loader for the ``HackathonCRA/2024`` dataset using the
    ``HF_TOKEN`` environment variable (which may be unset), loads all files
    and prints a few summary statistics.

    Returns:
        True when loading and the summary prints succeed, False when any
        exception is raised while creating the loader, loading, or
        summarizing.
    """
    print("šŸ¤— TESTING HUGGING FACE ONLY LOADING")
    print("=" * 50)

    # Imported lazily so this module can be imported without data_loader.
    from data_loader import AgriculturalDataLoader

    # Check if HF token is available
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("āš ļø No HF_TOKEN found in environment variables")
        print("šŸ’” Set HF_TOKEN to test Hugging Face loading")
        print("šŸ”§ For this test, we'll try without token (may fail)")

    try:
        # Create loader (HF only)
        loader = AgriculturalDataLoader(
            dataset_id="HackathonCRA/2024",
            hf_token=hf_token,
        )
        print(f"šŸ¤— Attempting to load from dataset: {loader.dataset_id}")

        # Load data
        # NOTE(review): df is presumably a pandas DataFrame with 'year',
        # 'crop_type', 'plot_name' and 'is_herbicide' columns - confirm
        # against data_loader.
        df = loader.load_all_files()

        print(f"āœ… Success! Loaded {len(df):,} records from Hugging Face")
        print(f"šŸ“Š Years: {sorted(df['year'].unique())}")
        print(f"🌱 Crops: {df['crop_type'].nunique()}")
        print(f"šŸ“ Plots: {df['plot_name'].nunique()}")
        print(f"šŸ’Š Herbicide applications: {df['is_herbicide'].sum()}")
        return True
    except Exception as e:
        # Broad on purpose: any failure here is reported, not raised.
        print(f"āŒ Failed to load from Hugging Face: {e}")
        print("šŸ’” This is expected if the dataset doesn't exist yet")
        print("šŸ”§ Make sure to upload your dataset to HF Hub first")
        return False


def test_no_local_fallback() -> bool:
    """Test that there's no local fallback.

    Requests a dataset id that should not exist and expects the load to
    raise.

    Returns:
        True when creating the loader or loading raises, False when loading
        unexpectedly succeeds.
    """
    print("\n🚫 TESTING NO LOCAL FALLBACK")
    print("=" * 50)

    # Kept outside the try block: a missing data_loader module must not be
    # mistaken for the "expected failure" below.
    from data_loader import AgriculturalDataLoader

    try:
        # Create loader with non-existent dataset
        loader = AgriculturalDataLoader(
            dataset_id="nonexistent/dataset",
        )

        # This should fail without falling back to local
        df = loader.load_all_files()

        print(f"āŒ Unexpected success - loaded {len(df)} records")
        print("āš ļø This suggests local fallback is still active")
        return False
    except Exception as e:
        print(f"āœ… Expected failure: {e}")
        print("āœ… Confirmed: No local fallback, HF only")
        return True


def test_simple_usage() -> bool:
    """Test simple usage pattern.

    Prints the recommended usage snippet, then checks that a loader can be
    constructed with only a dataset id.

    Returns:
        True when the loader is created and its attributes are printed,
        False when importing data_loader or creating the loader raises.
    """
    print("\nšŸ“ SIMPLE USAGE EXAMPLE")
    print("=" * 50)

    print("šŸ’” Recommended usage pattern:")
    print()

    usage_code = '''
from data_loader import AgriculturalDataLoader

# Simple HF-only loader
loader = AgriculturalDataLoader(dataset_id="HackathonCRA/2024")

# Load data (will use HF_TOKEN from environment)
df = loader.load_all_files()

# Analyze data
print(f"Loaded {len(df)} records from Hugging Face")
'''
    print(usage_code)

    try:
        # Unlike the other checks, the import is inside the try block, so a
        # missing data_loader module is reported as a failure here.
        from data_loader import AgriculturalDataLoader

        loader = AgriculturalDataLoader(dataset_id="HackathonCRA/2024")
        print("āœ… Loader created successfully")
        print(f"šŸŽÆ Target dataset: {loader.dataset_id}")
        print(f"šŸ”‘ Using token: {'Yes' if loader.hf_token else 'No (from env)'}")
        return True
    except Exception as e:
        print(f"āŒ Failed to create loader: {e}")
        return False


def main() -> bool:
    """Run all tests.

    Executes the three checks in order, prints a pass/fail summary and the
    deployment checklist.

    Returns:
        True when every check other than "HF Only Loading" passed, False
        otherwise. The process exit code is not affected.
    """
    print("🚜 HUGGING FACE ONLY - VALIDATION TESTS")
    print("=" * 60)
    print()

    # Only this check may fail (the dataset might not be uploaded yet).
    optional_tests = {"HF Only Loading"}

    # The list literal evaluates the checks in order: HF loading, no local
    # fallback, simple usage.
    results = [
        ("HF Only Loading", test_hf_only_loading()),
        ("No Local Fallback", test_no_local_fallback()),
        ("Simple Usage", test_simple_usage()),
    ]

    # Summary
    print("\nšŸ“‹ TEST SUMMARY")
    print("=" * 30)

    for test_name, result in results:
        status = "āœ… PASS" if result else "āŒ FAIL"
        print(f"{test_name:<20} {status}")

    passed = sum(1 for _, result in results if result)
    print(f"\nšŸŽÆ Results: {passed}/{len(results)} tests passed")

    # A bare "passed >= 2" would also accept a failing fallback check, so
    # require every non-optional check to pass instead.
    validated = all(
        result for test_name, result in results
        if test_name not in optional_tests
    )
    if validated:
        print("šŸŽ‰ Validation successful! Loader is HF-only.")
    else:
        print("āš ļø Validation issues detected.")

    print("\nšŸš€ DEPLOYMENT CHECKLIST:")
    print("āœ… Remove local file dependencies")
    print("āœ… HF-only data loading")
    print("āœ… No fallback mechanisms")
    print("šŸ”² Upload dataset to HF Hub")
    print("šŸ”² Set HF_TOKEN in production")
    print("šŸ”² Test with real HF dataset")

    return validated


if __name__ == "__main__":
    main()