File size: 4,732 Bytes
8ecf5f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/env python3
"""
Test script to validate Hugging Face only loading.
"""

import os
import warnings
# With no category or module argument this filter ignores every warning
# raised anywhere in the process from this point on.
# NOTE(review): presumably meant to keep the printed report free of
# third-party library noise — confirm that hiding all warnings is intended.
warnings.filterwarnings('ignore')

def test_hf_only_loading():
    """Try to load the ``HackathonCRA/2024`` dataset through the loader.

    The token is taken from the ``HF_TOKEN`` environment variable; when it is
    missing a notice is printed and the load is attempted anyway. On success
    a few figures about the loaded data are printed.

    Returns:
        bool: True when loading and reporting complete, False when any
        exception is raised while creating the loader, loading, or reporting.
    """
    print("πŸ€— TESTING HUGGING FACE ONLY LOADING")
    print("=" * 50)

    from data_loader import AgriculturalDataLoader

    # A missing token is only reported, never fatal at this point.
    token = os.getenv("HF_TOKEN")
    if not token:
        for notice in (
            "⚠️  No HF_TOKEN found in environment variables",
            "πŸ’‘ Set HF_TOKEN to test Hugging Face loading",
            "πŸ”§ For this test, we'll try without token (may fail)",
        ):
            print(notice)

    try:
        # The loader is created for the Hugging Face dataset only.
        loader = AgriculturalDataLoader(dataset_id="HackathonCRA/2024", hf_token=token)
        print(f"πŸ€— Attempting to load from dataset: {loader.dataset_id}")

        df = loader.load_all_files()

        # Each figure is printed on its own so that a failing column lookup
        # still leaves the earlier lines on screen before the handler runs.
        print(f"βœ… Success! Loaded {len(df):,} records from Hugging Face")
        print(f"πŸ“Š Years: {sorted(df['year'].unique())}")
        print(f"🌱 Crops: {df['crop_type'].nunique()}")
        print(f"πŸ“ Plots: {df['plot_name'].nunique()}")
        print(f"πŸ’Š Herbicide applications: {df['is_herbicide'].sum()}")
    except Exception as e:
        print(f"❌ Failed to load from Hugging Face: {e}")
        print("πŸ’‘ This is expected if the dataset doesn't exist yet")
        print("πŸ”§ Make sure to upload your dataset to HF Hub first")
        return False

    return True

def test_no_local_fallback():
    """Check that loading a non-existent dataset fails instead of succeeding.

    A loader is created for ``nonexistent/dataset`` and asked to load its
    files. Any exception counts as the expected outcome; a successful load is
    reported as a sign that a local fallback is still in place.

    Returns:
        bool: True when an exception was raised, False when data was loaded.
    """
    print("\n🚫 TESTING NO LOCAL FALLBACK")
    print("=" * 50)

    from data_loader import AgriculturalDataLoader

    try:
        # Both construction and loading are inside the guarded region, so a
        # failure in either step is treated as the expected failure.
        loader = AgriculturalDataLoader(dataset_id="nonexistent/dataset")
        df = loader.load_all_files()
    except Exception as e:
        print(f"βœ… Expected failure: {e}")
        print("βœ… Confirmed: No local fallback, HF only")
        return True
    else:
        try:
            # Reporting stays guarded as well, matching the original scope of
            # the exception handler.
            print(f"❌ Unexpected success - loaded {len(df)} records")
            print("⚠️  This suggests local fallback is still active")
            return False
        except Exception as e:
            print(f"βœ… Expected failure: {e}")
            print("βœ… Confirmed: No local fallback, HF only")
            return True

def test_simple_usage():
    """Print the recommended usage snippet and check a loader can be created.

    Only construction of the loader is exercised here; no data is loaded.

    Returns:
        bool: True when the loader was created and described, False when the
        import, the construction or the reporting raised an exception.
    """
    for heading in (
        "\nπŸ“ SIMPLE USAGE EXAMPLE",
        "=" * 50,
        "πŸ’‘ Recommended usage pattern:",
    ):
        print(heading)
    print()

    # Shown to the user verbatim; it is printed, never executed.
    snippet = '''
from data_loader import AgriculturalDataLoader

# Simple HF-only loader
loader = AgriculturalDataLoader(dataset_id="HackathonCRA/2024")

# Load data (will use HF_TOKEN from environment)
df = loader.load_all_files()

# Analyze data
print(f"Loaded {len(df)} records from Hugging Face")
'''
    print(snippet)

    try:
        # The import sits inside the guarded region, so a missing module is
        # reported as a failed check rather than raised to the caller.
        from data_loader import AgriculturalDataLoader
        loader = AgriculturalDataLoader(dataset_id="HackathonCRA/2024")
        print("βœ… Loader created successfully")
        print(f"🎯 Target dataset: {loader.dataset_id}")
        print(f"πŸ”‘ Using token: {'Yes' if loader.hf_token else 'No (from env)'}")
    except Exception as e:
        print(f"❌ Failed to create loader: {e}")
        return False

    return True

def main():
    """Run all validation checks, then print a summary and a checklist.

    The checks run in a fixed order: Hugging Face loading, no local fallback,
    simple usage. Each returns a truthy value on success.

    The overall verdict tolerates a failure of the "HF Only Loading" check
    only, because the dataset may not have been uploaded yet. Every other
    check must pass. Previously any single failure was tolerated, so a broken
    "No Local Fallback" check could still be reported as a successful
    HF-only validation.
    """
    print("🚜 HUGGING FACE ONLY - VALIDATION TESTS")
    print("=" * 60)
    print()

    # (label, check) pairs, executed in this order.
    checks = (
        ("HF Only Loading", test_hf_only_loading),
        ("No Local Fallback", test_no_local_fallback),
        ("Simple Usage", test_simple_usage),
    )
    # Checks whose failure does not invalidate the run.
    optional_checks = {"HF Only Loading"}

    results = [(label, check()) for label, check in checks]

    # Summary
    print("\nπŸ“‹ TEST SUMMARY")
    print("=" * 30)

    passed = 0
    for test_name, result in results:
        status = "βœ… PASS" if result else "❌ FAIL"
        print(f"{test_name:<20} {status}")
        if result:
            passed += 1

    print(f"\n🎯 Results: {passed}/{len(results)} tests passed")

    # Judge by which checks passed, not by how many.
    required_ok = all(
        result for test_name, result in results
        if test_name not in optional_checks
    )
    if required_ok:
        print("πŸŽ‰ Validation successful! Loader is HF-only.")
    else:
        print("⚠️  Validation issues detected.")

    print("\nπŸš€ DEPLOYMENT CHECKLIST:")
    print("βœ… Remove local file dependencies")
    print("βœ… HF-only data loading")
    print("βœ… No fallback mechanisms")
    print("πŸ”² Upload dataset to HF Hub")
    print("πŸ”² Set HF_TOKEN in production")
    print("πŸ”² Test with real HF dataset")

# Run the validation checks only when this file is executed as a script;
# importing it as a module runs nothing beyond the top-level setup.
if __name__ == "__main__":
    main()