File size: 6,096 Bytes
5977115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!/usr/bin/env python3
"""
Test script to demonstrate loading data from both local files and Hugging Face.
"""

import warnings
warnings.filterwarnings('ignore')

from data_loader import AgriculturalDataLoader
import os

def test_local_loading(
    data_path: str = "/Users/tracyandre/Downloads/OneDrive_1_9-17-2025",
) -> bool:
    """Test loading from local files.

    Builds a loader with ``AgriculturalDataLoader.create_local_loader``,
    loads everything with ``load_all_files()`` and prints a short summary.

    Args:
        data_path: Directory passed to ``create_local_loader``. The default is
            the path this script has always used, so zero-argument callers
            behave exactly as before; pass another directory to run the check
            on a different machine.

    Returns:
        True if loading and the summary printing completed without raising,
        False otherwise (the exception message is printed).
    """
    print("πŸ” TESTING LOCAL FILE LOADING")
    print("=" * 50)

    try:
        # Create loader for local files
        loader = AgriculturalDataLoader.create_local_loader(data_path=data_path)

        # Load data
        df = loader.load_all_files()

        # The summary reads the 'year', 'crop_type' and 'plot_name' columns;
        # presumably df is a pandas DataFrame -- confirm against data_loader.
        print("βœ… Local loading successful!")
        print(f"πŸ“Š Records: {len(df):,}")
        print(f"πŸ“… Years: {sorted(df['year'].unique())}")
        print(f"🌱 Crops: {df['crop_type'].nunique()}")
        print(f"πŸ“ Plots: {df['plot_name'].nunique()}")

        return True

    except Exception as e:
        # Deliberately broad: this is the reporting boundary of a smoke test,
        # so any failure is turned into a printed message and a False result.
        print(f"❌ Local loading failed: {e}")
        return False

def test_hf_loading():
    """Exercise the Hugging Face loading path.

    Requires ``HF_TOKEN`` in the environment. When it is missing or empty the
    check prints a hint and returns False without building a loader.

    Returns:
        True when the dataset loaded and its summary printed without raising;
        False when the token is absent or any step raised.
    """
    print("\nπŸ€— TESTING HUGGING FACE LOADING")
    print("=" * 50)

    token = os.environ.get("HF_TOKEN")
    if token:
        try:
            # Build a loader that targets the Hugging Face dataset.
            hub_loader = AgriculturalDataLoader.create_hf_loader(
                dataset_id="HackathonCRA/2024",
                hf_token=token,
            )
            frame = hub_loader.load_from_huggingface()

            # Summary of what was loaded.
            print("βœ… Hugging Face loading successful!")
            print(f"πŸ“Š Records: {len(frame):,}")
            print(f"πŸ“… Years: {sorted(frame['year'].unique())}")
            print(f"🌱 Crops: {frame['crop_type'].nunique()}")
            print(f"πŸ“ Plots: {frame['plot_name'].nunique()}")
        except Exception as err:
            # Any failure (loading or summarising) is reported, not raised.
            print(f"❌ Hugging Face loading failed: {err}")
            print("πŸ’‘ Make sure the dataset exists and you have access")
            return False
        return True

    # No usable token: report and count the check as not passed.
    print("⚠️  No HF_TOKEN found in environment variables")
    print("πŸ’‘ Set HF_TOKEN to test Hugging Face loading")
    return False

def test_auto_fallback(
    data_path: str = "/Users/tracyandre/Downloads/OneDrive_1_9-17-2025",
) -> bool:
    """Test automatic fallback from HF to local files.

    Constructs a loader with ``use_hf=True`` and a dataset id that is not
    expected to exist, then calls ``load_all_files()``.

    Args:
        data_path: Local directory passed to the loader. The default is the
            path this script has always used, so zero-argument callers behave
            exactly as before.

    Returns:
        True if ``load_all_files()`` returned and the summary printed without
        raising, False otherwise (the exception message is printed).
    """
    print("\nπŸ”„ TESTING AUTO FALLBACK (HF -> LOCAL)")
    print("=" * 50)

    try:
        # Create loader with HF enabled but potentially failing
        loader = AgriculturalDataLoader(
            data_path=data_path,
            dataset_id="nonexistent-dataset",  # This should fail
            use_hf=True,
        )

        # This should try HF first, then fallback to local
        # NOTE(review): nothing here verifies that the data really came from
        # the local files; success only means load_all_files() did not raise.
        df = loader.load_all_files()

        print("βœ… Auto fallback successful!")
        print(f"πŸ“Š Records: {len(df):,}")
        print("πŸ”„ Successfully fell back to local files after HF failure")

        return True

    except Exception as e:
        # Deliberately broad: any failure becomes a printed message + False.
        print(f"❌ Auto fallback failed: {e}")
        return False

def test_data_source_switching(
    data_path: str = "/Users/tracyandre/Downloads/OneDrive_1_9-17-2025",
) -> bool:
    """Test switching between data sources.

    Loads once with ``set_data_source(use_hf=False)`` and, only when
    ``HF_TOKEN`` is set in the environment, switches to ``use_hf=True`` and
    loads again, comparing the two record counts.

    Args:
        data_path: Local directory passed to the loader. The default is the
            path this script has always used, so zero-argument callers behave
            exactly as before.

    Returns:
        True if the local load succeeded. A failing HF load or a record-count
        mismatch is only reported and does not change the result. False if
        constructing the loader or the local load raised.
    """
    print("\nπŸ”€ TESTING DATA SOURCE SWITCHING")
    print("=" * 50)

    try:
        # Create loader
        loader = AgriculturalDataLoader(
            data_path=data_path,
            dataset_id="HackathonCRA/2024",
        )

        # Test local loading
        loader.set_data_source(use_hf=False)
        df_local = loader.load_all_files()
        print(f"πŸ“ Local: {len(df_local):,} records")

        # Test switching to HF (if token available)
        if os.environ.get("HF_TOKEN"):
            loader.set_data_source(use_hf=True)
            try:
                df_hf = loader.load_all_files()
                print(f"πŸ€— HF: {len(df_hf):,} records")

                # Compare data (record counts only, not contents)
                if len(df_local) == len(df_hf):
                    print("βœ… Data consistency: Same number of records")
                else:
                    print(f"⚠️  Data difference: Local={len(df_local)}, HF={len(df_hf)}")

            except Exception as e:
                # An HF failure is tolerated here: it is reported and the
                # check still counts as passed.
                print(f"πŸ€— HF loading failed (expected): {e}")
        else:
            print("⚠️  No HF_TOKEN - skipping HF test")

        return True

    except Exception as e:
        # Deliberately broad: any failure becomes a printed message + False.
        print(f"❌ Data source switching failed: {e}")
        return False

def main():
    """Run every loader check, print a pass/fail table, then usage hints."""
    print("🚜 AGRICULTURAL DATA LOADER TESTING")
    print("=" * 60)
    print()

    # The list literal is evaluated top to bottom, so the checks run in this
    # order; each one returns True on success and False otherwise.
    outcomes = [
        ("Local Loading", test_local_loading()),
        ("HF Loading", test_hf_loading()),
        ("Auto Fallback", test_auto_fallback()),
        ("Source Switching", test_data_source_switching()),
    ]

    # Summary table: one left-aligned row per check.
    print("\nπŸ“‹ TEST SUMMARY")
    print("=" * 30)
    for label, ok in outcomes:
        verdict = "βœ… PASS" if ok else "❌ FAIL"
        print(f"{label:<20} {verdict}")

    total = len(outcomes)
    passed = sum(1 for _, ok in outcomes if ok)
    print(f"\n🎯 Results: {passed}/{total} tests passed")

    if passed != total:
        print("⚠️  Some tests failed. Check the output above for details.")
    else:
        print("πŸŽ‰ All tests passed! Data loader is working correctly.")

    # Usage hints; an empty entry prints a blank separator line.
    usage_lines = (
        "\nπŸ’‘ Usage Examples:",
        "# Load from local files:",
        "loader = AgriculturalDataLoader.create_local_loader('/path/to/data')",
        "",
        "# Load from Hugging Face:",
        "loader = AgriculturalDataLoader.create_hf_loader('HackathonCRA/2024')",
        "",
        "# Auto-detect with fallback:",
        "loader = AgriculturalDataLoader(use_hf=True)",
        "df = loader.load_all_files()  # Tries HF first, falls back to local",
    )
    for line in usage_lines:
        print(line)

# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()