#!/usr/bin/env python3
"""
Test downloading files from URLs
"""

import requests
import pandas as pd
import PyPDF2
from io import BytesIO

def test_file_download():
    """Fetch each sample URL and print a short summary of the parsed file.

    Every case is requested over HTTP with a 10-second timeout. On a 200
    response the payload is parsed in memory: Excel files are loaded with
    pandas (shape and column names are printed) and PDFs with PyPDF2 (page
    count is printed). Any other status code, and any exception raised
    while downloading or parsing, is reported on stdout and the loop moves
    on to the next case.
    """

    # Placeholder cases: the example.com URLs are hypothetical. The
    # "question" entries are carried along but not read by this function.
    cases = [
        {
            "url": "https://example.com/sales_data.xlsx",
            "type": "excel",
            "question": "What is the total sales from the Excel file at https://example.com/sales_data.xlsx?"
        },
        {
            "url": "https://example.com/document.pdf",
            "type": "pdf",
            "question": "How many times does 'therefore' appear in https://example.com/document.pdf?"
        }
    ]

    for case in cases:
        kind = case["type"]
        url = case["url"]
        print(f"\nTesting {kind} download:")
        print(f"URL: {url}")

        try:
            resp = requests.get(url, timeout=10)

            # Guard clause: anything other than 200 is reported and skipped.
            if resp.status_code != 200:
                print(f"❌ Failed to download: {resp.status_code}")
                continue

            print("✅ File downloaded successfully")

            # Parse straight from the response bytes; nothing touches disk.
            if kind == "excel":
                frame = pd.read_excel(BytesIO(resp.content))
                print(f"Excel shape: {frame.shape}")
                print(f"Columns: {list(frame.columns)}")
            elif kind == "pdf":
                reader = PyPDF2.PdfReader(BytesIO(resp.content))
                page_count = len(reader.pages)
                print(f"PDF pages: {page_count}")

        except Exception as exc:
            # Best-effort script: report the failure and keep going.
            print(f"❌ Error: {exc}")

if __name__ == "__main__":
    # Run the download check only when this file is executed as a script,
    # not when it is imported as a module.
    test_file_download()