#!/usr/bin/env python3
"""Smoke-test downloading files from URLs and opening them by file type."""

from io import BytesIO

import pandas as pd
import PyPDF2
import requests


def _report_excel(raw):
    """Parse *raw* bytes with ``pd.read_excel`` and print shape and columns."""
    frame = pd.read_excel(BytesIO(raw))
    print(f"Excel shape: {frame.shape}")
    print(f"Columns: {list(frame.columns)}")


def _report_pdf(raw):
    """Open *raw* bytes with ``PyPDF2.PdfReader`` and print the page count."""
    reader = PyPDF2.PdfReader(BytesIO(raw))
    print(f"PDF pages: {len(reader.pages)}")


def test_file_download():
    """Fetch each sample URL and print a short summary of the downloaded file.

    For every entry the function prints the file type and URL, issues a GET
    request with a 10-second timeout, and on HTTP 200 hands the response body
    to the reporter registered for the entry's type. Any other status code is
    printed as a failure. Exceptions raised while downloading or parsing are
    caught and printed so that the remaining entries are still attempted.

    Returns:
        None. All results are written to standard output.
    """
    # Sample entries; the URLs are hypothetical placeholders.
    # The "question" values are not read anywhere in this function.
    samples = [
        {
            "url": "https://example.com/sales_data.xlsx",
            "type": "excel",
            "question": "What is the total sales from the Excel file at https://example.com/sales_data.xlsx?",
        },
        {
            "url": "https://example.com/document.pdf",
            "type": "pdf",
            "question": "How many times does 'therefore' appear in https://example.com/document.pdf?",
        },
    ]

    # Maps an entry's "type" to the routine that summarizes its content.
    reporters = {
        "excel": _report_excel,
        "pdf": _report_pdf,
    }

    for case in samples:
        kind = case["type"]
        print(f"\nTesting {kind} download:")
        address = case["url"]
        print(f"URL: {address}")

        try:
            response = requests.get(address, timeout=10)

            # Guard clause: anything other than 200 is reported and skipped.
            if response.status_code != 200:
                print(f"❌ Failed to download: {response.status_code}")
                continue

            print("✅ File downloaded successfully")

            # Types without a registered reporter are silently left alone.
            reporter = reporters.get(kind)
            if reporter is not None:
                reporter(response.content)
        except Exception as exc:
            # Best-effort boundary: report the problem and move on.
            print(f"❌ Error: {exc}")


if __name__ == "__main__":
    test_file_download()