Final_Assignment_Agent / test_download_files.py
Markus Clauss DIRU Vetsuisse
First agent trial
1637cd5
#!/usr/bin/env python3
"""
Test downloading files from URLs
"""
import requests
import pandas as pd
import PyPDF2
from io import BytesIO
def test_file_download():
    """Download each sample file and print a short summary of its contents.

    For every entry in the built-in list of sample URLs the function issues
    an HTTP GET (10 second timeout) and, on a 200 response, parses the body
    in memory according to the entry's ``type``:

    * ``excel`` -- loaded with ``pandas.read_excel``; the frame's shape and
      column names are printed.
    * ``pdf`` -- opened with ``PyPDF2.PdfReader``; the page count is printed.

    Any other ``type`` is reported as unsupported. Non-200 responses and any
    exception raised while downloading or parsing are printed rather than
    raised, so one failing entry never prevents the remaining entries from
    being tried.

    Returns:
        None. All results are written to standard output.
    """
    # Example URLs (these are hypothetical). The "question" field is carried
    # along with each entry but is not read by this function.
    test_urls = [
        {
            "url": "https://example.com/sales_data.xlsx",
            "type": "excel",
            "question": "What is the total sales from the Excel file at https://example.com/sales_data.xlsx?",
        },
        {
            "url": "https://example.com/document.pdf",
            "type": "pdf",
            "question": "How many times does 'therefore' appear in https://example.com/document.pdf?",
        },
    ]

    for test in test_urls:
        print(f"\nTesting {test['type']} download:")
        print(f"URL: {test['url']}")

        try:
            # Download the file
            response = requests.get(test['url'], timeout=10)

            # Guard clause: nothing to parse unless the server answered 200.
            if response.status_code != 200:
                print(f"❌ Failed to download: {response.status_code}")
                continue

            print("✅ File downloaded successfully")

            # Both parsers accept a file-like object, so wrap the raw bytes
            # once instead of writing them to disk.
            payload = BytesIO(response.content)

            # Process based on file type
            if test['type'] == 'excel':
                df = pd.read_excel(payload)
                print(f"Excel shape: {df.shape}")
                print(f"Columns: {list(df.columns)}")
            elif test['type'] == 'pdf':
                pdf_reader = PyPDF2.PdfReader(payload)
                print(f"PDF pages: {len(pdf_reader.pages)}")
            else:
                # Make an unrecognised type visible instead of silently
                # reporting a successful download and doing nothing with it.
                print(f"❌ Unsupported file type: {test['type']}")
        except Exception as e:
            # The catch-all is deliberate: this is a best-effort smoke check,
            # so any download or parsing failure is reported and the loop
            # moves on to the next URL.
            print(f"❌ Error: {e}")
# Run the download check only when this file is executed as a script;
# importing the module must not trigger any network access.
if __name__ == "__main__":
    test_file_download()