# app.py - DuckDuckGo image scraper (Gradio app).
# Author: namelessai
# Revision: "Update app.py" (commit d011276, verified)
import os
import re
import shutil
import tempfile
import time
import zipfile
from pathlib import Path
from urllib.parse import urlparse

import gradio as gr
import requests
from ddgs import DDGS
# Hard upper bound on the number of search results requested per call.
_MAX_IMAGES_LIMIT = 500

# Browser-like User-Agent sent with every image download request.
_DOWNLOAD_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
}


def _image_extension(image_url):
    """Return a short file extension for *image_url*, defaulting to ``.jpg``.

    Only the URL's path component is inspected, so a query string such as
    ``photo.png?w=640`` does not hide the real extension.
    """
    ext = Path(urlparse(image_url).path).suffix
    # Accept only a dot followed by 1-4 alphanumerics; anything else is
    # either missing or not a plausible image extension.
    if re.fullmatch(r"\.[A-Za-z0-9]{1,4}", ext):
        return ext
    return '.jpg'


def _zip_name_for(search_term):
    """Build the zip file name for *search_term* as one safe path component."""
    # Every character that is not a word character, dot or hyphen (spaces,
    # path separators, ...) becomes "_", so user input cannot point the
    # output outside the working directory. The stem is capped to keep the
    # name within common file-name length limits.
    stem = re.sub(r"[^\w.-]", "_", search_term)[:100]
    return f"{stem}_images.zip"


def _download_image(image_url, dest_dir, index):
    """Download one image into *dest_dir*; return True if a file was saved."""
    if not image_url:
        return False
    response = requests.get(image_url, timeout=10, headers=_DOWNLOAD_HEADERS)
    if response.status_code != 200:
        return False
    file_name = f"image_{index:04d}{_image_extension(image_url)}"
    with open(os.path.join(dest_dir, file_name), 'wb') as f:
        f.write(response.content)
    return True


def scrape_and_zip_images(search_term, max_images=500):
    """Scrape images from DuckDuckGo and bundle them into a zip file.

    Args:
        search_term: The search query for images.
        max_images: Maximum number of images to request. Values are
            converted to ``int`` and clamped to the range 1..500.

    Returns:
        A ``(zip_path, status_message)`` tuple. ``zip_path`` is the path of
        the created zip file (written to the current working directory), or
        ``None`` when the search term is empty, nothing could be downloaded,
        or an error occurred. ``status_message`` describes the outcome.
    """
    search_term = (search_term or "").strip()
    if not search_term:
        return None, "Please enter a search term"

    # A unique directory per call, so simultaneous requests never share
    # (or delete) each other's downloads.
    temp_dir = tempfile.mkdtemp(prefix="temp_images_")
    try:
        # Clamp max_images to reasonable limits; UI sliders may pass floats.
        max_images = max(1, min(int(max_images), _MAX_IMAGES_LIMIT))

        results = DDGS().images(
            query=search_term,
            region="wt-wt",
            safesearch="moderate",
            max_results=max_images
        )

        status_message = f"Searching for images with query: '{search_term}'\n"
        downloaded_count = 0
        for idx, result in enumerate(results):
            try:
                if _download_image(result.get('image'), temp_dir, idx):
                    downloaded_count += 1
            except Exception:
                # Best effort: one unreachable or malformed result must not
                # abort the whole batch, so skip it and move on.
                continue

        if downloaded_count == 0:
            status_message += "No images were downloaded."
            return None, status_message

        zip_filename = _zip_name_for(search_term)
        with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
            # Sorted so the archive layout is deterministic.
            for file_name in sorted(os.listdir(temp_dir)):
                zipf.write(os.path.join(temp_dir, file_name), arcname=file_name)

        status_message += f"Successfully downloaded {downloaded_count} images!\n"
        status_message += f"Created zip file: {zip_filename}"
        return zip_filename, status_message
    except Exception as e:
        # Top-level boundary for the UI: report the failure as a status
        # message instead of raising into the caller.
        return None, f"Error: {str(e)}"
    finally:
        # Runs on every exit path (success, "nothing downloaded", error) so
        # the temporary directory is never left behind.
        shutil.rmtree(temp_dir, ignore_errors=True)
# Create Gradio interface.
# Layout: a header, then one row with two columns (inputs on the left,
# results on the right), then usage notes. The emoji in the title and the
# button label were restored from mis-encoded bytes.
with gr.Blocks(title="DuckDuckGo Image Scraper") as demo:
    gr.Markdown("# 🦆 DuckDuckGo Image Scraper")
    gr.Markdown("Search and download images from DuckDuckGo. Enter a search term and the app will download up to 500 images (or rate limit) and provide them as a zip file.")

    with gr.Row():
        # Left column: query, image-count limit and the trigger button.
        with gr.Column():
            search_input = gr.Textbox(
                label="Search Term",
                placeholder="Enter your search query (e.g., 'sunset', 'mountains', 'cats')",
                lines=1
            )
            max_images_slider = gr.Slider(
                minimum=10,
                maximum=500,
                value=100,
                step=10,
                label="Maximum Images",
                info="Number of images to download (subject to rate limits)"
            )
            search_button = gr.Button("🔍 Search and Download", variant="primary")

        # Right column: status text and the resulting zip file.
        with gr.Column():
            status_output = gr.Textbox(
                label="Status",
                lines=5,
                interactive=False
            )
            download_output = gr.File(label="Download ZIP File")

    # scrape_and_zip_images returns (zip_path, status_message); the outputs
    # list is ordered to match that tuple.
    search_button.click(
        fn=scrape_and_zip_images,
        inputs=[search_input, max_images_slider],
        outputs=[download_output, status_output]
    )

    # Written as adjacent string literals so the Markdown text does not
    # depend on how this block is indented.
    gr.Markdown(
        "\n"
        "### Instructions:\n"
        "1. Enter a search term in the text box\n"
        "2. Adjust the maximum number of images (default: 100, max: 500)\n"
        "3. Click \"Search and Download\" button\n"
        "4. Wait for the images to be downloaded and zipped\n"
        "5. Download the ZIP file when ready\n"
        "### Notes:\n"
        "- The actual number of images downloaded may be less than requested due to rate limits or availability\n"
        "- Images are temporarily stored during processing and deleted after zipping\n"
        "- Download may take some time depending on the number of images\n"
        "- Uses the updated `ddgs` package (formerly `duckduckgo_search`)\n"
    )

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()