"""Gradio app that downloads DuckDuckGo image search results as a ZIP file."""

import os
import re
import tempfile
import time
import zipfile
from pathlib import Path
from urllib.parse import urlparse

import gradio as gr
import requests
from ddgs import DDGS

# Hard upper bound on the number of search results requested per query.
MAX_IMAGES_LIMIT = 500
# Per-image download timeout, in seconds.
REQUEST_TIMEOUT_SECONDS = 10
# Browser-like User-Agent sent with every image request.
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
}
DEFAULT_EXTENSION = '.jpg'


def _image_extension(image_url):
    """Return a short alphanumeric file extension for *image_url*, or ``.jpg``."""
    # Parse the URL first so query strings and fragments ("?w=500", "#x")
    # never end up inside the extension.
    suffix = os.path.splitext(urlparse(image_url).path)[1]
    if re.fullmatch(r'\.[A-Za-z0-9]{1,4}', suffix):
        return suffix
    return DEFAULT_EXTENSION


def _safe_zip_stem(search_term):
    """Turn a free-text search term into a filesystem-safe file name stem."""
    # Collapse everything except word characters and dashes into "_" so that
    # path separators, "..", or characters illegal on some platforms cannot
    # leak into the output path.
    stem = re.sub(r'[^\w\-]+', '_', search_term).strip('_')
    return stem or 'images'


def _download_image(image_url, destination):
    """Download *image_url* to *destination*; return True when a file was written."""
    response = requests.get(
        image_url,
        timeout=REQUEST_TIMEOUT_SECONDS,
        headers=REQUEST_HEADERS,
    )
    # Skip error responses and empty bodies: neither is a usable image.
    if response.status_code != 200 or not response.content:
        return False
    destination.write_bytes(response.content)
    return True


def scrape_and_zip_images(search_term, max_images=500):
    """
    Scrape images from DuckDuckGo and create a zip file

    Args:
        search_term: The search query for images
        max_images: Maximum number of images to download (default 500).
            Values are converted to int and clamped to 1..500.

    Returns:
        A ``(zip_path, status_message)`` tuple. ``zip_path`` is the path of
        the created zip file (written to the current working directory), or
        ``None`` when nothing could be downloaded or an error occurred.
    """
    search_term = (search_term or "").strip()
    if not search_term:
        return None, "Please enter a search term"

    try:
        # The slider may deliver a float; also enforce both bounds.
        max_images = max(1, min(int(max_images), MAX_IMAGES_LIMIT))

        results = DDGS().images(
            query=search_term,
            region="wt-wt",
            safesearch="moderate",
            max_results=max_images,
        )

        status_message = f"Searching for images with query: '{search_term}'\n"

        # A unique temporary directory avoids collisions between requests
        # arriving within the same second, and the context manager removes it
        # on every exit path (success, "no images", or error).
        with tempfile.TemporaryDirectory(
            prefix=f"temp_images_{int(time.time())}_"
        ) as temp_dir:
            temp_path = Path(temp_dir)
            downloaded_count = 0

            for idx, result in enumerate(results):
                image_url = result.get('image')
                if not image_url:
                    continue

                image_path = temp_path / f"image_{idx:04d}{_image_extension(image_url)}"
                try:
                    if _download_image(image_url, image_path):
                        downloaded_count += 1
                except (requests.RequestException, OSError, ValueError):
                    # Best effort: one unreachable or malformed image must
                    # not abort the whole batch.
                    continue

            if downloaded_count == 0:
                status_message += "No images were downloaded."
                return None, status_message

            zip_filename = f"{_safe_zip_stem(search_term)}_images.zip"
            zip_path = zip_filename
            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for image_file in sorted(temp_path.iterdir()):
                    # Store files flat, without the temporary directory prefix.
                    zipf.write(image_file, arcname=image_file.name)

        status_message += f"Successfully downloaded {downloaded_count} images!\n"
        status_message += f"Created zip file: {zip_filename}"
        return zip_path, status_message

    except Exception as e:
        # Top-level UI boundary: report the failure in the status box instead
        # of surfacing a traceback to the user.
        return None, f"Error: {str(e)}"


# Create Gradio interface
with gr.Blocks(title="DuckDuckGo Image Scraper") as demo:
    gr.Markdown("# 🦆 DuckDuckGo Image Scraper")
    gr.Markdown("Search and download images from DuckDuckGo. Enter a search term and the app will download up to 500 images (or rate limit) and provide them as a zip file.")

    with gr.Row():
        with gr.Column():
            search_input = gr.Textbox(
                label="Search Term",
                placeholder="Enter your search query (e.g., 'sunset', 'mountains', 'cats')",
                lines=1
            )
            max_images_slider = gr.Slider(
                minimum=10,
                maximum=500,
                value=100,
                step=10,
                label="Maximum Images",
                info="Number of images to download (subject to rate limits)"
            )
            search_button = gr.Button("🔍 Search and Download", variant="primary")

        with gr.Column():
            status_output = gr.Textbox(
                label="Status",
                lines=5,
                interactive=False
            )
            download_output = gr.File(label="Download ZIP File")

    search_button.click(
        fn=scrape_and_zip_images,
        inputs=[search_input, max_images_slider],
        outputs=[download_output, status_output]
    )

    gr.Markdown("""
    ### Instructions:
    1. Enter a search term in the text box
    2. Adjust the maximum number of images (default: 100, max: 500)
    3. Click "Search and Download" button
    4. Wait for the images to be downloaded and zipped
    5. Download the ZIP file when ready

    ### Notes:
    - The actual number of images downloaded may be less than requested due to rate limits or availability
    - Images are temporarily stored during processing and deleted after zipping
    - Download may take some time depending on the number of images
    - Uses the updated `ddgs` package (formerly `duckduckgo_search`)
    """)

if __name__ == "__main__":
    demo.launch()