# Hugging Face Space: namelessai/ddg-image-downloader
# Space status when this file was captured: Running
# File size when captured: 5,417 bytes

import os
import re
import shutil
import tempfile
import time
import zipfile
from pathlib import Path
from urllib.parse import urlparse

import gradio as gr
import requests
from ddgs import DDGS

# Hard upper bound on the number of results requested per search.
_MAX_IMAGES_LIMIT = 500

# Browser-like User-Agent sent with every image request.
_DOWNLOAD_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
}

# Longest filename stem derived from the search term; keeps the final
# "<stem>_images.zip" comfortably below common filesystem name limits.
_MAX_STEM_LENGTH = 100


def _image_extension(image_url):
    """Return a safe file extension for image_url, defaulting to '.jpg'."""
    try:
        # Look only at the URL path so a query string ("a.png?w=200") can
        # neither leak into the extension nor hide the real one.
        ext = Path(urlparse(image_url).path).suffix
    except ValueError:
        return '.jpg'
    # Accept a dot followed by 1-4 alphanumerics; anything else is replaced.
    if re.fullmatch(r"\.[A-Za-z0-9]{1,4}", ext):
        return ext
    return '.jpg'


def _safe_zip_stem(search_term):
    """Turn a search term into a filename stem containing no path separators."""
    # Every character that is not a word character, dot or hyphen becomes "_",
    # so "a b" still maps to "a_b" while "/", "\\" and similar are neutralised.
    return re.sub(r"[^\w.\-]", "_", search_term)[:_MAX_STEM_LENGTH]


def scrape_and_zip_images(search_term, max_images=500):
    """
    Scrape images from DuckDuckGo and bundle them into a zip file.

    Images are downloaded into a private temporary directory that is always
    removed before returning, whether the run succeeds, finds nothing, or
    fails. The zip file itself is written to the current working directory.

    Args:
        search_term: The search query for images. Leading/trailing whitespace
            is ignored; an empty or whitespace-only value is rejected.
        max_images: Maximum number of images to download (default 500).
            Coerced to an integer and clamped to the range 1..500.

    Returns:
        A ``(zip_path, status_message)`` tuple. ``zip_path`` is the path to
        the created zip file, or ``None`` when the search term is missing,
        no image could be downloaded, or an error occurred.
        ``status_message`` is a human-readable description of the outcome.
    """
    if not search_term or not search_term.strip():
        return None, "Please enter a search term"
    search_term = search_term.strip()

    # mkdtemp guarantees a unique directory, so concurrent requests never
    # share (or delete) each other's downloads.
    temp_dir = tempfile.mkdtemp(prefix="temp_images_")

    try:
        # UI widgets may deliver floats; the search API expects a positive int.
        max_images = max(1, min(int(max_images), _MAX_IMAGES_LIMIT))

        ddgs = DDGS()
        results = ddgs.images(
            query=search_term,
            region="wt-wt",
            safesearch="moderate",
            max_results=max_images,
        )

        downloaded_count = 0
        status_message = f"Searching for images with query: '{search_term}'\n"

        for idx, result in enumerate(results):
            image_url = result.get('image')
            if not image_url:
                continue

            try:
                response = requests.get(
                    image_url, timeout=10, headers=_DOWNLOAD_HEADERS
                )
                if response.status_code != 200 or not response.content:
                    continue

                image_path = os.path.join(
                    temp_dir, f"image_{idx:04d}{_image_extension(image_url)}"
                )
                with open(image_path, 'wb') as f:
                    f.write(response.content)
            except (requests.RequestException, OSError, ValueError):
                # Best effort: one unreachable or malformed image must not
                # abort the whole batch.
                continue

            downloaded_count += 1

        if downloaded_count == 0:
            status_message += "No images were downloaded."
            return None, status_message

        # Sanitise the user-supplied term before using it as a filename.
        zip_filename = f"{_safe_zip_stem(search_term)}_images.zip"
        zip_path = zip_filename

        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            # The download directory is flat; sort for a stable archive order.
            for file in sorted(os.listdir(temp_dir)):
                zipf.write(os.path.join(temp_dir, file), arcname=file)

        status_message += f"Successfully downloaded {downloaded_count} images!\n"
        status_message += f"Created zip file: {zip_filename}"

        return zip_path, status_message

    except Exception as e:
        # Top-level boundary for the UI callback: report the failure as a
        # status message instead of raising.
        return None, f"Error: {str(e)}"

    finally:
        # Runs on every exit path, including the "no images" early return.
        shutil.rmtree(temp_dir, ignore_errors=True)

# Build the Gradio interface: a single Blocks app bound to the module-level
# name `demo`, which the __main__ guard at the bottom launches.
with gr.Blocks(title="DuckDuckGo Image Scraper") as demo:
    # Page heading and short description.
    gr.Markdown("# 🦆 DuckDuckGo Image Scraper")
    gr.Markdown("Search and download images from DuckDuckGo. Enter a search term and the app will download up to 500 images (or rate limit) and provide them as a zip file.")

    # One row holding two columns: inputs first, outputs second.
    with gr.Row():
        # First column: the user's inputs and the trigger button.
        with gr.Column():
            search_input = gr.Textbox(
                label="Search Term",
                placeholder="Enter your search query (e.g., 'sunset', 'mountains', 'cats')",
                lines=1
            )
            # Range 10..500 in steps of 10, starting at 100; the value is
            # passed to scrape_and_zip_images as max_images.
            max_images_slider = gr.Slider(
                minimum=10,
                maximum=500,
                value=100,
                step=10,
                label="Maximum Images",
                info="Number of images to download (subject to rate limits)"
            )
            search_button = gr.Button("🔍 Search and Download", variant="primary")

        # Second column: read-only status text and the resulting zip file.
        with gr.Column():
            status_output = gr.Textbox(
                label="Status",
                lines=5,
                interactive=False
            )
            download_output = gr.File(label="Download ZIP File")

    # Wire the button to the scraper. The order of `outputs` must match the
    # function's (zip_path, status_message) return tuple.
    search_button.click(
        fn=scrape_and_zip_images,
        inputs=[search_input, max_images_slider],
        outputs=[download_output, status_output]
    )

    # Static usage instructions rendered below the controls.
    gr.Markdown("""
    ### Instructions:
    1. Enter a search term in the text box
    2. Adjust the maximum number of images (default: 100, max: 500)
    3. Click "Search and Download" button
    4. Wait for the images to be downloaded and zipped
    5. Download the ZIP file when ready

    ### Notes:
    - The actual number of images downloaded may be less than requested due to rate limits or availability
    - Images are temporarily stored during processing and deleted after zipping
    - Download may take some time depending on the number of images
    - Uses the updated `ddgs` package (formerly `duckduckgo_search`)
    """)

# Start the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    demo.launch()