File size: 5,417 Bytes
f040ed0
d011276
f040ed0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d011276
f040ed0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d011276
f040ed0
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import os
import shutil
import tempfile
import time
import zipfile
from pathlib import Path
from urllib.parse import urlsplit

import gradio as gr
import requests
from ddgs import DDGS

def _image_extension(image_url):
    """Return a file extension (with leading dot) that is safe to append.

    Only the path component of the URL is inspected, so query strings and
    fragments never leak into the file name. Falls back to ``.jpg`` when the
    URL has no suffix, the suffix is longer than five characters, or it
    contains anything other than letters and digits.
    """
    ext = Path(urlsplit(image_url).path).suffix
    if not ext or len(ext) > 5 or not ext[1:].isalnum():
        return '.jpg'
    return ext


def _zip_stem(search_term):
    """Map *search_term* to a string that is safe inside a file name.

    Letters, digits, ``-``, ``_`` and ``.`` are kept; every other character
    (spaces, path separators, ...) becomes ``_`` so the result can never
    point outside the current directory.
    """
    return "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in search_term
    )


def scrape_and_zip_images(search_term, max_images=500):
    """
    Scrape images from DuckDuckGo and create a zip file

    Args:
        search_term: The search query for images. Leading/trailing
            whitespace is ignored; an empty or blank term is rejected.
        max_images: Maximum number of images to download (default 500).
            Converted to an int and clamped to the range 1..500.

    Returns:
        A ``(zip_path, status_message)`` tuple. ``zip_path`` is the path of
        the created zip file in the current working directory, or ``None``
        when the term is blank, nothing could be downloaded, or an error
        occurred. ``status_message`` is a human-readable description of the
        outcome.
    """
    if not search_term or not search_term.strip():
        return None, "Please enter a search term"
    search_term = search_term.strip()

    temp_dir = None
    try:
        # A unique directory per call: concurrent requests must not share
        # (or delete) each other's downloads.
        temp_dir = tempfile.mkdtemp(prefix="temp_images_")

        # The UI slider may hand over a float; the search API expects an int.
        max_images = max(1, min(int(max_images), 500))

        ddgs = DDGS()
        results = ddgs.images(
            query=search_term,
            region="wt-wt",
            safesearch="moderate",
            max_results=max_images
        )

        status_message = f"Searching for images with query: '{search_term}'\n"
        saved_paths = []

        for idx, result in enumerate(results):
            try:
                image_url = result.get('image')
                if not image_url:
                    continue

                ext = _image_extension(image_url)

                response = requests.get(image_url, timeout=10, headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                })
                if response.status_code != 200:
                    continue

                image_path = os.path.join(temp_dir, f"image_{idx:04d}{ext}")
                with open(image_path, 'wb') as f:
                    f.write(response.content)
                # Recorded only after a complete write, so a file left behind
                # by a failed write is never added to the archive.
                saved_paths.append(image_path)

            except Exception:
                # Deliberate best-effort: one bad URL, timeout or write error
                # must not abort the whole batch.
                continue

        downloaded_count = len(saved_paths)
        if downloaded_count == 0:
            status_message += "No images were downloaded."
            return None, status_message

        zip_filename = f"{_zip_stem(search_term)}_images.zip"
        zip_path = zip_filename

        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for image_path in saved_paths:
                zipf.write(image_path, arcname=os.path.basename(image_path))

        status_message += f"Successfully downloaded {downloaded_count} images!\n"
        status_message += f"Created zip file: {zip_filename}"

        return zip_path, status_message

    except Exception as e:
        return None, f"Error: {str(e)}"

    finally:
        # Runs on every exit path (success, "no images", error) so the
        # temporary downloads never accumulate on disk.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)

# Build the Gradio interface: a search box, an image-count slider and a
# button on the left; the status text and the resulting ZIP file on the right.
# Emoji are written as \N{...} escapes so the source stays ASCII-safe and the
# labels cannot be garbled by a wrong file encoding.
with gr.Blocks(title="DuckDuckGo Image Scraper") as demo:
    gr.Markdown("# \N{DUCK} DuckDuckGo Image Scraper")
    gr.Markdown("Search and download images from DuckDuckGo. Enter a search term and the app will download up to 500 images (or rate limit) and provide them as a zip file.")

    with gr.Row():
        # Left column: user inputs.
        with gr.Column():
            search_input = gr.Textbox(
                label="Search Term",
                placeholder="Enter your search query (e.g., 'sunset', 'mountains', 'cats')",
                lines=1
            )
            max_images_slider = gr.Slider(
                minimum=10,
                maximum=500,
                value=100,
                step=10,
                label="Maximum Images",
                info="Number of images to download (subject to rate limits)"
            )
            search_button = gr.Button("\N{LEFT-POINTING MAGNIFYING GLASS} Search and Download", variant="primary")

        # Right column: results.
        with gr.Column():
            status_output = gr.Textbox(
                label="Status",
                lines=5,
                interactive=False
            )
            download_output = gr.File(label="Download ZIP File")

    # scrape_and_zip_images returns (zip_path, status_message); the order of
    # `outputs` must match that tuple.
    search_button.click(
        fn=scrape_and_zip_images,
        inputs=[search_input, max_images_slider],
        outputs=[download_output, status_output]
    )

    gr.Markdown("""
    ### Instructions:
    1. Enter a search term in the text box
    2. Adjust the maximum number of images (default: 100, max: 500)
    3. Click "Search and Download" button
    4. Wait for the images to be downloaded and zipped
    5. Download the ZIP file when ready

    ### Notes:
    - The actual number of images downloaded may be less than requested due to rate limits or availability
    - Images are temporarily stored during processing and deleted after zipping
    - Download may take some time depending on the number of images
    - Uses the updated `ddgs` package (formerly `duckduckgo_search`)
    """)

# Launch the app only when this file is run as a script, so importing the
# module (e.g. to reuse `demo` or `scrape_and_zip_images`) has no side effect.
if __name__ == "__main__":
    demo.launch()