Spaces:
Running
Running
File size: 5,417 Bytes
f040ed0 d011276 f040ed0 d011276 f040ed0 d011276 f040ed0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
import os
import re
import tempfile
import time
import zipfile
from pathlib import Path
from urllib.parse import urlparse

import gradio as gr
import requests
from ddgs import DDGS
def _image_extension(image_url):
    """Return the file extension to use for *image_url* (default ``.jpg``).

    Only the path component of the URL is inspected, so a query string such
    as ``photo.png?w=300`` does not leak into the extension and a bare host
    name such as ``https://example.com`` does not yield ``.com``.
    """
    ext = Path(urlparse(image_url).path).suffix
    if not ext or len(ext) > 5:
        return '.jpg'
    return ext


def _safe_zip_stem(search_term):
    """Reduce a free-text search term to a filesystem-safe file name stem."""
    # Collapse anything that is not a word character, dot or dash (spaces,
    # path separators, quotes, ...) into "_" so the term cannot introduce
    # directories or traverse out of the output directory.
    stem = re.sub(r'[^\w.-]+', '_', search_term.strip()).strip('._')
    return stem or 'search'


def scrape_and_zip_images(search_term, max_images=500):
    """
    Scrape images from DuckDuckGo and create a zip file

    Args:
        search_term: The search query for images
        max_images: Maximum number of images to download (default 500);
            converted to an int and clamped to the range 1..500

    Returns:
        A ``(zip_path, status_message)`` tuple. ``zip_path`` is the path of
        the created zip archive, or ``None`` when the search term is blank,
        nothing could be downloaded, or an error occurred;
        ``status_message`` is a human-readable description of the outcome.
    """
    if not search_term or not search_term.strip():
        return None, "Please enter a search term"
    search_term = search_term.strip()

    try:
        # The value may arrive as a float from a UI slider.
        max_images = max(1, min(int(max_images), 500))

        # A uniquely named scratch directory that is removed automatically on
        # every exit path (success, early return, or exception).
        with tempfile.TemporaryDirectory(prefix="temp_images_") as temp_dir:
            # Initialize DDGS and search for images
            ddgs = DDGS()
            results = ddgs.images(
                query=search_term,
                region="wt-wt",
                safesearch="moderate",
                max_results=max_images
            )

            # Download images
            downloaded_count = 0
            status_message = f"Searching for images with query: '{search_term}'\n"
            for idx, result in enumerate(results):
                try:
                    image_url = result.get('image')
                    if not image_url:
                        continue

                    response = requests.get(image_url, timeout=10, headers={
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                    })
                    if response.status_code != 200:
                        continue

                    ext = _image_extension(image_url)
                    image_path = os.path.join(temp_dir, f"image_{idx:04d}{ext}")
                    with open(image_path, 'wb') as f:
                        f.write(response.content)
                    downloaded_count += 1
                except Exception:
                    # Deliberately best-effort: one bad URL, timeout or write
                    # failure must not abort the whole batch.
                    continue

            if downloaded_count == 0:
                status_message += "No images were downloaded."
                return None, status_message

            # Create the zip in its own fresh directory so that concurrent
            # requests for the same term cannot overwrite each other's
            # archive. This directory is intentionally left in place: the
            # caller still has to read the file after this function returns.
            zip_filename = f"{_safe_zip_stem(search_term)}_images.zip"
            zip_path = os.path.join(tempfile.mkdtemp(prefix="ddg_zip_"), zip_filename)
            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for file in sorted(os.listdir(temp_dir)):
                    zipf.write(os.path.join(temp_dir, file), arcname=file)

            status_message += f"Successfully downloaded {downloaded_count} images!\n"
            status_message += f"Created zip file: {zip_filename}"
            return zip_path, status_message
    except Exception as e:
        # UI boundary: report the failure as a status message instead of
        # raising. The scratch directory has already been cleaned up by the
        # context manager at this point.
        return None, f"Error: {str(e)}"
# Create Gradio interface.
# Layout: a two-column row (inputs on the left, results on the right),
# followed by usage instructions. `demo` is launched by the __main__ guard
# at the bottom of this block.
with gr.Blocks(title="DuckDuckGo Image Scraper") as demo:
    gr.Markdown("# 🦆 DuckDuckGo Image Scraper")
    gr.Markdown("Search and download images from DuckDuckGo. Enter a search term and the app will download up to 500 images (or rate limit) and provide them as a zip file.")

    with gr.Row():
        # Left column: search inputs and the trigger button.
        with gr.Column():
            search_input = gr.Textbox(
                label="Search Term",
                placeholder="Enter your search query (e.g., 'sunset', 'mountains', 'cats')",
                lines=1
            )
            max_images_slider = gr.Slider(
                minimum=10,
                maximum=500,
                value=100,
                step=10,
                label="Maximum Images",
                info="Number of images to download (subject to rate limits)"
            )
            search_button = gr.Button("🔍 Search and Download", variant="primary")

        # Right column: status text and the resulting archive.
        with gr.Column():
            status_output = gr.Textbox(
                label="Status",
                lines=5,
                interactive=False
            )
            download_output = gr.File(label="Download ZIP File")

    # The outputs are listed in the same order as the function's return
    # tuple: (zip path, status message).
    search_button.click(
        fn=scrape_and_zip_images,
        inputs=[search_input, max_images_slider],
        outputs=[download_output, status_output]
    )

    gr.Markdown("""
### Instructions:
1. Enter a search term in the text box
2. Adjust the maximum number of images (default: 100, max: 500)
3. Click "Search and Download" button
4. Wait for the images to be downloaded and zipped
5. Download the ZIP file when ready
### Notes:
- The actual number of images downloaded may be less than requested due to rate limits or availability
- Images are temporarily stored during processing and deleted after zipping
- Download may take some time depending on the number of images
- Uses the updated `ddgs` package (formerly `duckduckgo_search`)
""")

# Only start the server when the file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|