Spaces:

namelessai
/

ddg-image-downloader

Sleeping

App Files Files Community

ddg-image-downloader / app.py

namelessai

Update app.py

d011276 verified 8 days ago

raw

history blame contribute delete

5.42 kB

	import os
	import re
	import shutil
	import tempfile
	import time
	import zipfile
	from pathlib import Path
	from urllib.parse import urlparse

	import gradio as gr
	import requests
	from ddgs import DDGS

	# Hard upper bound on the number of search results requested per call.
	_MAX_IMAGES_LIMIT = 500

	# Browser-like User-Agent sent with every image download request.
	_REQUEST_HEADERS = {
	    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
	}


	def _safe_zip_stem(search_term):
	    """Turn a search term into a file-name stem without path separators."""
	    # Spaces become underscores (as before); every other character that is
	    # not a word character or hyphen is replaced as well, so '/', '\\' and
	    # '..' in the query can no longer redirect where the zip is written.
	    stem = re.sub(r"[^\w\-]", "_", search_term.replace(' ', '_')).strip("_")
	    # Keep the name well below common file-name length limits.
	    return stem[:100] or "images"


	def _image_extension(image_url):
	    """Return the extension found in the URL path, or '.jpg' if unusable."""
	    # Look at the path component only, so a query string such as
	    # '?w=100' never ends up inside the extension.
	    ext = Path(urlparse(image_url).path).suffix
	    if not ext or len(ext) > 5 or not ext[1:].isalnum():
	        return '.jpg'
	    return ext


	def scrape_and_zip_images(search_term, max_images=500):
	    """
	    Scrape images from DuckDuckGo and create a zip file.

	    Args:
	        search_term: The search query for images.
	        max_images: Maximum number of images to request (default 500).
	            Converted to an int and clamped to the range 1..500; values
	            that cannot be converted fall back to 500.

	    Returns:
	        A ``(zip_path, status_message)`` tuple. ``zip_path`` is the path of
	        the zip file written to the current working directory, or ``None``
	        when the search term is empty, nothing could be downloaded, or an
	        error occurred. ``status_message`` is a human-readable summary.
	    """
	    if not search_term or not search_term.strip():
	        return None, "Please enter a search term"
	    search_term = search_term.strip()

	    # A UI slider may deliver a float; the search API is given an int.
	    try:
	        max_images = int(max_images)
	    except (TypeError, ValueError):
	        max_images = _MAX_IMAGES_LIMIT
	    max_images = max(1, min(max_images, _MAX_IMAGES_LIMIT))

	    status_message = f"Searching for images with query: '{search_term}'\n"

	    # mkdtemp yields a unique directory, so two requests arriving within
	    # the same second no longer share (and delete) each other's files.
	    temp_dir = tempfile.mkdtemp(prefix="temp_images_")

	    try:
	        results = DDGS().images(
	            query=search_term,
	            region="wt-wt",
	            safesearch="moderate",
	            max_results=max_images,
	        )

	        downloaded_count = 0
	        for idx, result in enumerate(results):
	            try:
	                image_url = result.get('image')
	                if not image_url:
	                    continue

	                response = requests.get(
	                    image_url, timeout=10, headers=_REQUEST_HEADERS
	                )
	                # Skip error responses and empty bodies.
	                if response.status_code != 200 or not response.content:
	                    continue

	                image_path = os.path.join(
	                    temp_dir, f"image_{idx:04d}{_image_extension(image_url)}"
	                )
	                with open(image_path, 'wb') as f:
	                    f.write(response.content)
	                downloaded_count += 1
	            except Exception:
	                # Deliberate best-effort: one bad URL, timeout or write
	                # failure must not abort the remaining downloads.
	                continue

	        if downloaded_count == 0:
	            status_message += "No images were downloaded."
	            return None, status_message

	        zip_filename = f"{_safe_zip_stem(search_term)}_images.zip"

	        with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
	            # The temp directory is flat; sorting gives a stable archive order.
	            for name in sorted(os.listdir(temp_dir)):
	                zipf.write(os.path.join(temp_dir, name), arcname=name)

	        status_message += f"Successfully downloaded {downloaded_count} images!\n"
	        status_message += f"Created zip file: {zip_filename}"

	        return zip_filename, status_message

	    except Exception as e:
	        return None, f"Error: {e}"

	    finally:
	        # Single cleanup point for every exit path, including the
	        # "nothing downloaded" early return.
	        shutil.rmtree(temp_dir, ignore_errors=True)

	# Gradio UI: query controls in the left column, results in the right one.
	with gr.Blocks(title="DuckDuckGo Image Scraper") as demo:
	    gr.Markdown(value="# 🦆 DuckDuckGo Image Scraper")
	    gr.Markdown(value="Search and download images from DuckDuckGo. Enter a search term and the app will download up to 500 images (or rate limit) and provide them as a zip file.")

	    with gr.Row():
	        # Left column: search term, image count and the trigger button.
	        with gr.Column():
	            search_input = gr.Textbox(
	                lines=1,
	                label="Search Term",
	                placeholder="Enter your search query (e.g., 'sunset', 'mountains', 'cats')",
	            )
	            max_images_slider = gr.Slider(
	                10,
	                500,
	                value=100,
	                step=10,
	                label="Maximum Images",
	                info="Number of images to download (subject to rate limits)",
	            )
	            search_button = gr.Button(value="🔍 Search and Download", variant="primary")

	        # Right column: status text and the resulting archive.
	        with gr.Column():
	            status_output = gr.Textbox(label="Status", lines=5, interactive=False)
	            download_output = gr.File(label="Download ZIP File")

	    # The handler returns (zip path, status message), matching this output order.
	    search_button.click(
	        scrape_and_zip_images,
	        [search_input, max_images_slider],
	        [download_output, status_output],
	    )

	    gr.Markdown("""
	### Instructions:
	1. Enter a search term in the text box
	2. Adjust the maximum number of images (default: 100, max: 500)
	3. Click "Search and Download" button
	4. Wait for the images to be downloaded and zipped
	5. Download the ZIP file when ready

	### Notes:
	- The actual number of images downloaded may be less than requested due to rate limits or availability
	- Images are temporarily stored during processing and deleted after zipping
	- Download may take some time depending on the number of images
	- Uses the updated `ddgs` package (formerly `duckduckgo_search`)
	""")

	# Start the app only when the file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()