Spaces:
Sleeping
Sleeping
import os
import re
import shutil
import tempfile
import time
import zipfile
from pathlib import Path
from urllib.parse import urlparse

import gradio as gr
import requests
from ddgs import DDGS
def _image_extension(image_url):
    """Return a filesystem-safe extension for *image_url*, defaulting to ``.jpg``."""
    # Look only at the URL path so query strings ("?w=300") never leak into
    # the file name.
    suffix = Path(urlparse(image_url).path).suffix
    # Same length limit as before (dot + at most 4 characters), but also
    # restricted to alphanumerics so the name is valid on every platform.
    if re.fullmatch(r"\.[A-Za-z0-9]{1,4}", suffix):
        return suffix
    return '.jpg'


def _zip_filename(search_term):
    """Build the archive file name for *search_term* without path separators."""
    # Anything that is not a word character, dot or dash becomes "_"; this
    # keeps the "spaces -> underscores" behaviour while neutralising "/",
    # "\\", ":" and similar characters.
    safe = re.sub(r"[^\w.-]", "_", search_term).strip("._")[:100]
    return f"{safe or 'images'}_images.zip"


def scrape_and_zip_images(search_term, max_images=500):
    """Scrape images from DuckDuckGo and bundle them into a zip file.

    Args:
        search_term: The search query for images. Leading and trailing
            whitespace is ignored.
        max_images: Maximum number of images to download. Coerced to an
            integer and clamped to the range 1..500 (default 500).

    Returns:
        A ``(zip_path, status_message)`` tuple. ``zip_path`` is the name of
        the archive written to the current working directory, or ``None``
        when no search term was given, nothing could be downloaded, or an
        error occurred.
    """
    search_term = (search_term or "").strip()
    if not search_term:
        return None, "Please enter a search term"

    # mkdtemp guarantees a unique directory, so two requests arriving in the
    # same second can no longer share (and delete) each other's files.
    temp_dir = tempfile.mkdtemp(prefix="temp_images_")
    try:
        # UI sliders may hand over floats; the search API wants a sane int.
        try:
            limit = int(max_images)
        except (TypeError, ValueError, OverflowError):
            limit = 500
        limit = max(1, min(limit, 500))

        results = DDGS().images(
            query=search_term,
            region="wt-wt",
            safesearch="moderate",
            max_results=limit,
        )

        status_message = f"Searching for images with query: '{search_term}'\n"
        downloaded_count = 0
        for idx, result in enumerate(results):
            try:
                image_url = result.get('image')
                if not image_url:
                    continue
                response = requests.get(image_url, timeout=10, headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                })
                if response.status_code != 200:
                    continue
                image_path = os.path.join(
                    temp_dir, f"image_{idx:04d}{_image_extension(image_url)}"
                )
                with open(image_path, 'wb') as f:
                    f.write(response.content)
            except Exception:
                # Deliberate best effort: the URLs are untrusted third-party
                # links, and one bad image must not abort the whole batch.
                continue
            downloaded_count += 1

        if downloaded_count == 0:
            status_message += "No images were downloaded."
            return None, status_message

        zip_filename = _zip_filename(search_term)
        zip_path = zip_filename
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            # Sorted so the archive layout is deterministic.
            for name in sorted(os.listdir(temp_dir)):
                zipf.write(os.path.join(temp_dir, name), arcname=name)

        status_message += f"Successfully downloaded {downloaded_count} images!\n"
        status_message += f"Created zip file: {zip_filename}"
        return zip_path, status_message
    except Exception as e:
        # Top-level boundary for the UI callback: report instead of raising.
        return None, f"Error: {e}"
    finally:
        # Runs on every exit path, including the "no images" early return
        # that previously left the temporary directory behind.
        shutil.rmtree(temp_dir, ignore_errors=True)
# Build the Gradio interface: inputs on the left, results on the right.
with gr.Blocks(title="DuckDuckGo Image Scraper") as demo:
    gr.Markdown("# 🦆 DuckDuckGo Image Scraper")
    gr.Markdown("Search and download images from DuckDuckGo. Enter a search term and the app will download up to 500 images (or rate limit) and provide them as a zip file.")

    with gr.Row():
        # Left column: search parameters and the trigger button.
        with gr.Column():
            search_input = gr.Textbox(
                label="Search Term",
                placeholder="Enter your search query (e.g., 'sunset', 'mountains', 'cats')",
                lines=1
            )
            max_images_slider = gr.Slider(
                minimum=10,
                maximum=500,
                value=100,
                step=10,
                label="Maximum Images",
                info="Number of images to download (subject to rate limits)"
            )
            search_button = gr.Button("🔍 Search and Download", variant="primary")

        # Right column: progress/status text and the resulting archive.
        with gr.Column():
            status_output = gr.Textbox(
                label="Status",
                lines=5,
                interactive=False
            )
            download_output = gr.File(label="Download ZIP File")

    # The callback returns (zip_path, status_message), matching the order
    # of the outputs list below.
    search_button.click(
        fn=scrape_and_zip_images,
        inputs=[search_input, max_images_slider],
        outputs=[download_output, status_output]
    )

    gr.Markdown("""
    ### Instructions:
    1. Enter a search term in the text box
    2. Adjust the maximum number of images (default: 100, max: 500)
    3. Click "Search and Download" button
    4. Wait for the images to be downloaded and zipped
    5. Download the ZIP file when ready
    ### Notes:
    - The actual number of images downloaded may be less than requested due to rate limits or availability
    - Images are temporarily stored during processing and deleted after zipping
    - Download may take some time depending on the number of images
    - Uses the updated `ddgs` package (formerly `duckduckgo_search`)
    """)

# Only start the server when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()