import requests
import json
from datetime import date, timedelta
from dotenv import load_dotenv
import os

# Load values from .env into the environment
load_dotenv()

# Access the API key
API_KEY = os.getenv("TWITTER_API_KEY", "").strip()
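
# A minimal .env file for this script looks like the following
# (the value is a placeholder, not a real credential):
#
#   TWITTER_API_KEY=your_twitterapi_io_key_here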

def search_tweets(query, query_type="Latest", limit=20):
    """
    Searches for tweets using the twitterapi.io advanced search endpoint.
    """
    if not API_KEY:
        print("Error: TWITTER_API_KEY not found or empty")
        return None

    url = "https://api.twitterapi.io/twitter/tweet/advanced_search"
    headers = {"X-API-Key": API_KEY}
    params = {"query": query, "queryType": query_type, "limit": limit}

    print(f"Executing search with query: {query}")
    try:
        # A timeout keeps the call from hanging indefinitely on network issues
        response = requests.get(url, headers=headers, params=params, timeout=30)
    except requests.RequestException as e:
        print(f"Request error: {e}")
        return None

    if response.status_code == 200:
        return response.json()
    print(f"Error: {response.status_code}")
    print(response.text)
    return None
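
# Usage sketch for search_tweets(), kept commented out so importing this
# module has no side effects; the query string below is illustrative:
#
#   raw = search_tweets('"cyclone" "Odisha" lang:en', query_type="Latest", limit=5)
#   if raw is not None:
#       print(len(raw.get("tweets", [])), "tweets returned")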

def extract_tweets(result_json):
    """
    Extracts a normalized list of tweets from the API result.
    Returns a list of dicts with keys: tweet_url, location, created_at, text, hashtags
    """
    if not result_json or 'tweets' not in result_json:
        return []

    extracted_data = []
    for tweet in result_json.get('tweets', []):
        # Guard against null 'author'/'entities' fields in the response
        author = tweet.get('author') or {}
        entities = tweet.get('entities') or {}
        extracted_data.append({
            'tweet_url': tweet.get('url'),
            'location': author.get('location'),
            'created_at': tweet.get('createdAt'),
            'text': tweet.get('text'),
            'hashtags': [tag.get('text') for tag in entities.get('hashtags', [])],
        })
    return extracted_data
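
# Each entry returned by extract_tweets() is a flat dict; the values below
# are illustrative, actual contents depend on the API response:
#
#   {
#       "tweet_url": "https://x.com/user/status/123",
#       "location": "Mumbai, India",
#       "created_at": "Wed Oct 01 12:00:00 +0000 2025",
#       "text": "High waves along Marine Drive #flood",
#       "hashtags": ["flood"]
#   }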

def build_custom_query(hazard_type=None, location=None, days_back=1):
    """
    Builds a custom query based on provided hazard type and location.

    Args:
        hazard_type (str): Specific hazard type to search for
        location (str): Specific location to search for
        days_back (int): Number of days back to search (default: 1)

    Returns:
        str: Custom search query
    """
    # Default hazard keywords
    default_hazards = [
        "flood", "tsunami", "cyclone", "storm surge", "high tide", "high waves",
        "swell", "coastal flooding", "rip current", "coastal erosion",
        "water discoloration", "algal bloom", "marine debris", "pollution"
    ]

    # Default location keywords
    default_locations = [
        "Mumbai", "Chennai", "Kolkata", "Odisha", "Kerala", "Gujarat", "Goa",
        "Andhra Pradesh", "West Bengal", "Vizag", "Puri", "Bay of Bengal", "Arabian Sea"
    ]

    # Build hazard clause: the provided hazard type, or an OR of the defaults
    if hazard_type:
        hazard_query = f'"{hazard_type}"'
    else:
        hazard_query = "(" + " OR ".join([f'"{hazard}"' for hazard in default_hazards]) + ")"

    # Build location clause the same way
    if location:
        location_query = f'"{location}"'
    else:
        location_query = "(" + " OR ".join([f'"{loc}"' for loc in default_locations]) + ")"

    # Language filter
    allowed_languages = [
        "as", "bn", "brx", "doi", "gu", "hi", "kn", "ks", "kok", "ml", "mni",
        "mr", "ne", "or", "pa", "sa", "sat", "sd", "ta", "te", "ur", "en", "bh"
    ]
    lang_query = "(" + " OR ".join([f"lang:{lang}" for lang in allowed_languages]) + ")"

    # Date filter: only tweets on or after the cutoff date
    search_date = date.today() - timedelta(days=days_back)
    date_filter = f"since:{search_date.strftime('%Y-%m-%d')}"

    # Combine all parts
    full_query = f"{hazard_query} {location_query} {lang_query} {date_filter}"
    return full_query
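
# Example of an assembled query (language list elided, and the date shown
# as a placeholder since it depends on the day the script runs):
#
#   build_custom_query(hazard_type="cyclone", location="Odisha", days_back=2)
#   -> '"cyclone" "Odisha" (lang:as OR ... OR lang:bh) since:YYYY-MM-DD'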

def build_default_query():
    """
    Builds the default hazard + India coastal locations + language + date query.
    """
    return build_custom_query()


def fetch_hazard_tweets(limit=20):
    """
    Fetches tweets matching the default hazard query and returns the extracted list.
    """
    query = build_default_query()
    result = search_tweets(query=query, query_type="Latest", limit=limit)
    return extract_tweets(result)

def fetch_custom_tweets(hazard_type=None, location=None, limit=20, days_back=1):
    """
    Fetches tweets based on custom hazard type and location keywords.

    Args:
        hazard_type (str, optional): Specific hazard type to search for
        location (str, optional): Specific location to search for
        limit (int): Maximum number of tweets to fetch (default: 20)
        days_back (int): Number of days back to search (default: 1)

    Returns:
        list: List of extracted tweets
    """
    query = build_custom_query(hazard_type=hazard_type, location=location, days_back=days_back)
    print(f"Custom search query: {query}")
    result = search_tweets(query=query, query_type="Latest", limit=limit)
    return extract_tweets(result)
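
# Usage sketch for fetch_custom_tweets(), commented out; the keyword
# choices are illustrative:
#
#   waves = fetch_custom_tweets(hazard_type="high waves", location="Chennai",
#                               limit=10, days_back=3)
#   for t in waves:
#       print(t["created_at"], t["tweet_url"])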

def get_available_hazards():
    """
    Returns the list of available hazard types for keyword search.
    """
    return [
        "flood", "tsunami", "cyclone", "storm surge", "high tide", "high waves",
        "swell", "coastal flooding", "rip current", "coastal erosion",
        "water discoloration", "algal bloom", "marine debris", "pollution"
    ]


def get_available_locations():
    """
    Returns the list of available locations for keyword search.
    """
    return [
        "Mumbai", "Chennai", "Kolkata", "Odisha", "Kerala", "Gujarat", "Goa",
        "Andhra Pradesh", "West Bengal", "Vizag", "Puri", "Bay of Bengal", "Arabian Sea",
        "Tamil Nadu", "Maharashtra", "Karnataka", "Andaman", "Nicobar", "Lakshadweep",
        "Kochi", "Cochin", "Mangaluru", "Mangalore", "Chandipur", "Paradip", "Digha", "Gopalpur"
    ]
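
# These lists can be used to validate user-supplied keywords before querying;
# a minimal sketch (user_hazard is a hypothetical variable):
#
#   if user_hazard in get_available_hazards():
#       results = fetch_custom_tweets(hazard_type=user_hazard)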

if __name__ == "__main__":
    tweets = fetch_hazard_tweets(limit=20)
    if tweets:
        print("\nExtracted tweets:")
        print(json.dumps(tweets, indent=2, ensure_ascii=False))
    else:
        print("No tweets found.")