import requests import json from datetime import date, timedelta from dotenv import load_dotenv import os # Load values from .env into environment load_dotenv() # Access the API key API_KEY = os.getenv("TWITTER_API_KEY", "").strip() def search_tweets(query, query_type="Latest", limit=20): """ Searches for tweets using the twitterapi.io advanced search endpoint. """ if not API_KEY: print("❌ Error: TWITTER_API_KEY not found or empty") return None url = "https://api.twitterapi.io/twitter/tweet/advanced_search" headers = {"X-API-Key": API_KEY.strip()} params = {"query": query, "queryType": query_type, "limit": limit} print(f"🔍 Executing search with query: {query}") try: response = requests.get(url, headers=headers, params=params) except Exception as e: print(f"❌ Request error: {e}") return None if response.status_code == 200: return response.json() else: print(f"Error: {response.status_code}") print(response.text) return None def extract_tweets(result_json): """ Extracts a normalized list of tweets from the API result. Returns a list of dicts with keys: tweet_url, location, created_at, text, hashtags """ if not result_json or 'tweets' not in result_json: return [] tweets = result_json.get('tweets', []) extracted_data = [] for tweet in tweets: tweet_url = tweet.get('url') text = tweet.get('text') created_at = tweet.get('createdAt') location = tweet.get('author', {}).get('location', None) hashtags = [tag['text'] for tag in tweet.get('entities', {}).get('hashtags', [])] extracted_data.append({ 'tweet_url': tweet_url, 'location': location, 'created_at': created_at, 'text': text, 'hashtags': hashtags }) return extracted_data def build_custom_query(hazard_type=None, location=None, days_back=1): """ Builds a custom query based on provided hazard type and location. Args: hazard_type (str): Specific hazard type to search for location (str): Specific location to search for days_back (int): Number of days back to search (default: 1) Returns: str: Custom search query """ # Default hazard keywords default_hazards = [ "flood", "tsunami", "cyclone", "storm surge", "high tide", "high waves", "swell", "coastal flooding", "rip current", "coastal erosion", "water discoloration", "algal bloom", "marine debris", "pollution" ] # Default location keywords default_locations = [ "Mumbai", "Chennai", "Kolkata", "Odisha", "Kerala", "Gujarat", "Goa", "Andhra Pradesh", "West Bengal", "Vizag", "Puri", "Bay of Bengal", "Arabian Sea" ] # Build hazard query if hazard_type: # Use provided hazard type hazard_query = f'"{hazard_type}"' else: # Use default hazards hazard_query = "(" + " OR ".join([f'"{hazard}"' for hazard in default_hazards]) + ")" # Build location query if location: # Use provided location location_query = f'"{location}"' else: # Use default locations location_query = "(" + " OR ".join([f'"{loc}"' for loc in default_locations]) + ")" # Language filter allowed_languages = [ "as", "bn", "brx", "doi", "gu", "hi", "kn", "ks", "kok", "ml", "mni", "mr", "ne", "or", "pa", "sa", "sat", "sd", "ta", "te", "ur", "en", "bh" ] lang_query = "(" + " OR ".join([f"lang:{lang}" for lang in allowed_languages]) + ")" # Date filter search_date = date.today() - timedelta(days=days_back) date_filter = f"since:{search_date.strftime('%Y-%m-%d')}" # Combine all parts full_query = f"{hazard_query} {location_query} {lang_query} {date_filter}" return full_query def build_default_query(): """ Builds the default hazard + India coastal locations + language + date query. """ return build_custom_query() def fetch_hazard_tweets(limit=20): """ Fetches tweets matching the default hazard query and returns extracted list. """ query = build_default_query() result = search_tweets(query=query, query_type="Latest", limit=limit) return extract_tweets(result) def fetch_custom_tweets(hazard_type=None, location=None, limit=20, days_back=1): """ Fetches tweets based on custom hazard type and location keywords. Args: hazard_type (str, optional): Specific hazard type to search for location (str, optional): Specific location to search for limit (int): Maximum number of tweets to fetch (default: 20) days_back (int): Number of days back to search (default: 1) Returns: list: List of extracted tweets """ query = build_custom_query(hazard_type=hazard_type, location=location, days_back=days_back) print(f"🔍 Custom search query: {query}") result = search_tweets(query=query, query_type="Latest", limit=limit) return extract_tweets(result) def get_available_hazards(): """ Returns a list of available hazard types for keyword search. """ return [ "flood", "tsunami", "cyclone", "storm surge", "high tide", "high waves", "swell", "coastal flooding", "rip current", "coastal erosion", "water discoloration", "algal bloom", "marine debris", "pollution" ] def get_available_locations(): """ Returns a list of available locations for keyword search. """ return [ "Mumbai", "Chennai", "Kolkata", "Odisha", "Kerala", "Gujarat", "Goa", "Andhra Pradesh", "West Bengal", "Vizag", "Puri", "Bay of Bengal", "Arabian Sea", "Tamil Nadu", "Maharashtra", "Karnataka", "Andaman", "Nicobar", "Lakshadweep", "Kochi", "Cochin", "Mangaluru", "Mangalore", "Chandipur", "Paradip", "Digha", "Gopalpur" ] if __name__ == "__main__": tweets = fetch_hazard_tweets(limit=20) if tweets: print("\nExtracted tweets:") print(json.dumps(tweets, indent=2, ensure_ascii=False))