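"""Gradio app for the Ocean Hazard Detection System.

Fetches hazard-related tweets via the scraper module, classifies and
enriches them via the classifier module, and (optionally) persists
hazardous tweets via pg_db.
"""
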
import gradio as gr
import json
import logging
from datetime import datetime
from email.utils import parsedate_to_datetime

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

try:
    from scraper import fetch_hazard_tweets, fetch_custom_tweets, get_available_hazards, get_available_locations
    from classifier import classify_tweets
    from pg_db import init_db, upsert_hazardous_tweet
    # Initialize database (optional - will work without it)
    try:
        init_db()
        logger.info("Database initialized successfully")
    except Exception as e:
        logger.warning(f"Database initialization failed: {e}. App will work without database persistence.")
except ImportError as e:
    logger.error(f"Failed to import required modules: {e}")
    raise

def run_pipeline(limit=20, hazard_type=None, location=None, days_back=1):
    """Run the hazard detection pipeline"""
    try:
        logger.info(f"Starting pipeline with limit: {limit}, hazard: {hazard_type}, location: {location}")
        
        # Choose search method based on parameters
        if hazard_type or location:
            tweets = fetch_custom_tweets(
                hazard_type=hazard_type,
                location=location,
                limit=limit,
                days_back=days_back
            )
        else:
            tweets = fetch_hazard_tweets(limit=limit)
        
        logger.info(f"Fetched {len(tweets)} tweets")
        
        # Process tweets: translate -> classify -> analyze
        logger.info("πŸ”„ Processing tweets (this may take 1-2 minutes for first request)...")
        results = classify_tweets(tweets)
        logger.info(f"βœ… Processed {len(results)} tweets (translated, classified, and analyzed)")
        
        # Store hazardous tweets in database (optional)
        try:
            hazardous_count = 0
            for r in results:
                if r.get('hazardous') == 1:
                    hazardous_count += 1
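                    # Derive hazard/location strings from the NER payload,
                    # falling back to the tweet's own location field when the
                    # extractor found no place names.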
                    hazards = (r.get('ner') or {}).get('hazards') or []
                    hazard_str = ", ".join(hazards) if hazards else "unknown"
                    locs = (r.get('ner') or {}).get('locations') or []
                    if not locs and r.get('location'):
                        locs = [r['location']]
                    location_str = ", ".join(locs) if locs else "unknown"
                    sentiment = r.get('sentiment') or {"label": "unknown", "score": 0.0}
                    created_at = r.get('created_at') or ""
                    tweet_date = ""
                    tweet_time = ""
                    if created_at:
                        dt = None
                        try:
                            dt = parsedate_to_datetime(created_at)
                        except Exception:
                            dt = None
                        if dt is None and 'T' in created_at:
                            try:
                                iso = created_at.replace('Z', '+00:00')
                                dt = datetime.fromisoformat(iso)
                            except Exception:
                                dt = None
                        if dt is not None:
                            tweet_date = dt.date().isoformat()
                            tweet_time = dt.time().strftime('%H:%M:%S')
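                    # Upsert keyed on the tweet URL, so repeated runs should
                    # not duplicate rows (behavior assumed from pg_db's API).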
                    upsert_hazardous_tweet(
                        tweet_url=r.get('tweet_url') or "",
                        hazard_type=hazard_str,
                        location=location_str,
                        sentiment_label=sentiment.get('label', 'unknown'),
                        sentiment_score=float(sentiment.get('score', 0.0)),
                        tweet_date=tweet_date,
                        tweet_time=tweet_time,
                    )
            logger.info(f"Stored {hazardous_count} hazardous tweets in database")
        except Exception as db_error:
            logger.warning(f"Database storage failed: {db_error}. Results will not be persisted.")
        
        return results
    except Exception as e:
        logger.error(f"Pipeline failed: {str(e)}")
        return f"Error: {str(e)}"

def analyze_tweets(limit, hazard_type, location, days_back):
    """Gradio interface function to analyze tweets"""
    try:
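        # Coerce numeric inputs; fall back to defaults when fields are blank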
        limit = int(limit) if limit else 20
        days_back = int(days_back) if days_back else 1
        
        # Clean up inputs
        hazard_type = hazard_type.strip() if hazard_type else None
        location = location.strip() if location else None
        
        results = run_pipeline(
            limit=limit, 
            hazard_type=hazard_type, 
            location=location, 
            days_back=days_back
        )
        
        if isinstance(results, str):  # Error case
            return results, ""
        
        # Count hazardous tweets
        hazardous_count = sum(1 for r in results if r.get('hazardous') == 1)
        total_count = len(results)
        
        # Format results for display
        display_text = f"Analyzed {total_count} tweets, found {hazardous_count} hazardous tweets.\n\n"
        
        for i, result in enumerate(results, 1):
            status = "🚨 HAZARDOUS" if result.get('hazardous') == 1 else "βœ… Safe"
            display_text += f"{i}. {status}\n"
            display_text += f"   Text: {result.get('text', 'N/A')[:100]}...\n"
            if result.get('translated_text'):
                display_text += f"   Translated: {result.get('translated_text', 'N/A')[:100]}...\n"
            if result.get('hazardous') == 1:
                sentiment = result.get('sentiment', {})
                display_text += f"   Sentiment: {sentiment.get('label', 'unknown')} ({sentiment.get('score', 0):.2f})\n"
                ner = result.get('ner', {})
                if ner.get('hazards'):
                    display_text += f"   Hazards: {', '.join(ner.get('hazards', []))}\n"
                if ner.get('locations'):
                    display_text += f"   Locations: {', '.join(ner.get('locations', []))}\n"
            display_text += f"   URL: {result.get('tweet_url', 'N/A')}\n\n"
        
        # Create JSON output
        json_output = json.dumps(results, indent=2, ensure_ascii=False)
        
        return display_text, json_output
        
    except Exception as e:
        return f"Error: {str(e)}", ""

# Health check helper (not wired to an HTTP route by demo.launch(); see the
# commented sketch below for one way to expose it)
def health_check():
    """Simple health check payload for Docker"""
    return {"status": "healthy", "message": "Ocean Hazard Detection System is running"}

# Create Gradio interface
with gr.Blocks(title="Ocean Hazard Detection", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🌊 Ocean Hazard Detection System
    
    This system analyzes tweets to detect ocean-related hazards using AI. It:
    - Scrapes tweets about ocean hazards from Indian coastal regions
    - Classifies tweets as hazardous or safe using multilingual AI
    - Translates non-English tweets to English
    - Analyzes sentiment and extracts hazard types and locations
    - Stores hazardous tweets in a database for tracking
    
    **Note**: This demo uses a limited dataset. In production, it would analyze real-time tweets.
    """)
    
    with gr.Row():
        with gr.Column():
            limit_input = gr.Number(
                label="Number of tweets to analyze",
                value=10,
                minimum=1,
                maximum=50,
                step=1
            )
            days_back_input = gr.Number(
                label="Days back to search",
                value=1,
                minimum=1,
                maximum=7,
                step=1
            )
            analyze_btn = gr.Button("🔍 Analyze Tweets", variant="primary")
        
        with gr.Column():
            hazard_type_input = gr.Dropdown(
                label="Hazard Type (Optional)",
                choices=get_available_hazards() if 'get_available_hazards' in globals() else [],
                value=None,
                allow_custom_value=True,
                info="Select a specific hazard type or leave empty for all hazards"
            )
            location_input = gr.Dropdown(
                label="Location (Optional)",
                choices=get_available_locations() if 'get_available_locations' in globals() else [],
                value=None,
                allow_custom_value=True,
                info="Select a specific location or leave empty for all locations"
            )
        
        with gr.Column():
            gr.Markdown("### πŸ“Š Analysis Results")
            results_text = gr.Textbox(
                label="Analysis Summary",
                lines=15,
                max_lines=20,
                interactive=False
            )
    
    with gr.Row():
        gr.Markdown("### πŸ“„ Raw JSON Output")
        json_output = gr.Textbox(
            label="Complete Analysis Data (JSON)",
            lines=10,
            max_lines=15,
            interactive=False
        )
    
    # Event handlers
    analyze_btn.click(
        fn=analyze_tweets,
        inputs=[limit_input, hazard_type_input, location_input, days_back_input],
        outputs=[results_text, json_output]
    )
    
    # Add some example queries
    gr.Markdown("""
    ### 🔍 What this system looks for:
    - **Hazard Keywords**: flood, tsunami, cyclone, storm surge, high tide, high waves, swell, coastal flooding, rip current, coastal erosion, water discoloration, algal bloom, marine debris, pollution
    - **Locations**: Mumbai, Chennai, Kolkata, Odisha, Kerala, Gujarat, Goa, Andhra Pradesh, West Bengal, Vizag, Puri, Bay of Bengal, Arabian Sea
    - **Languages**: Supports 20+ Indian languages including Hindi, Bengali, Tamil, Telugu, Marathi, Gujarati, and English
    """)

if __name__ == "__main__":
    # Launch the Gradio server (no extra HTTP routes are registered here)
    demo.launch(
        server_name="0.0.0.0",  # Important for Docker
        server_port=7860,       # Gradio default port
        show_error=True,        # Show errors in the interface
        share=False,            # Don't create public link
        debug=True              # Enable debug mode
    )