rdune71 committed
Commit 2a12b48 · 1 Parent(s): 03df531
app.py CHANGED
@@ -1,12 +1,22 @@
 # app.py
 import gradio as gr
 import logging
+import os
+import json
+import requests
+from datetime import datetime
+from PyPDF2 import PdfReader
+from PIL import Image
+import pytesseract
+from duckduckgo_search import DDGS
+from googletrans import Translator
+from datasets import load_dataset
+
 from modules.input_handler import InputHandler
 from modules.retriever import Retriever
 from modules.analyzer import Analyzer
 from modules.citation import CitationManager
 from modules.formatter import OutputFormatter
-import os
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -18,13 +28,109 @@ class ResearchOrchestrator:
         self.analyzer = analyzer
         self.citation_manager = citation_manager
         self.formatter = formatter
-
-    def run(self, query, progress=gr.Progress()):
+        self.translator = Translator()
+        self.chat_history = []
+
+    def process_file(self, file):
+        """Process uploaded files (PDF, images)"""
+        if not file:
+            return ""
+
+        file_path = file.name
+        file_ext = file_path.split('.')[-1].lower()
+
+        if file_ext == 'pdf':
+            try:
+                reader = PdfReader(file_path)
+                text = ""
+                for page in reader.pages:
+                    text += page.extract_text() + "\n"
+                return f"\n\n[PDF Content]: {text[:1000]}..."  # Limit content size
+            except Exception as e:
+                logging.error(f"PDF processing error: {e}")
+                return "\n\n[Error processing PDF file]"
+
+        elif file_ext in ['png', 'jpg', 'jpeg']:
+            try:
+                image = Image.open(file_path)
+                text = pytesseract.image_to_string(image)
+                return f"\n\n[Image Text]: {text[:500]}..."  # Limit content size
+            except Exception as e:
+                logging.error(f"Image processing error: {e}")
+                return "\n\n[Error processing image file]"
+
+        return ""
+
+    def search_with_fallback(self, query, use_ddg=False):
+        """Search with fallback to DDG if Tavily fails"""
+        if use_ddg:
+            try:
+                with DDGS() as ddgs:
+                    results = []
+                    for r in ddgs.text(query, max_results=5):
+                        results.append({
+                            'title': r.get('title', 'No title'),
+                            'url': r.get('href', 'No URL'),
+                            'content': r.get('body', 'No content')
+                        })
+                    return results
+            except Exception as e:
+                logging.error(f"DDG search failed: {e}")
+                return []
+        else:
+            try:
+                return self.retriever.search(query)
+            except Exception as e:
+                logging.error(f"Tavily search failed: {e}")
+                # Fallback to DDG
+                return self.search_with_fallback(query, use_ddg=True)
+
+    def get_time_weather(self):
+        """Get current time and weather for Boston and Bogor"""
+        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        # For demo purposes, using mock weather data
+        # In production, you would use a real weather API
+        boston_weather = {
+            "temp": 22,
+            "desc": "Partly cloudy"
+        }
+
+        bogor_weather = {
+            "temp": 28,
+            "desc": "Thunderstorms"
+        }
+
+        return {
+            "time": current_time,
+            "boston": boston_weather,
+            "bogor": bogor_weather
+        }
+
+    def run(self, query, file=None, use_ddg=False, progress=gr.Progress()):
         """Execute the research pipeline with streaming updates"""
         try:
             progress(0.0, desc="Starting research...")
             logging.info(f"Starting research for query: {query}")
 
+            # Add time/weather info
+            time_weather = self.get_time_weather()
+
+            # Process file if uploaded
+            file_content = self.process_file(file)
+            if file_content:
+                query += file_content
+
+            # Detect and translate if needed
+            try:
+                detected_lang = self.translator.detect(query).lang
+                if detected_lang not in ['en', 'English']:
+                    translated_query = self.translator.translate(query, src=detected_lang, dest='en').text
+                    query = translated_query
+                    logging.info(f"Translated query from {detected_lang} to English")
+            except Exception as e:
+                logging.warning(f"Translation failed: {e}")
+
             # Step 1: Process input
             progress(0.1, desc="🔍 Processing your query...")
             processed_query = self.input_handler.process_query(query)
@@ -32,7 +138,7 @@ class ResearchOrchestrator:
 
             # Step 2: Retrieve data
             progress(0.3, desc="🌐 Searching for relevant information...")
-            search_results = self.retriever.search(processed_query)
+            search_results = self.search_with_fallback(processed_query, use_ddg)
 
             if not search_results:
                 result = "⚠️ No relevant information found for your query. Please try rephrasing."
@@ -71,8 +177,21 @@ class ResearchOrchestrator:
             formatted_output = self.formatter.format_response(cited_analysis, search_results)
             logging.info("Response formatted successfully")
 
+            # Add time/weather info
+            formatted_output += f"\n\n### 📅 Current Time: {time_weather['time']}"
+            formatted_output += f"\n### 🌤 Weather in Boston: {time_weather['boston']['temp']}°C, {time_weather['boston']['desc']}"
+            formatted_output += f"\n### 🌧 Weather in Bogor: {time_weather['bogor']['temp']}°C, {time_weather['bogor']['desc']}"
+
             # Add completion notification
             progress(1.0, desc="✅ Research complete!")
+
+            # Translate back if needed
+            if 'detected_lang' in locals() and detected_lang not in ['en', 'English']:
+                try:
+                    formatted_output = self.translator.translate(formatted_output, src='en', dest=detected_lang).text
+                except Exception as e:
+                    logging.warning(f"Back-translation failed: {e}")
+
             if len(search_results) >= 3:
                 completion_message = "\n\n---\n[ANALYSIS COMPLETE] ✅ Research finished with sufficient sources."
             else:
@@ -91,30 +210,33 @@ CONFIG = {
     "hf_api_base": "https://zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/",
     "hf_api_key": os.getenv("HF_TOKEN"),
     "tavily_api_key": os.getenv("TAVILY_API_KEY"),
+    "openweather_api_key": os.getenv("OPENWEATHER_API_KEY", "demo_key")
 }
 
+# Load version info
+try:
+    with open("version.json", "r") as f:
+        VERSION = json.load(f)
+except FileNotFoundError:
+    VERSION = {"version": "133", "description": "Initial release"}
+
 # Initialize modules with error handling
 def initialize_modules():
     """Initialize all modules with proper error handling"""
     try:
         if not CONFIG["tavily_api_key"]:
             raise ValueError("TAVILY_API_KEY environment variable is not set")
-
         if not CONFIG["hf_api_key"]:
             raise ValueError("HF_TOKEN environment variable is not set")
 
         input_handler = InputHandler()
         retriever = Retriever(api_key=CONFIG["tavily_api_key"])
         analyzer = Analyzer(base_url=CONFIG["hf_api_base"], api_key=CONFIG["hf_api_key"])
         citation_manager = CitationManager()
         formatter = OutputFormatter()
 
         return ResearchOrchestrator(
-            input_handler,
-            retriever,
-            analyzer,
-            citation_manager,
-            formatter
+            input_handler, retriever, analyzer, citation_manager, formatter
         )
     except Exception as e:
         logging.error(f"Failed to initialize modules: {str(e)}")
@@ -123,78 +245,230 @@ def initialize_modules():
 # Initialize orchestrator
 orchestrator = initialize_modules()
 
-# Custom CSS for spinner and streaming
+# Custom CSS for enhanced UI
 custom_css = """
+body {
+    background: linear-gradient(135deg, #e0f7fa, #f5f5f5);
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    color: #003366;
+}
+.container {
+    max-width: 1200px;
+    margin: 0 auto;
+    padding: 20px;
+}
+.header {
+    text-align: center;
+    margin-bottom: 30px;
+    padding: 20px;
+    background: linear-gradient(90deg, #007BFF, #0056b3);
+    color: white;
+    border-radius: 10px;
+    box-shadow: 0 4px 8px rgba(0,0,0,0.1);
+}
+.title {
+    font-size: 2.5em;
+    margin-bottom: 10px;
+}
+.subtitle {
+    font-size: 1.2em;
+    opacity: 0.9;
+}
+.card {
+    background: white;
+    border-radius: 10px;
+    box-shadow: 0 4px 12px rgba(0,0,0,0.08);
+    padding: 25px;
+    margin-bottom: 25px;
+    transition: transform 0.3s ease, box-shadow 0.3s ease;
+}
+.card:hover {
+    transform: translateY(-5px);
+    box-shadow: 0 6px 16px rgba(0,0,0,0.12);
+}
+.input-section {
+    background: #e3f2fd;
+}
+.output-section {
+    background: #f8f9fa;
+}
+.btn-primary {
+    background: linear-gradient(90deg, #007BFF, #0056b3);
+    color: white;
+    border: none;
+    padding: 12px 25px;
+    font-size: 16px;
+    border-radius: 30px;
+    cursor: pointer;
+    transition: all 0.3s ease;
+    font-weight: 600;
+    box-shadow: 0 4px 8px rgba(0,123,255,0.3);
+}
+.btn-primary:hover {
+    transform: translateY(-3px);
+    box-shadow: 0 6px 12px rgba(0,123,255,0.4);
+    background: linear-gradient(90deg, #0056b3, #003d7a);
+}
+.btn-primary:active {
+    transform: translateY(1px);
+    box-shadow: 0 2px 4px rgba(0,123,255,0.3);
+}
 .spinner {
     border: 4px solid #f3f3f3;
-    border-top: 4px solid #3498db;
+    border-top: 4px solid #007BFF;
     border-radius: 50%;
     width: 24px;
     height: 24px;
     animation: spin 1s linear infinite;
     display: inline-block;
     margin-right: 8px;
 }
 .streaming-content {
     white-space: pre-wrap;
-    font-family: monospace;
-    background-color: #f8f9fa;
-    padding: 10px;
-    border-radius: 5px;
-    margin: 10px 0;
+    font-family: 'Courier New', monospace;
+    background-color: #f0f8ff;
+    padding: 15px;
+    border-radius: 8px;
+    margin: 15px 0;
+    border-left: 4px solid #007BFF;
+    box-shadow: inset 0 0 5px rgba(0,0,0,0.05);
 }
+.chat-history {
+    max-height: 400px;
+    overflow-y: auto;
+    padding: 15px;
+    background: #ffffff;
+    border-radius: 8px;
+    border: 1px solid #e0e0e0;
+    margin-bottom: 20px;
+}
+.version-info {
+    text-align: center;
+    padding: 15px;
+    background: #fff8e1;
+    border-radius: 8px;
+    margin-top: 20px;
+    border: 1px solid #ffecb3;
+    color: #5d4037;
+}
+.examples {
+    background: #e8f5e9;
+    border-left: 4px solid #4caf50;
+}
 @keyframes spin {
     0% { transform: rotate(0deg); }
     100% { transform: rotate(360deg); }
 }
+.status-indicator {
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    padding: 10px;
+    background: #e3f2fd;
+    border-radius: 8px;
+    margin: 10px 0;
+}
+.progress-text {
+    margin-left: 10px;
+    font-weight: 500;
+}
+.footer {
+    text-align: center;
+    margin-top: 30px;
+    padding: 20px;
+    color: #666;
+    font-size: 0.9em;
+}
+.highlight {
+    background: linear-gradient(120deg, #ffebee, #fff3e0);
+    padding: 3px 6px;
+    border-radius: 4px;
+    font-weight: 500;
+}
 """
 
-def research_assistant(query, progress=gr.Progress()):
+def research_assistant(query, file, use_ddg, progress=gr.Progress()):
     """Main entry point for the research assistant with streaming"""
     logging.info(f"Research assistant called with query: {query}")
-    for step in orchestrator.run(query, progress):
+    for step in orchestrator.run(query, file, use_ddg, progress):
         yield step
 
 # Create Gradio interface
 with gr.Blocks(css=custom_css, title="Research Assistant") as demo:
-    gr.Markdown("# 🧠 AI Research Assistant")
-    gr.Markdown("Enter a research topic to get a structured analysis with sources")
-
-    with gr.Row():
-        with gr.Column():
-            query_input = gr.Textbox(
-                label="Research Query",
-                placeholder="Enter your research question...",
-                lines=3
-            )
-            submit_btn = gr.Button("Research", variant="primary")
-
-        with gr.Column():
-            output = gr.Markdown(label="Analysis Results", elem_classes=["streaming-content"])
-
-    # Status indicator with spinner
-    status_indicator = gr.HTML("<div id='status'><span class='spinner'></span> Ready for your research query</div>")
-
-    examples = gr.Examples(
-        examples=[
-            "Latest advancements in quantum computing",
-            "Impact of climate change on global agriculture",
-            "Recent developments in Alzheimer's treatment research"
-        ],
-        inputs=query_input
-    )
+    gr.Markdown(f"""
+    <div class="header">
+        <div class="title">🧠 AI Research Assistant</div>
+        <div class="subtitle">Your intelligent research companion with multi-language support</div>
+    </div>
+    """)
+
+    gr.Markdown(f"""
+    <div class="version-info">
+        🧬 Version: {VERSION['version']} | {VERSION['description']}
+    </div>
+    """)
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            with gr.Group(elem_classes=["card", "input-section"]):
+                gr.Markdown("### 🔍 Research Input")
+                query_input = gr.Textbox(
+                    label="Research Query",
+                    placeholder="Enter your research question...",
+                    lines=3
+                )
+                file_upload = gr.File(
+                    label="📄 Upload Files (PDF, Images)",
+                    file_types=[".pdf", ".png", ".jpg", ".jpeg"]
+                )
+                ddg_checkbox = gr.Checkbox(
+                    label="Use DuckDuckGo Search (Alternative)",
+                    value=False
+                )
+                submit_btn = gr.Button("Research", elem_classes=["btn-primary"])
+
+            gr.Markdown("### 💡 Example Queries")
+            examples = gr.Examples(
+                examples=[
+                    "Latest advancements in quantum computing",
+                    "Impact of climate change on global agriculture",
+                    "Recent developments in Alzheimer's treatment research",
+                    "Perkembangan terbaru dalam kecerdasan buatan",
+                    "Dampak perubahan iklim terhadap pertanian di Indonesia"
+                ],
+                inputs=query_input
+            )
+
+        with gr.Column(scale=2):
+            with gr.Group(elem_classes=["card", "output-section"]):
+                gr.Markdown("### 📊 Analysis Results")
+                output = gr.Markdown(elem_classes=["streaming-content"])
+
+                status_indicator = gr.HTML("""
+                <div class="status-indicator">
+                    <div class="spinner"></div>
+                    <div class="progress-text">Ready for your research query</div>
+                </div>
+                """)
 
     submit_btn.click(
         fn=research_assistant,
-        inputs=query_input,
+        inputs=[query_input, file_upload, ddg_checkbox],
         outputs=output
     )
 
     query_input.submit(
         fn=research_assistant,
-        inputs=query_input,
+        inputs=[query_input, file_upload, ddg_checkbox],
        outputs=output
    )
+
+    gr.Markdown("""
+    <div class="footer">
+        <p>Built with ❤️ for researchers worldwide | Supports English and Bahasa Indonesia</p>
+        <p>Features: Multi-language support • File upload • Web search • Real-time analysis</p>
+    </div>
+    """)
 
 if __name__ == "__main__":
     demo.launch()
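Note on get_time_weather: it returns hard-coded readings, and CONFIG now defines an openweather_api_key that nothing reads yet. A minimal sketch of wiring the two together, assuming OpenWeatherMap's current-weather endpoint (the fetch_weather helper is illustrative, not part of this commit):

    import requests

    def fetch_weather(city, api_key):
        # Current conditions from OpenWeatherMap, in metric units.
        url = "https://api.openweathermap.org/data/2.5/weather"
        resp = requests.get(url, params={"q": city, "appid": api_key, "units": "metric"}, timeout=10)
        resp.raise_for_status()
        data = resp.json()
        # Reduce the payload to the shape the mock data already uses.
        return {"temp": round(data["main"]["temp"]), "desc": data["weather"][0]["description"].capitalize()}

    # e.g. boston_weather = fetch_weather("Boston", CONFIG["openweather_api_key"])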
modules/analyzer.py CHANGED
@@ -20,14 +20,14 @@ class Analyzer:
 
         # Connect to Redis cache
         self.cache = RedisServerStatusCache()
 
     def is_server_ready(self):
         # Check cache first
         cached_status = self.cache.get(self.cache_key)
         if cached_status is not None:
             logging.info(f"Using cached server status: {cached_status}")
             return cached_status
 
         # Try multiple approaches to check if server is ready
         is_ready = False
 
@@ -61,12 +61,12 @@ class Analyzer:
         self.cache.set(self.cache_key, is_ready, ttl=60)  # Cache for 1 minute
         log_server_status(self.cache_key, is_ready)
         return is_ready
 
     def wait_for_server(self, timeout=180, interval=15):
         if self.is_server_ready():
             logging.info("✅ Server is already ready (from cache or direct check).")
             return True
 
         logging.info("⏳ Server not ready. Starting polling...")
         start_time = time.time()
 
@@ -79,12 +79,11 @@ class Analyzer:
 
         logging.warning("⏰ Server initialization timeout reached")
         return False
 
     def analyze_stream(self, query, search_results):
-        """
-        Analyze search results using the custom LLM with streaming output
-        Yields chunks of the response as they arrive
-        """
+        """ Analyze search results using the custom LLM with streaming output
+        Yields chunks of the response as they arrive """
+
         # Prepare context from search results
         context = "\n\n".join([
             f"Source: {result.get('url', 'N/A')}\nTitle: {result.get('title', 'N/A')}\nContent: {result.get('content', 'N/A')}"
@@ -92,22 +91,20 @@ class Analyzer:
         ])
 
         prompt = f"""
         You are an expert research analyst. Analyze the following query and information to provide a comprehensive summary.
 
         Query: {query}
-
-        Information:
-        {context}
+        Information: {context}
 
         Please provide:
         1. A brief overview of the topic
         2. Key findings or developments
         3. Different perspectives or approaches
         4. Potential implications or future directions
         5. Any controversies or conflicting viewpoints
 
         Structure your response clearly with these sections. If there is insufficient information, state that clearly.
         """
 
         try:
             # First check if server is ready
@@ -136,12 +133,13 @@ class Analyzer:
             for chunk in response:
                 if chunk.choices[0].delta.content:
                     yield chunk.choices[0].delta.content
 
             logging.info("Analysis streaming completed successfully")
 
         except Exception as e:
             error_msg = str(e)
             logging.error(f"Analysis streaming failed: {error_msg}")
+
             if "503" in error_msg or "Service Unavailable" in error_msg:
                 yield "⚠️ The AI model server is currently unavailable. It may be initializing. Please try again in a few minutes."
             elif "timeout" in error_msg.lower() or "read timeout" in error_msg.lower():
@@ -150,11 +148,10 @@ class Analyzer:
                 yield "⚠️ The AI model endpoint was not found. Please check the configuration."
             else:
                 yield f"Analysis failed: {str(e)}"
 
     def analyze(self, query, search_results):
-        """
-        Non-streaming version for compatibility
-        """
+        """ Non-streaming version for compatibility """
+
         # Prepare context from search results
         context = "\n\n".join([
             f"Source: {result.get('url', 'N/A')}\nTitle: {result.get('title', 'N/A')}\nContent: {result.get('content', 'N/A')}"
@@ -162,22 +159,20 @@ class Analyzer:
         ])
 
         prompt = f"""
         You are an expert research analyst. Analyze the following query and information to provide a comprehensive summary.
 
         Query: {query}
-
-        Information:
-        {context}
+        Information: {context}
 
         Please provide:
         1. A brief overview of the topic
         2. Key findings or developments
         3. Different perspectives or approaches
         4. Potential implications or future directions
         5. Any controversies or conflicting viewpoints
 
         Structure your response clearly with these sections. If there is insufficient information, state that clearly.
         """
 
         try:
             # First check if server is ready
@@ -188,6 +183,7 @@ class Analyzer:
             return error_msg
 
         logging.info("Server is ready. Sending request to AI model...")
+
         response = self.client.chat.completions.create(
             model="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
             messages=[
@@ -206,10 +202,12 @@ class Analyzer:
         except Exception as e:
             error_msg = str(e)
             logging.error(f"Analysis failed: {error_msg}")
+
             if "503" in error_msg or "Service Unavailable" in error_msg:
                 return "⚠️ The AI model server is currently unavailable. It may be initializing. Please try again in a few minutes."
             elif "timeout" in error_msg.lower() or "read timeout" in error_msg.lower():
                 return "⚠️ The AI model request timed out. The server may be overloaded or initializing. Please try again in a few minutes."
             elif "404" in error_msg:
                 return "⚠️ The AI model endpoint was not found. Please check the configuration."
+
             return f"Analysis failed: {str(e)}"
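analyze_stream yields bare text chunks, so a caller that wants incremental rendering has to accumulate them itself, as app.py's orchestrator does. A usage sketch (the search_results value is illustrative):

    analyzer = Analyzer(base_url=CONFIG["hf_api_base"], api_key=CONFIG["hf_api_key"])
    results = [{"url": "https://example.com", "title": "Example", "content": "..."}]

    partial = ""
    for chunk in analyzer.analyze_stream("quantum computing", results):
        partial += chunk  # re-yield `partial` from a generator to stream into the Gradio Markdown box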
modules/citation.py CHANGED
@@ -1,11 +1,9 @@
 class CitationManager:
     def add_citations(self, analysis, search_results):
-        """
-        Add citations to the analysis based on source URLs
-        """
+        """ Add citations to the analysis based on source URLs """
         if not search_results:
             return analysis
 
         # Create a simple citation mapping
         citations = {}
         for i, result in enumerate(search_results):
@@ -18,15 +16,13 @@ class CitationManager:
 
         # Add citation references to analysis
         cited_analysis = analysis
+
         # In a more sophisticated implementation, we would match claims to sources
         # For now, we'll just append the citation list
-
         return cited_analysis, citations
 
     def format_bibliography(self, citations):
-        """
-        Format citations into a bibliography
-        """
+        """ Format citations into a bibliography """
         bib_items = []
         for cite_id, info in citations.items():
             bib_item = f"{cite_id} {info['title']}. {info['source']}. Retrieved from: {info['url']}"
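The comments in add_citations leave claim-to-source matching as future work. A hedged sketch of one naive approach, tagging each sentence with the source whose title shares the most words with it (the helper name is hypothetical, not part of this commit):

    def match_claims_to_sources(analysis, citations):
        sentences = [s.strip() for s in analysis.split('.') if s.strip()]
        tagged = []
        for sentence in sentences:
            words = set(sentence.lower().split())
            # Pick the citation whose title overlaps the sentence the most.
            best_id, best_overlap = None, 0
            for cite_id, info in citations.items():
                overlap = len(words & set(info['title'].lower().split()))
                if overlap > best_overlap:
                    best_id, best_overlap = cite_id, overlap
            tagged.append(sentence + (f" {best_id}" if best_id else ""))
        return '. '.join(tagged) + '.'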
modules/formatter.py CHANGED
@@ -1,8 +1,6 @@
 class OutputFormatter:
     def format_response(self, analysis_result, search_results):
-        """
-        Format the final response with proper structure
-        """
+        """ Format the final response with proper structure """
         if isinstance(analysis_result, tuple):
             analysis, citations = analysis_result
         else:
@@ -16,12 +14,12 @@ class OutputFormatter:
         if search_results:
             formatted_output += "## Sources\n"
             for i, result in enumerate(search_results):
-                formatted_output += f"{i+1}. [{result.get('title', 'Untitled')}]({result.get('url', '')})\n"
+                formatted_output += f"{i+1}. [{result.get('title', 'Untitled')}]({result.get('url', '#')})\n"
 
         # Add citation details if available
         if citations:
             formatted_output += "\n## Detailed Citations\n"
             for cite_id, info in citations.items():
-                formatted_output += f"- {cite_id} **{info['title']}** - {info['source']}: {info['url']}\n"
+                formatted_output += f"- {cite_id} {info['title']} - {info['source']}: {info['url']}\n"
 
         return formatted_output
modules/input_handler.py CHANGED
@@ -1,23 +1,22 @@
 class InputHandler:
     def process_query(self, query):
-        """
-        Process and validate user input
-        """
+        """ Process and validate user input """
         # Clean and normalize query
         cleaned_query = query.strip()
 
         # Add context if needed
         if len(cleaned_query) < 5:
             raise ValueError("Query too short. Please provide more details.")
 
         return cleaned_query
 
     def extract_keywords(self, query):
-        """
-        Extract important keywords from query
-        """
+        """ Extract important keywords from query """
         # Simple keyword extraction (could be enhanced with NLP)
-        stop_words = {'the', 'is', 'at', 'which', 'on', 'in', 'for', 'of', 'with', 'by'}
+        stop_words = {
+            'the', 'is', 'at', 'which', 'on', 'in', 'for', 'of', 'with', 'by',
+            'dan', 'di', 'ke', 'dari', 'pada', 'untuk', 'oleh', 'sebagai'
+        }
         words = query.lower().split()
         keywords = [word for word in words if word not in stop_words]
         return keywords
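Per the comment, extract_keywords could be enhanced with NLP. Even without a new dependency, regex tokenization plus frequency ranking already improves on a bare split(), which leaves punctuation glued to words. A sketch (function name illustrative):

    import re
    from collections import Counter

    def extract_keywords_ranked(query, stop_words, top_k=8):
        words = re.findall(r"\w+", query.lower())  # strips punctuation that split() keeps
        counts = Counter(w for w in words if w not in stop_words and len(w) > 2)
        return [word for word, _ in counts.most_common(top_k)]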
modules/retriever.py CHANGED
@@ -4,11 +4,9 @@ import logging
 class Retriever:
     def __init__(self, api_key):
         self.client = TavilyClient(api_key=api_key)
 
     def search(self, query, max_results=5):
-        """
-        Search for relevant content using Tavily API
-        """
+        """ Search for relevant content using Tavily API """
         try:
             response = self.client.search(
                 query=query,
@@ -23,13 +21,10 @@ class Retriever:
             return []
 
     def get_related_queries(self, query):
-        """
-        Generate related search queries
-        """
+        """ Generate related search queries """
         # This could be enhanced with LLM-based query expansion
         return [
             f"{query} research paper",
             f"{query} latest developments",
             f"{query} pros and cons"
         ]
-
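get_related_queries still uses fixed templates; the comment points at LLM-based expansion. A sketch reusing the OpenAI-compatible client pattern from analyzer.py (passing the client in, and the model name, are assumptions borrowed from that module, not part of this commit):

    def expand_queries(client, query, n=3):
        # Ask the chat model for n alternative phrasings, one per line.
        response = client.chat.completions.create(
            model="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
            messages=[{"role": "user", "content": f"Suggest {n} alternative web search queries for: {query}. One per line, no numbering."}],
            max_tokens=120,
        )
        lines = response.choices[0].message.content.splitlines()
        return [line.strip() for line in lines if line.strip()][:n]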
modules/server_cache.py CHANGED
@@ -18,8 +18,9 @@ class RedisServerStatusCache:
             self.fallback_cache = {}
             self.use_redis = False
             logging.warning(f"Redis not available, using in-memory cache: {e}")
+
         self.default_ttl = default_ttl
 
     def get(self, server_key):
         try:
             if self.use_redis and self.redis:
@@ -40,7 +41,7 @@ class RedisServerStatusCache:
         except Exception as e:
             logging.error(f"Cache get error: {e}")
             return None
 
     def set(self, server_key, status, ttl=None):
         try:
             ttl = ttl or self.default_ttl
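For reference, the Analyzer pairs set() with a 60-second TTL, so repeated readiness checks inside that window never touch the endpoint. A usage sketch (the key name is illustrative):

    cache = RedisServerStatusCache()
    cache.set("hf-endpoint", True, ttl=60)  # remember the endpoint is up for one minute
    status = cache.get("hf-endpoint")       # True now; None again once the TTL expires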
modules/status_logger.py CHANGED
@@ -39,6 +39,6 @@ def log_analysis_result(query, success, message=""):
         with open(ANALYSIS_LOG_FILE, 'a') as f:
             if not file_exists:
                 f.write("timestamp,query,result,message\n")
-            f.write(f"{timestamp},\"{query}\",{result_str},\"{message}\"\n")
+            f.write(f'{timestamp},"{query}",{result_str},"{message}"\n')
     except Exception as e:
         logging.error(f"Failed to log analysis result: {e}")
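The rewritten f-string drops the backslash escapes but still produces a malformed row whenever query or message contains a double quote or newline; the stdlib csv writer handles that quoting automatically. A sketch of the equivalent append using the names already in scope (an alternative, not what the commit does):

    import csv

    with open(ANALYSIS_LOG_FILE, 'a', newline='') as f:
        writer = csv.writer(f)
        if not file_exists:
            writer.writerow(["timestamp", "query", "result", "message"])
        writer.writerow([timestamp, query, result_str, message])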
requirements.txt CHANGED
@@ -6,4 +6,10 @@ requests>=2.31.0
 pytest==8.3.3
 redis>=4.0.0
 pandas>=1.5.0
-matplotlib>=3.5.0
+matplotlib>=3.5.0
+duckduckgo-search>=6.1.0
+googletrans==4.0.0-rc1
+PyPDF2>=3.0.0
+Pillow>=10.0.0
+pytesseract>=0.3.10
+datasets>=2.14.0
version.json ADDED
@@ -0,0 +1,6 @@
+
+{
+  "version": "133",
+  "description": "Enhanced assistant with file upload, DDG search, multi-language support",
+  "date": "2025-04-05"
+}