- app.py +331 -57
- modules/analyzer.py +38 -40
- modules/citation.py +4 -8
- modules/formatter.py +3 -5
- modules/input_handler.py +7 -8
- modules/retriever.py +4 -9
- modules/server_cache.py +3 -2
- modules/status_logger.py +1 -1
- requirements.txt +7 -1
- version.json +6 -0
app.py
CHANGED
@@ -1,12 +1,22 @@
 # app.py
 import gradio as gr
 import logging
+import os
+import json
+import requests
+from datetime import datetime
+from PyPDF2 import PdfReader
+from PIL import Image
+import pytesseract
+from duckduckgo_search import DDGS
+from googletrans import Translator
+from datasets import load_dataset
+
 from modules.input_handler import InputHandler
 from modules.retriever import Retriever
 from modules.analyzer import Analyzer
 from modules.citation import CitationManager
 from modules.formatter import OutputFormatter
-import os
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
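
Two of the new imports (`requests` and `load_dataset`) are not used anywhere in the lines this commit touches, and `pytesseract` additionally needs the system `tesseract` binary, which the Python package alone does not provide. A minimal startup probe along these lines could surface that early (a sketch, not part of the commit; `check_ocr_available` is a hypothetical helper):

```python
import logging
import shutil

def check_ocr_available() -> bool:
    # pytesseract only wraps the tesseract CLI; installing the Python
    # package without the binary makes image OCR fail at call time.
    if shutil.which("tesseract") is None:
        logging.warning("tesseract binary not found; image OCR will fail")
        return False
    return True
```
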
@@ -18,13 +28,109 @@ class ResearchOrchestrator:
         self.analyzer = analyzer
         self.citation_manager = citation_manager
         self.formatter = formatter
+        self.translator = Translator()
+        self.chat_history = []
 
-    def run(self, query, progress=gr.Progress()):
+    def process_file(self, file):
+        """Process uploaded files (PDF, images)"""
+        if not file:
+            return ""
+
+        file_path = file.name
+        file_ext = file_path.split('.')[-1].lower()
+
+        if file_ext == 'pdf':
+            try:
+                reader = PdfReader(file_path)
+                text = ""
+                for page in reader.pages:
+                    text += page.extract_text() + "\n"
+                return f"\n\n[PDF Content]: {text[:1000]}..."  # Limit content size
+            except Exception as e:
+                logging.error(f"PDF processing error: {e}")
+                return "\n\n[Error processing PDF file]"
+
+        elif file_ext in ['png', 'jpg', 'jpeg']:
+            try:
+                image = Image.open(file_path)
+                text = pytesseract.image_to_string(image)
+                return f"\n\n[Image Text]: {text[:500]}..."  # Limit content size
+            except Exception as e:
+                logging.error(f"Image processing error: {e}")
+                return "\n\n[Error processing image file]"
+
+        return ""
+
+    def search_with_fallback(self, query, use_ddg=False):
+        """Search with fallback to DDG if Tavily fails"""
+        if use_ddg:
+            try:
+                with DDGS() as ddgs:
+                    results = []
+                    for r in ddgs.text(query, max_results=5):
+                        results.append({
+                            'title': r.get('title', 'No title'),
+                            'url': r.get('href', 'No URL'),
+                            'content': r.get('body', 'No content')
+                        })
+                    return results
+            except Exception as e:
+                logging.error(f"DDG search failed: {e}")
+                return []
+        else:
+            try:
+                return self.retriever.search(query)
+            except Exception as e:
+                logging.error(f"Tavily search failed: {e}")
+                # Fallback to DDG
+                return self.search_with_fallback(query, use_ddg=True)
+
+    def get_time_weather(self):
+        """Get current time and weather for Boston and Bogor"""
+        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        # For demo purposes, using mock weather data
+        # In production, you would use a real weather API
+        boston_weather = {
+            "temp": 22,
+            "desc": "Partly cloudy"
+        }
+
+        bogor_weather = {
+            "temp": 28,
+            "desc": "Thunderstorms"
+        }
+
+        return {
+            "time": current_time,
+            "boston": boston_weather,
+            "bogor": bogor_weather
+        }
+
+    def run(self, query, file=None, use_ddg=False, progress=gr.Progress()):
         """Execute the research pipeline with streaming updates"""
         try:
             progress(0.0, desc="Starting research...")
             logging.info(f"Starting research for query: {query}")
 
+            # Add time/weather info
+            time_weather = self.get_time_weather()
+
+            # Process file if uploaded
+            file_content = self.process_file(file)
+            if file_content:
+                query += file_content
+
+            # Detect and translate if needed
+            try:
+                detected_lang = self.translator.detect(query).lang
+                if detected_lang not in ['en', 'English']:
+                    translated_query = self.translator.translate(query, src=detected_lang, dest='en').text
+                    query = translated_query
+                    logging.info(f"Translated query from {detected_lang} to English")
+            except Exception as e:
+                logging.warning(f"Translation failed: {e}")
+
             # Step 1: Process input
             progress(0.1, desc="🔍 Processing your query...")
             processed_query = self.input_handler.process_query(query)
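
The DDG branch of `search_with_fallback` is self-contained enough to exercise outside the orchestrator; a standalone sketch with the same `duckduckgo_search` calls (the function name is illustrative):

```python
import logging
from duckduckgo_search import DDGS

def ddg_search(query: str, max_results: int = 5) -> list:
    """Standalone copy of the DDG branch above, handy for local testing."""
    try:
        with DDGS() as ddgs:
            return [
                {"title": r.get("title", "No title"),
                 "url": r.get("href", "No URL"),
                 "content": r.get("body", "No content")}
                for r in ddgs.text(query, max_results=max_results)
            ]
    except Exception as e:
        logging.error(f"DDG search failed: {e}")
        return []
```

Worth noting: the Tavily branch recurses once with `use_ddg=True` on failure, so a query only comes back empty when both backends fail.
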
@@ -32,7 +138,7 @@ class ResearchOrchestrator:
 
             # Step 2: Retrieve data
             progress(0.3, desc="🌐 Searching for relevant information...")
-            search_results = self.retriever.search(processed_query)
+            search_results = self.search_with_fallback(processed_query, use_ddg)
 
             if not search_results:
                 result = "⚠️ No relevant information found for your query. Please try rephrasing."
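
The language round-trip added to `run()` relies on googletrans' detect/translate pair being synchronous, which holds for the 4.0.0-rc1 pin in requirements.txt. A compact standalone version of the same round-trip (the helper name is illustrative):

```python
from googletrans import Translator  # sync API in googletrans==4.0.0-rc1

translator = Translator()

def to_english(text: str):
    """Return (english_text, detected_lang), falling back to the input on failure."""
    try:
        detected = translator.detect(text).lang
        if detected not in ['en', 'English']:
            return translator.translate(text, src=detected, dest='en').text, detected
        return text, detected
    except Exception:
        return text, 'en'
```
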
@@ -71,8 +177,21 @@ class ResearchOrchestrator:
             formatted_output = self.formatter.format_response(cited_analysis, search_results)
             logging.info("Response formatted successfully")
 
+            # Add time/weather info
+            formatted_output += f"\n\n### 📅 Current Time: {time_weather['time']}"
+            formatted_output += f"\n### 🌤 Weather in Boston: {time_weather['boston']['temp']}°C, {time_weather['boston']['desc']}"
+            formatted_output += f"\n### 🌧 Weather in Bogor: {time_weather['bogor']['temp']}°C, {time_weather['bogor']['desc']}"
+
             # Add completion notification
             progress(1.0, desc="✅ Research complete!")
+
+            # Translate back if needed
+            if 'detected_lang' in locals() and detected_lang not in ['en', 'English']:
+                try:
+                    formatted_output = self.translator.translate(formatted_output, src='en', dest=detected_lang).text
+                except Exception as e:
+                    logging.warning(f"Back-translation failed: {e}")
+
             if len(search_results) >= 3:
                 completion_message = "\n\n---\n[ANALYSIS COMPLETE] ✅ Research finished with sufficient sources."
             else:
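
The back-translation step feeds the entire formatted Markdown through googletrans, which can mangle headings and link syntax; translating only the analysis body before formatting would be more robust. Separately, `get_time_weather()` still returns hard-coded values even though CONFIG now carries an `openweather_api_key`. If the intent is to wire that up, the standard OpenWeather current-weather endpoint looks roughly like this (a sketch under that assumption, not part of the commit):

```python
import requests

def fetch_weather(city: str, api_key: str) -> dict:
    """Fetch current weather for a city; units=metric yields °C."""
    resp = requests.get(
        "https://api.openweathermap.org/data/2.5/weather",
        params={"q": city, "appid": api_key, "units": "metric"},
        timeout=10,
    )
    resp.raise_for_status()
    data = resp.json()
    return {"temp": round(data["main"]["temp"]),
            "desc": data["weather"][0]["description"]}
```

That would also give the otherwise unused `requests` import a purpose.
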
@@ -91,30 +210,33 @@ CONFIG = {
     "hf_api_base": "https://zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/",
     "hf_api_key": os.getenv("HF_TOKEN"),
     "tavily_api_key": os.getenv("TAVILY_API_KEY"),
+    "openweather_api_key": os.getenv("OPENWEATHER_API_KEY", "demo_key")
 }
 
+# Load version info
+try:
+    with open("version.json", "r") as f:
+        VERSION = json.load(f)
+except FileNotFoundError:
+    VERSION = {"version": "133", "description": "Initial release"}
+
 # Initialize modules with error handling
 def initialize_modules():
     """Initialize all modules with proper error handling"""
     try:
         if not CONFIG["tavily_api_key"]:
             raise ValueError("TAVILY_API_KEY environment variable is not set")
 
         if not CONFIG["hf_api_key"]:
             raise ValueError("HF_TOKEN environment variable is not set")
 
         input_handler = InputHandler()
         retriever = Retriever(api_key=CONFIG["tavily_api_key"])
         analyzer = Analyzer(base_url=CONFIG["hf_api_base"], api_key=CONFIG["hf_api_key"])
         citation_manager = CitationManager()
         formatter = OutputFormatter()
 
         return ResearchOrchestrator(
-            input_handler,
-            retriever,
-            analyzer,
-            citation_manager,
-            formatter
+            input_handler, retriever, analyzer, citation_manager, formatter
         )
     except Exception as e:
         logging.error(f"Failed to initialize modules: {str(e)}")
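
version.json is parsed at import time with only FileNotFoundError handled, so a malformed file would still crash the Space at startup. A slightly wider guard keeps the same fallback (a minimal sketch):

```python
import json

try:
    with open("version.json", "r") as f:
        VERSION = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
    VERSION = {"version": "133", "description": "Initial release"}
```
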
@@ -123,78 +245,230 @@ def initialize_modules():
 # Initialize orchestrator
 orchestrator = initialize_modules()
 
-# Custom CSS for
+# Custom CSS for enhanced UI
 custom_css = """
+body {
+    background: linear-gradient(135deg, #e0f7fa, #f5f5f5);
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    color: #003366;
+}
+.container {
+    max-width: 1200px;
+    margin: 0 auto;
+    padding: 20px;
+}
+.header {
+    text-align: center;
+    margin-bottom: 30px;
+    padding: 20px;
+    background: linear-gradient(90deg, #007BFF, #0056b3);
+    color: white;
+    border-radius: 10px;
+    box-shadow: 0 4px 8px rgba(0,0,0,0.1);
+}
+.title {
+    font-size: 2.5em;
+    margin-bottom: 10px;
+}
+.subtitle {
+    font-size: 1.2em;
+    opacity: 0.9;
+}
+.card {
+    background: white;
+    border-radius: 10px;
+    box-shadow: 0 4px 12px rgba(0,0,0,0.08);
+    padding: 25px;
+    margin-bottom: 25px;
+    transition: transform 0.3s ease, box-shadow 0.3s ease;
+}
+.card:hover {
+    transform: translateY(-5px);
+    box-shadow: 0 6px 16px rgba(0,0,0,0.12);
+}
+.input-section {
+    background: #e3f2fd;
+}
+.output-section {
+    background: #f8f9fa;
+}
+.btn-primary {
+    background: linear-gradient(90deg, #007BFF, #0056b3);
+    color: white;
+    border: none;
+    padding: 12px 25px;
+    font-size: 16px;
+    border-radius: 30px;
+    cursor: pointer;
+    transition: all 0.3s ease;
+    font-weight: 600;
+    box-shadow: 0 4px 8px rgba(0,123,255,0.3);
+}
+.btn-primary:hover {
+    transform: translateY(-3px);
+    box-shadow: 0 6px 12px rgba(0,123,255,0.4);
+    background: linear-gradient(90deg, #0056b3, #003d7a);
+}
+.btn-primary:active {
+    transform: translateY(1px);
+    box-shadow: 0 2px 4px rgba(0,123,255,0.3);
+}
 .spinner {
+    border: 4px solid #f3f3f3;
+    border-top: 4px solid #007BFF;
+    border-radius: 50%;
+    width: 24px;
+    height: 24px;
+    animation: spin 1s linear infinite;
+    display: inline-block;
+    margin-right: 8px;
 }
 .streaming-content {
+    white-space: pre-wrap;
+    font-family: 'Courier New', monospace;
+    background-color: #f0f8ff;
+    padding: 15px;
+    border-radius: 8px;
+    margin: 15px 0;
+    border-left: 4px solid #007BFF;
+    box-shadow: inset 0 0 5px rgba(0,0,0,0.05);
+}
+.chat-history {
+    max-height: 400px;
+    overflow-y: auto;
+    padding: 15px;
+    background: #ffffff;
+    border-radius: 8px;
+    border: 1px solid #e0e0e0;
+    margin-bottom: 20px;
+}
+.version-info {
+    text-align: center;
+    padding: 15px;
+    background: #fff8e1;
+    border-radius: 8px;
+    margin-top: 20px;
+    border: 1px solid #ffecb3;
+    color: #5d4037;
+}
+.examples {
+    background: #e8f5e9;
+    border-left: 4px solid #4caf50;
 }
 @keyframes spin {
+    0% { transform: rotate(0deg); }
+    100% { transform: rotate(360deg); }
+}
+.status-indicator {
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    padding: 10px;
+    background: #e3f2fd;
+    border-radius: 8px;
+    margin: 10px 0;
+}
+.progress-text {
+    margin-left: 10px;
+    font-weight: 500;
+}
+.footer {
+    text-align: center;
+    margin-top: 30px;
+    padding: 20px;
+    color: #666;
+    font-size: 0.9em;
+}
+.highlight {
+    background: linear-gradient(120deg, #ffebee, #fff3e0);
+    padding: 3px 6px;
+    border-radius: 4px;
+    font-weight: 500;
 }
 """
 
-def research_assistant(query, progress=gr.Progress()):
+def research_assistant(query, file, use_ddg, progress=gr.Progress()):
     """Main entry point for the research assistant with streaming"""
     logging.info(f"Research assistant called with query: {query}")
-    for step in orchestrator.run(query, progress):
+    for step in orchestrator.run(query, file, use_ddg, progress):
         yield step
 
 # Create Gradio interface
 with gr.Blocks(css=custom_css, title="Research Assistant") as demo:
-    gr.Markdown("
+    gr.Markdown(f"""
+    <div class="header">
+        <div class="title">🧠 AI Research Assistant</div>
+        <div class="subtitle">Your intelligent research companion with multi-language support</div>
+    </div>
+    """)
 
+    gr.Markdown(f"""
+    <div class="version-info">
+        🧬 Version: {VERSION['version']} | {VERSION['description']}
+    </div>
+    """)
 
+    with gr.Row():
+        with gr.Column(scale=1):
+            with gr.Card(elem_classes=["card", "input-section"]):
+                gr.Markdown("### 🔍 Research Input")
+                query_input = gr.Textbox(
+                    label="Research Query",
+                    placeholder="Enter your research question...",
-            lines=3
-        )
-        submit_btn = gr.Button("Research", variant="primary")
+                    lines=3
+                )
+                file_upload = gr.File(
+                    label="📄 Upload Files (PDF, Images)",
+                    file_types=[".pdf", ".png", ".jpg", ".jpeg"]
+                )
+                ddg_checkbox = gr.Checkbox(
+                    label="Use DuckDuckGo Search (Alternative)",
+                    value=False
+                )
+                submit_btn = gr.Button("Research", elem_classes=["btn-primary"])
 
-        with gr.Column():
-            output = gr.Markdown(label="Analysis Results", elem_classes=["streaming-content"])
-
-            # Status indicator with spinner
-            status_indicator = gr.HTML("<div id='status'><span class='spinner'></span> Ready for your research query</div>")
-    "
+                gr.Markdown("### 💡 Example Queries")
+                examples = gr.Examples(
+                    examples=[
+                        "Latest advancements in quantum computing",
+                        "Impact of climate change on global agriculture",
+                        "Recent developments in Alzheimer's treatment research",
+                        "Perkembangan terbaru dalam kecerdasan buatan",
+                        "Dampak perubahan iklim terhadap pertanian di Indonesia"
+                    ],
+                    inputs=query_input
+                )
 
+        with gr.Column(scale=2):
+            with gr.Card(elem_classes=["card", "output-section"]):
+                gr.Markdown("### 📊 Analysis Results")
+                output = gr.Markdown(elem_classes=["streaming-content"])
 
+                status_indicator = gr.HTML("""
+                <div class="status-indicator">
+                    <div class="spinner"></div>
+                    <div class="progress-text">Ready for your research query</div>
+                </div>
+                """)
 
     submit_btn.click(
         fn=research_assistant,
-        inputs=query_input,
+        inputs=[query_input, file_upload, ddg_checkbox],
         outputs=output
     )
 
     query_input.submit(
         fn=research_assistant,
-        inputs=query_input,
+        inputs=[query_input, file_upload, ddg_checkbox],
         outputs=output
     )
+
+    gr.Markdown("""
+    <div class="footer">
+        <p>Built with ❤️ for researchers worldwide | Supports English and Bahasa Indonesia</p>
+        <p>Features: Multi-language support • File upload • Web search • Real-time analysis</p>
+    </div>
+    """)
 
 if __name__ == "__main__":
     demo.launch()
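
One likely culprit for the Space's runtime-error status: stock Gradio ships no `gr.Card` container (its layout primitives are `gr.Row`, `gr.Column`, `gr.Group`, and `gr.Accordion`), so `with gr.Card(...)` raises an AttributeError at import unless a custom component provides it. Assuming no such component, `gr.Group` gives the same `elem_classes` styling hook (a sketch mirroring the layout above):

```python
import gradio as gr

with gr.Blocks(css=custom_css, title="Research Assistant") as demo:
    with gr.Row():
        with gr.Column(scale=1):
            # gr.Group is the stock container; the "card" CSS classes still apply.
            with gr.Group(elem_classes=["card", "input-section"]):
                gr.Markdown("### 🔍 Research Input")
```

Two smaller observations: `status_indicator` is created but never wired to an event, so the spinner renders permanently, and because `research_assistant` is a generator, the Markdown `output` already re-renders on every yield.
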
modules/analyzer.py
CHANGED
@@ -20,14 +20,14 @@ class Analyzer:
 
         # Connect to Redis cache
         self.cache = RedisServerStatusCache()
 
     def is_server_ready(self):
         # Check cache first
         cached_status = self.cache.get(self.cache_key)
         if cached_status is not None:
             logging.info(f"Using cached server status: {cached_status}")
             return cached_status
 
         # Try multiple approaches to check if server is ready
         is_ready = False
 
@@ -61,12 +61,12 @@ class Analyzer:
         self.cache.set(self.cache_key, is_ready, ttl=60)  # Cache for 1 minute
         log_server_status(self.cache_key, is_ready)
         return is_ready
 
     def wait_for_server(self, timeout=180, interval=15):
         if self.is_server_ready():
             logging.info("✅ Server is already ready (from cache or direct check).")
             return True
 
         logging.info("⏳ Server not ready. Starting polling...")
         start_time = time.time()
 
@@ -79,12 +79,11 @@ class Analyzer:
 
         logging.warning("⏰ Server initialization timeout reached")
         return False
 
     def analyze_stream(self, query, search_results):
-        """
-        """
+        """ Analyze search results using the custom LLM with streaming output
+        Yields chunks of the response as they arrive """
         # Prepare context from search results
         context = "\n\n".join([
             f"Source: {result.get('url', 'N/A')}\nTitle: {result.get('title', 'N/A')}\nContent: {result.get('content', 'N/A')}"
@@ -92,22 +91,20 @@ class Analyzer:
         ])
 
         prompt = f"""
-        Query: {query}
+You are an expert research analyst. Analyze the following query and information to provide a comprehensive summary.
+
+Query: {query}
+Information: {context}
+
+Please provide:
+1. A brief overview of the topic
+2. Key findings or developments
+3. Different perspectives or approaches
+4. Potential implications or future directions
+5. Any controversies or conflicting viewpoints
+
+Structure your response clearly with these sections. If there is insufficient information, state that clearly.
+"""
 
         try:
             # First check if server is ready
@@ -136,12 +133,13 @@ class Analyzer:
             for chunk in response:
                 if chunk.choices[0].delta.content:
                     yield chunk.choices[0].delta.content
 
             logging.info("Analysis streaming completed successfully")
 
         except Exception as e:
             error_msg = str(e)
             logging.error(f"Analysis streaming failed: {error_msg}")
 
             if "503" in error_msg or "Service Unavailable" in error_msg:
                 yield "⚠️ The AI model server is currently unavailable. It may be initializing. Please try again in a few minutes."
             elif "timeout" in error_msg.lower() or "read timeout" in error_msg.lower():
@@ -150,11 +148,10 @@ class Analyzer:
                 yield "⚠️ The AI model endpoint was not found. Please check the configuration."
             else:
                 yield f"Analysis failed: {str(e)}"
 
     def analyze(self, query, search_results):
-        """
-        """
+        """ Non-streaming version for compatibility """
         # Prepare context from search results
         context = "\n\n".join([
             f"Source: {result.get('url', 'N/A')}\nTitle: {result.get('title', 'N/A')}\nContent: {result.get('content', 'N/A')}"
@@ -162,22 +159,20 @@ class Analyzer:
         ])
 
         prompt = f"""
-        2. Key findings or developments
-        3. Different perspectives or approaches
-        4. Potential implications or future directions
-        5. Any controversies or conflicting viewpoints
-        Structure your response clearly with these sections. If there is insufficient information, state that clearly.
-        """
+You are an expert research analyst. Analyze the following query and information to provide a comprehensive summary.
+
+Query: {query}
+Information: {context}
+
+Please provide:
+1. A brief overview of the topic
+2. Key findings or developments
+3. Different perspectives or approaches
+4. Potential implications or future directions
+5. Any controversies or conflicting viewpoints
+
+Structure your response clearly with these sections. If there is insufficient information, state that clearly.
+"""
 
         try:
             # First check if server is ready
@@ -188,6 +183,7 @@ class Analyzer:
             return error_msg
 
         logging.info("Server is ready. Sending request to AI model...")
+
         response = self.client.chat.completions.create(
             model="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
             messages=[
@@ -206,10 +202,12 @@ class Analyzer:
         except Exception as e:
             error_msg = str(e)
             logging.error(f"Analysis failed: {error_msg}")
+
             if "503" in error_msg or "Service Unavailable" in error_msg:
                 return "⚠️ The AI model server is currently unavailable. It may be initializing. Please try again in a few minutes."
             elif "timeout" in error_msg.lower() or "read timeout" in error_msg.lower():
                 return "⚠️ The AI model request timed out. The server may be overloaded or initializing. Please try again in a few minutes."
             elif "404" in error_msg:
                 return "⚠️ The AI model endpoint was not found. Please check the configuration."
+
             return f"Analysis failed: {str(e)}"
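
Since `analyze_stream` yields raw token deltas, whatever feeds the Gradio Markdown component has to accumulate them so each update re-renders the full text so far (a small consumption sketch, assuming the generator above):

```python
def stream_to_markdown(analyzer, query, search_results):
    """Accumulate streamed deltas so every yield is the complete text so far."""
    accumulated = ""
    for delta in analyzer.analyze_stream(query, search_results):
        accumulated += delta
        yield accumulated
```
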
modules/citation.py
CHANGED
@@ -1,11 +1,9 @@
 class CitationManager:
     def add_citations(self, analysis, search_results):
-        """
-        Add citations to the analysis based on source URLs
-        """
+        """ Add citations to the analysis based on source URLs """
         if not search_results:
             return analysis
 
         # Create a simple citation mapping
         citations = {}
         for i, result in enumerate(search_results):
@@ -18,15 +16,13 @@ class CitationManager:
 
         # Add citation references to analysis
         cited_analysis = analysis
         # In a more sophisticated implementation, we would match claims to sources
         # For now, we'll just append the citation list
 
         return cited_analysis, citations
 
     def format_bibliography(self, citations):
-        """
-        Format citations into a bibliography
-        """
+        """ Format citations into a bibliography """
         bib_items = []
         for cite_id, info in citations.items():
             bib_item = f"{cite_id} {info['title']}. {info['source']}. Retrieved from: {info['url']}"
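
A pre-existing wart this diff leaves in place: `add_citations` returns a bare string when `search_results` is empty but an `(analysis, citations)` tuple otherwise, which is why `OutputFormatter.format_response` needs an isinstance check. A uniform return shape removes the special case (a suggestion sketch; the subclass name is hypothetical):

```python
class UniformCitationManager(CitationManager):
    def add_citations(self, analysis, search_results):
        """Always return (analysis, citations) so callers need no isinstance check."""
        if not search_results:
            return analysis, {}
        return super().add_citations(analysis, search_results)
```
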
modules/formatter.py
CHANGED
@@ -1,8 +1,6 @@
 class OutputFormatter:
     def format_response(self, analysis_result, search_results):
-        """
-        Format the final response with proper structure
-        """
+        """ Format the final response with proper structure """
         if isinstance(analysis_result, tuple):
             analysis, citations = analysis_result
         else:
@@ -16,12 +14,12 @@ class OutputFormatter:
         if search_results:
             formatted_output += "## Sources\n"
             for i, result in enumerate(search_results):
-                formatted_output += f"{i+1}. [{result.get('title', 'Untitled')}]({result.get('url', '')})\n"
+                formatted_output += f"{i+1}. [{result.get('title', 'Untitled')}]({result.get('url', '#')})\n"
 
         # Add citation details if available
         if citations:
             formatted_output += "\n## Detailed Citations\n"
             for cite_id, info in citations.items():
-                formatted_output += f"- {cite_id}
+                formatted_output += f"- {cite_id} {info['title']} - {info['source']}: {info['url']}\n"
 
         return formatted_output
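
A quick sanity check of the new Sources line with a stubbed result list (assuming the unchanged top of `format_response` initialises `formatted_output` from the analysis text):

```python
formatter = OutputFormatter()
stub = [{"title": "Quantum computing review", "url": "https://example.org/qc"}]
print(formatter.format_response("Summary text.", stub))
# The Sources section should contain:
# 1. [Quantum computing review](https://example.org/qc)
```
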
modules/input_handler.py
CHANGED
@@ -1,23 +1,22 @@
 class InputHandler:
     def process_query(self, query):
-        """
-        Process and validate user input
-        """
+        """ Process and validate user input """
         # Clean and normalize query
         cleaned_query = query.strip()
 
         # Add context if needed
         if len(cleaned_query) < 5:
             raise ValueError("Query too short. Please provide more details.")
 
         return cleaned_query
 
     def extract_keywords(self, query):
-        """
-        Extract important keywords from query
-        """
+        """ Extract important keywords from query """
         # Simple keyword extraction (could be enhanced with NLP)
-        stop_words = {
+        stop_words = {
+            'the', 'is', 'at', 'which', 'on', 'in', 'for', 'of', 'with', 'by',
+            'dan', 'di', 'ke', 'dari', 'pada', 'untuk', 'oleh', 'sebagai'
+        }
         words = query.lower().split()
         keywords = [word for word in words if word not in stop_words]
         return keywords
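
The stop-word set now mixes English and Indonesian, matching the bilingual example queries in the UI. A quick check:

```python
handler = InputHandler()
print(handler.extract_keywords("Dampak perubahan iklim di Indonesia"))
# -> ['dampak', 'perubahan', 'iklim', 'indonesia']  ('di' is filtered out)
```
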
modules/retriever.py
CHANGED
@@ -4,11 +4,9 @@ import logging
 class Retriever:
     def __init__(self, api_key):
         self.client = TavilyClient(api_key=api_key)
 
     def search(self, query, max_results=5):
-        """
-        Search for relevant content using Tavily API
-        """
+        """ Search for relevant content using Tavily API """
         try:
             response = self.client.search(
                 query=query,
@@ -23,13 +21,10 @@ class Retriever:
             return []
 
     def get_related_queries(self, query):
-        """
-        Generate related search queries
-        """
+        """ Generate related search queries """
         # This could be enhanced with LLM-based query expansion
         return [
             f"{query} research paper",
             f"{query} latest developments",
             f"{query} pros and cons"
         ]
modules/server_cache.py
CHANGED
@@ -18,8 +18,9 @@ class RedisServerStatusCache:
             self.fallback_cache = {}
             self.use_redis = False
             logging.warning(f"Redis not available, using in-memory cache: {e}")
+
         self.default_ttl = default_ttl
 
     def get(self, server_key):
         try:
             if self.use_redis and self.redis:
@@ -40,7 +41,7 @@ class RedisServerStatusCache:
         except Exception as e:
             logging.error(f"Cache get error: {e}")
             return None
 
     def set(self, server_key, status, ttl=None):
         try:
             ttl = ttl or self.default_ttl
modules/status_logger.py
CHANGED
@@ -39,6 +39,6 @@ def log_analysis_result(query, success, message=""):
         with open(ANALYSIS_LOG_FILE, 'a') as f:
             if not file_exists:
                 f.write("timestamp,query,result,message\n")
-            f.write(f
+            f.write(f'{timestamp},"{query}",{result_str},"{message}"\n')
     except Exception as e:
         logging.error(f"Failed to log analysis result: {e}")
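
The new f-string write quotes query and message, but a value containing a double quote or newline still breaks the CSV row. Inside `log_analysis_result`, the csv module handles escaping for free (a sketch reusing the function's existing names):

```python
import csv

with open(ANALYSIS_LOG_FILE, 'a', newline='') as f:
    writer = csv.writer(f)
    if not file_exists:
        writer.writerow(["timestamp", "query", "result", "message"])
    writer.writerow([timestamp, query, result_str, message])
```
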
requirements.txt
CHANGED
@@ -6,4 +6,10 @@ requests>=2.31.0
 pytest==8.3.3
 redis>=4.0.0
 pandas>=1.5.0
-matplotlib>=3.5.0
+matplotlib>=3.5.0
+duckduckgo-search>=6.1.0
+googletrans==4.0.0-rc1
+PyPDF2>=3.0.0
+Pillow>=10.0.0
+pytesseract>=0.3.10
+datasets>=2.14.0
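
One pin worth flagging: googletrans==4.0.0-rc1 depends on a very old httpx (0.13.x), which commonly conflicts with the httpx range recent gradio releases require — another plausible source of the Space's runtime error. If the resolver balks, deep-translator is a frequently used substitute (a hypothetical swap, not part of this commit):

```python
# requirements.txt alternative: deep-translator>=1.11.0
from deep_translator import GoogleTranslator

def translate_to_english(text: str) -> str:
    # Talks to the same Google endpoint without pinning an old httpx.
    return GoogleTranslator(source="auto", target="en").translate(text)
```
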
version.json
ADDED
@@ -0,0 +1,6 @@
+
+{
+    "version": "133",
+    "description": "Enhanced assistant with file upload, DDG search, multi-language support",
+    "date": "2025-04-05"
+}