rdune71 committed
Commit 2a12b48 · 1 Parent(s): 03df531
app.py CHANGED
@@ -1,12 +1,22 @@
 # app.py
 import gradio as gr
 import logging
+import os
+import json
+import requests
+from datetime import datetime
+from PyPDF2 import PdfReader
+from PIL import Image
+import pytesseract
+from duckduckgo_search import DDGS
+from googletrans import Translator
+from datasets import load_dataset
+
 from modules.input_handler import InputHandler
 from modules.retriever import Retriever
 from modules.analyzer import Analyzer
 from modules.citation import CitationManager
 from modules.formatter import OutputFormatter
-import os
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -18,13 +28,109 @@ class ResearchOrchestrator:
         self.analyzer = analyzer
         self.citation_manager = citation_manager
         self.formatter = formatter
-
-    def run(self, query, progress=gr.Progress()):
+        self.translator = Translator()
+        self.chat_history = []
+
+    def process_file(self, file):
+        """Process uploaded files (PDF, images)"""
+        if not file:
+            return ""
+
+        file_path = file.name
+        file_ext = file_path.split('.')[-1].lower()
+
+        if file_ext == 'pdf':
+            try:
+                reader = PdfReader(file_path)
+                text = ""
+                for page in reader.pages:
+                    text += page.extract_text() + "\n"
+                return f"\n\n[PDF Content]: {text[:1000]}..."  # Limit content size
+            except Exception as e:
+                logging.error(f"PDF processing error: {e}")
+                return "\n\n[Error processing PDF file]"
+
+        elif file_ext in ['png', 'jpg', 'jpeg']:
+            try:
+                image = Image.open(file_path)
+                text = pytesseract.image_to_string(image)
+                return f"\n\n[Image Text]: {text[:500]}..."  # Limit content size
+            except Exception as e:
+                logging.error(f"Image processing error: {e}")
+                return "\n\n[Error processing image file]"
+
+        return ""
+
+    def search_with_fallback(self, query, use_ddg=False):
+        """Search with fallback to DDG if Tavily fails"""
+        if use_ddg:
+            try:
+                with DDGS() as ddgs:
+                    results = []
+                    for r in ddgs.text(query, max_results=5):
+                        results.append({
+                            'title': r.get('title', 'No title'),
+                            'url': r.get('href', 'No URL'),
+                            'content': r.get('body', 'No content')
+                        })
+                    return results
+            except Exception as e:
+                logging.error(f"DDG search failed: {e}")
+                return []
+        else:
+            try:
+                return self.retriever.search(query)
+            except Exception as e:
+                logging.error(f"Tavily search failed: {e}")
+                # Fallback to DDG
+                return self.search_with_fallback(query, use_ddg=True)
+
+    def get_time_weather(self):
+        """Get current time and weather for Boston and Bogor"""
+        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        # For demo purposes, using mock weather data
+        # In production, you would use a real weather API
+        boston_weather = {
+            "temp": 22,
+            "desc": "Partly cloudy"
+        }
+
+        bogor_weather = {
+            "temp": 28,
+            "desc": "Thunderstorms"
+        }
+
+        return {
+            "time": current_time,
+            "boston": boston_weather,
+            "bogor": bogor_weather
+        }
+
+    def run(self, query, file=None, use_ddg=False, progress=gr.Progress()):
         """Execute the research pipeline with streaming updates"""
         try:
             progress(0.0, desc="Starting research...")
             logging.info(f"Starting research for query: {query}")
 
+            # Add time/weather info
+            time_weather = self.get_time_weather()
+
+            # Process file if uploaded
+            file_content = self.process_file(file)
+            if file_content:
+                query += file_content
+
+            # Detect and translate if needed
+            try:
+                detected_lang = self.translator.detect(query).lang
+                if detected_lang not in ['en', 'English']:
+                    translated_query = self.translator.translate(query, src=detected_lang, dest='en').text
+                    query = translated_query
+                    logging.info(f"Translated query from {detected_lang} to English")
+            except Exception as e:
+                logging.warning(f"Translation failed: {e}")
+
             # Step 1: Process input
             progress(0.1, desc="🔍 Processing your query...")
             processed_query = self.input_handler.process_query(query)
@@ -32,7 +138,7 @@ class ResearchOrchestrator:
 
             # Step 2: Retrieve data
             progress(0.3, desc="🌐 Searching for relevant information...")
-            search_results = self.retriever.search(processed_query)
+            search_results = self.search_with_fallback(processed_query, use_ddg)
 
             if not search_results:
                 result = "⚠️ No relevant information found for your query. Please try rephrasing."
@@ -71,8 +177,21 @@ class ResearchOrchestrator:
             formatted_output = self.formatter.format_response(cited_analysis, search_results)
             logging.info("Response formatted successfully")
 
+            # Add time/weather info
+            formatted_output += f"\n\n### 📅 Current Time: {time_weather['time']}"
+            formatted_output += f"\n### 🌤 Weather in Boston: {time_weather['boston']['temp']}°C, {time_weather['boston']['desc']}"
+            formatted_output += f"\n### 🌧 Weather in Bogor: {time_weather['bogor']['temp']}°C, {time_weather['bogor']['desc']}"
+
             # Add completion notification
             progress(1.0, desc="✅ Research complete!")
+
+            # Translate back if needed
+            if 'detected_lang' in locals() and detected_lang not in ['en', 'English']:
+                try:
+                    formatted_output = self.translator.translate(formatted_output, src='en', dest=detected_lang).text
+                except Exception as e:
+                    logging.warning(f"Back-translation failed: {e}")
+
             if len(search_results) >= 3:
                 completion_message = "\n\n---\n[ANALYSIS COMPLETE] ✅ Research finished with sufficient sources."
             else:
@@ -91,30 +210,33 @@ CONFIG = {
     "hf_api_base": "https://zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/",
     "hf_api_key": os.getenv("HF_TOKEN"),
     "tavily_api_key": os.getenv("TAVILY_API_KEY"),
+    "openweather_api_key": os.getenv("OPENWEATHER_API_KEY", "demo_key")
 }
 
+# Load version info
+try:
+    with open("version.json", "r") as f:
+        VERSION = json.load(f)
+except FileNotFoundError:
+    VERSION = {"version": "133", "description": "Initial release"}
+
 # Initialize modules with error handling
 def initialize_modules():
     """Initialize all modules with proper error handling"""
     try:
         if not CONFIG["tavily_api_key"]:
             raise ValueError("TAVILY_API_KEY environment variable is not set")
-
         if not CONFIG["hf_api_key"]:
             raise ValueError("HF_TOKEN environment variable is not set")
 
         input_handler = InputHandler()
         retriever = Retriever(api_key=CONFIG["tavily_api_key"])
         analyzer = Analyzer(base_url=CONFIG["hf_api_base"], api_key=CONFIG["hf_api_key"])
         citation_manager = CitationManager()
         formatter = OutputFormatter()
 
         return ResearchOrchestrator(
-            input_handler,
-            retriever,
-            analyzer,
-            citation_manager,
-            formatter
+            input_handler, retriever, analyzer, citation_manager, formatter
         )
     except Exception as e:
         logging.error(f"Failed to initialize modules: {str(e)}")
@@ -123,78 +245,230 @@ def initialize_modules():
 # Initialize orchestrator
 orchestrator = initialize_modules()
 
-# Custom CSS for spinner and streaming
+# Custom CSS for enhanced UI
 custom_css = """
+body {
+    background: linear-gradient(135deg, #e0f7fa, #f5f5f5);
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    color: #003366;
+}
+.container {
+    max-width: 1200px;
+    margin: 0 auto;
+    padding: 20px;
+}
+.header {
+    text-align: center;
+    margin-bottom: 30px;
+    padding: 20px;
+    background: linear-gradient(90deg, #007BFF, #0056b3);
+    color: white;
+    border-radius: 10px;
+    box-shadow: 0 4px 8px rgba(0,0,0,0.1);
+}
+.title {
+    font-size: 2.5em;
+    margin-bottom: 10px;
+}
+.subtitle {
+    font-size: 1.2em;
+    opacity: 0.9;
+}
+.card {
+    background: white;
+    border-radius: 10px;
+    box-shadow: 0 4px 12px rgba(0,0,0,0.08);
+    padding: 25px;
+    margin-bottom: 25px;
+    transition: transform 0.3s ease, box-shadow 0.3s ease;
+}
+.card:hover {
+    transform: translateY(-5px);
+    box-shadow: 0 6px 16px rgba(0,0,0,0.12);
+}
+.input-section {
+    background: #e3f2fd;
+}
+.output-section {
+    background: #f8f9fa;
+}
+.btn-primary {
+    background: linear-gradient(90deg, #007BFF, #0056b3);
+    color: white;
+    border: none;
+    padding: 12px 25px;
+    font-size: 16px;
+    border-radius: 30px;
+    cursor: pointer;
+    transition: all 0.3s ease;
+    font-weight: 600;
+    box-shadow: 0 4px 8px rgba(0,123,255,0.3);
+}
+.btn-primary:hover {
+    transform: translateY(-3px);
+    box-shadow: 0 6px 12px rgba(0,123,255,0.4);
+    background: linear-gradient(90deg, #0056b3, #003d7a);
+}
+.btn-primary:active {
+    transform: translateY(1px);
+    box-shadow: 0 2px 4px rgba(0,123,255,0.3);
+}
 .spinner {
     border: 4px solid #f3f3f3;
-    border-top: 4px solid #3498db;
+    border-top: 4px solid #007BFF;
     border-radius: 50%;
     width: 24px;
     height: 24px;
     animation: spin 1s linear infinite;
     display: inline-block;
     margin-right: 8px;
 }
 .streaming-content {
     white-space: pre-wrap;
-    font-family: monospace;
-    background-color: #f8f9fa;
-    padding: 10px;
-    border-radius: 5px;
-    margin: 10px 0;
+    font-family: 'Courier New', monospace;
+    background-color: #f0f8ff;
+    padding: 15px;
+    border-radius: 8px;
+    margin: 15px 0;
+    border-left: 4px solid #007BFF;
+    box-shadow: inset 0 0 5px rgba(0,0,0,0.05);
 }
+.chat-history {
+    max-height: 400px;
+    overflow-y: auto;
+    padding: 15px;
+    background: #ffffff;
+    border-radius: 8px;
+    border: 1px solid #e0e0e0;
+    margin-bottom: 20px;
+}
+.version-info {
+    text-align: center;
+    padding: 15px;
+    background: #fff8e1;
+    border-radius: 8px;
+    margin-top: 20px;
+    border: 1px solid #ffecb3;
+    color: #5d4037;
+}
+.examples {
+    background: #e8f5e9;
+    border-left: 4px solid #4caf50;
+}
 @keyframes spin {
     0% { transform: rotate(0deg); }
     100% { transform: rotate(360deg); }
 }
+.status-indicator {
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    padding: 10px;
+    background: #e3f2fd;
+    border-radius: 8px;
+    margin: 10px 0;
+}
+.progress-text {
+    margin-left: 10px;
+    font-weight: 500;
+}
+.footer {
+    text-align: center;
+    margin-top: 30px;
+    padding: 20px;
+    color: #666;
+    font-size: 0.9em;
+}
+.highlight {
+    background: linear-gradient(120deg, #ffebee, #fff3e0);
+    padding: 3px 6px;
+    border-radius: 4px;
+    font-weight: 500;
+}
 """
 
-def research_assistant(query, progress=gr.Progress()):
+def research_assistant(query, file, use_ddg, progress=gr.Progress()):
     """Main entry point for the research assistant with streaming"""
     logging.info(f"Research assistant called with query: {query}")
-    for step in orchestrator.run(query, progress):
+    for step in orchestrator.run(query, file, use_ddg, progress):
         yield step
 
 # Create Gradio interface
 with gr.Blocks(css=custom_css, title="Research Assistant") as demo:
-    gr.Markdown("# 🧠 AI Research Assistant")
-    gr.Markdown("Enter a research topic to get a structured analysis with sources")
-
-    with gr.Row():
-        with gr.Column():
-            query_input = gr.Textbox(
-                label="Research Query",
-                placeholder="Enter your research question...",
-                lines=3
-            )
-            submit_btn = gr.Button("Research", variant="primary")
-
-        with gr.Column():
-            output = gr.Markdown(label="Analysis Results", elem_classes=["streaming-content"])
-
-    # Status indicator with spinner
-    status_indicator = gr.HTML("<div id='status'><span class='spinner'></span> Ready for your research query</div>")
-
-    examples = gr.Examples(
-        examples=[
-            "Latest advancements in quantum computing",
-            "Impact of climate change on global agriculture",
-            "Recent developments in Alzheimer's treatment research"
-        ],
-        inputs=query_input
-    )
+    gr.Markdown(f"""
+    <div class="header">
+        <div class="title">🧠 AI Research Assistant</div>
+        <div class="subtitle">Your intelligent research companion with multi-language support</div>
+    </div>
+    """)
+
+    gr.Markdown(f"""
+    <div class="version-info">
+        🧬 Version: {VERSION['version']} | {VERSION['description']}
+    </div>
+    """)
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            with gr.Group(elem_classes=["card", "input-section"]):
+                gr.Markdown("### 🔍 Research Input")
+                query_input = gr.Textbox(
+                    label="Research Query",
+                    placeholder="Enter your research question...",
+                    lines=3
+                )
+                file_upload = gr.File(
+                    label="📄 Upload Files (PDF, Images)",
+                    file_types=[".pdf", ".png", ".jpg", ".jpeg"]
+                )
+                ddg_checkbox = gr.Checkbox(
+                    label="Use DuckDuckGo Search (Alternative)",
+                    value=False
+                )
+                submit_btn = gr.Button("Research", elem_classes=["btn-primary"])
+
+            gr.Markdown("### 💡 Example Queries")
+            examples = gr.Examples(
+                examples=[
+                    "Latest advancements in quantum computing",
+                    "Impact of climate change on global agriculture",
+                    "Recent developments in Alzheimer's treatment research",
+                    "Perkembangan terbaru dalam kecerdasan buatan",
+                    "Dampak perubahan iklim terhadap pertanian di Indonesia"
+                ],
+                inputs=query_input
+            )
+
+        with gr.Column(scale=2):
+            with gr.Group(elem_classes=["card", "output-section"]):
+                gr.Markdown("### 📊 Analysis Results")
+                output = gr.Markdown(elem_classes=["streaming-content"])
+
+                status_indicator = gr.HTML("""
+                <div class="status-indicator">
+                    <div class="spinner"></div>
+                    <div class="progress-text">Ready for your research query</div>
+                </div>
+                """)
 
     submit_btn.click(
         fn=research_assistant,
-        inputs=query_input,
+        inputs=[query_input, file_upload, ddg_checkbox],
         outputs=output
     )
 
     query_input.submit(
         fn=research_assistant,
-        inputs=query_input,
+        inputs=[query_input, file_upload, ddg_checkbox],
        outputs=output
    )
+
+    gr.Markdown("""
+    <div class="footer">
+        <p>Built with ❤️ for researchers worldwide | Supports English and Bahasa Indonesia</p>
+        <p>Features: Multi-language support • File upload • Web search • Real-time analysis</p>
+    </div>
+    """)
 
 if __name__ == "__main__":
     demo.launch()
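Note on get_time_weather: it returns hard-coded readings, and CONFIG now defines an openweather_api_key that nothing reads yet. A minimal sketch of wiring the two together, assuming OpenWeatherMap's current-weather endpoint (the fetch_weather helper is illustrative, not part of this commit):

    import requests

    def fetch_weather(city, api_key):
        # Current conditions from OpenWeatherMap, in metric units.
        url = "https://api.openweathermap.org/data/2.5/weather"
        resp = requests.get(url, params={"q": city, "appid": api_key, "units": "metric"}, timeout=10)
        resp.raise_for_status()
        data = resp.json()
        # Reduce the payload to the shape the mock data already uses.
        return {"temp": round(data["main"]["temp"]), "desc": data["weather"][0]["description"].capitalize()}

    # e.g. boston_weather = fetch_weather("Boston", CONFIG["openweather_api_key"])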
modules/analyzer.py CHANGED
@@ -20,14 +20,14 @@ class Analyzer:
 
         # Connect to Redis cache
         self.cache = RedisServerStatusCache()
 
     def is_server_ready(self):
         # Check cache first
         cached_status = self.cache.get(self.cache_key)
         if cached_status is not None:
             logging.info(f"Using cached server status: {cached_status}")
             return cached_status
 
         # Try multiple approaches to check if server is ready
         is_ready = False
 
@@ -61,12 +61,12 @@ class Analyzer:
         self.cache.set(self.cache_key, is_ready, ttl=60)  # Cache for 1 minute
         log_server_status(self.cache_key, is_ready)
         return is_ready
 
     def wait_for_server(self, timeout=180, interval=15):
         if self.is_server_ready():
             logging.info("✅ Server is already ready (from cache or direct check).")
             return True
 
         logging.info("⏳ Server not ready. Starting polling...")
         start_time = time.time()
 
@@ -79,12 +79,11 @@ class Analyzer:
 
         logging.warning("⏰ Server initialization timeout reached")
         return False
 
     def analyze_stream(self, query, search_results):
-        """
-        Analyze search results using the custom LLM with streaming output
-        Yields chunks of the response as they arrive
-        """
+        """ Analyze search results using the custom LLM with streaming output
+        Yields chunks of the response as they arrive """
+
         # Prepare context from search results
         context = "\n\n".join([
             f"Source: {result.get('url', 'N/A')}\nTitle: {result.get('title', 'N/A')}\nContent: {result.get('content', 'N/A')}"
@@ -92,22 +91,20 @@ class Analyzer:
         ])
 
         prompt = f"""
         You are an expert research analyst. Analyze the following query and information to provide a comprehensive summary.
 
         Query: {query}
-
-        Information:
-        {context}
+        Information: {context}
 
         Please provide:
         1. A brief overview of the topic
         2. Key findings or developments
         3. Different perspectives or approaches
         4. Potential implications or future directions
         5. Any controversies or conflicting viewpoints
 
         Structure your response clearly with these sections. If there is insufficient information, state that clearly.
         """
 
         try:
             # First check if server is ready
@@ -136,12 +133,13 @@ class Analyzer:
             for chunk in response:
                 if chunk.choices[0].delta.content:
                     yield chunk.choices[0].delta.content
 
             logging.info("Analysis streaming completed successfully")
 
         except Exception as e:
             error_msg = str(e)
             logging.error(f"Analysis streaming failed: {error_msg}")
+
             if "503" in error_msg or "Service Unavailable" in error_msg:
                 yield "⚠️ The AI model server is currently unavailable. It may be initializing. Please try again in a few minutes."
             elif "timeout" in error_msg.lower() or "read timeout" in error_msg.lower():
@@ -150,11 +148,10 @@ class Analyzer:
                 yield "⚠️ The AI model endpoint was not found. Please check the configuration."
             else:
                 yield f"Analysis failed: {str(e)}"
 
     def analyze(self, query, search_results):
-        """
-        Non-streaming version for compatibility
-        """
+        """ Non-streaming version for compatibility """
+
         # Prepare context from search results
         context = "\n\n".join([
             f"Source: {result.get('url', 'N/A')}\nTitle: {result.get('title', 'N/A')}\nContent: {result.get('content', 'N/A')}"
@@ -162,22 +159,20 @@ class Analyzer:
         ])
 
         prompt = f"""
         You are an expert research analyst. Analyze the following query and information to provide a comprehensive summary.
 
         Query: {query}
-
-        Information:
-        {context}
+        Information: {context}
 
         Please provide:
         1. A brief overview of the topic
         2. Key findings or developments
         3. Different perspectives or approaches
         4. Potential implications or future directions
         5. Any controversies or conflicting viewpoints
 
         Structure your response clearly with these sections. If there is insufficient information, state that clearly.
         """
 
         try:
             # First check if server is ready
@@ -188,6 +183,7 @@ class Analyzer:
             return error_msg
 
         logging.info("Server is ready. Sending request to AI model...")
+
         response = self.client.chat.completions.create(
             model="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
             messages=[
@@ -206,10 +202,12 @@ class Analyzer:
         except Exception as e:
             error_msg = str(e)
             logging.error(f"Analysis failed: {error_msg}")
+
             if "503" in error_msg or "Service Unavailable" in error_msg:
                 return "⚠️ The AI model server is currently unavailable. It may be initializing. Please try again in a few minutes."
             elif "timeout" in error_msg.lower() or "read timeout" in error_msg.lower():
                 return "⚠️ The AI model request timed out. The server may be overloaded or initializing. Please try again in a few minutes."
             elif "404" in error_msg:
                 return "⚠️ The AI model endpoint was not found. Please check the configuration."
+
             return f"Analysis failed: {str(e)}"
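analyze_stream yields bare text chunks, so a caller that wants incremental rendering has to accumulate them itself, as app.py's orchestrator does. A usage sketch (the search_results value is illustrative):

    analyzer = Analyzer(base_url=CONFIG["hf_api_base"], api_key=CONFIG["hf_api_key"])
    results = [{"url": "https://example.com", "title": "Example", "content": "..."}]

    partial = ""
    for chunk in analyzer.analyze_stream("quantum computing", results):
        partial += chunk  # re-yield `partial` from a generator to stream into the Gradio Markdown box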
modules/citation.py CHANGED
@@ -1,11 +1,9 @@
 class CitationManager:
     def add_citations(self, analysis, search_results):
-        """
-        Add citations to the analysis based on source URLs
-        """
+        """ Add citations to the analysis based on source URLs """
         if not search_results:
             return analysis
 
         # Create a simple citation mapping
         citations = {}
         for i, result in enumerate(search_results):
@@ -18,15 +16,13 @@ class CitationManager:
 
         # Add citation references to analysis
         cited_analysis = analysis
+
         # In a more sophisticated implementation, we would match claims to sources
         # For now, we'll just append the citation list
-
         return cited_analysis, citations
 
     def format_bibliography(self, citations):
-        """
-        Format citations into a bibliography
-        """
+        """ Format citations into a bibliography """
         bib_items = []
         for cite_id, info in citations.items():
             bib_item = f"{cite_id} {info['title']}. {info['source']}. Retrieved from: {info['url']}"
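The comments in add_citations leave claim-to-source matching as future work. A hedged sketch of one naive approach, tagging each sentence with the source whose title shares the most words with it (the helper name is hypothetical, not part of this commit):

    def match_claims_to_sources(analysis, citations):
        sentences = [s.strip() for s in analysis.split('.') if s.strip()]
        tagged = []
        for sentence in sentences:
            words = set(sentence.lower().split())
            # Pick the citation whose title overlaps the sentence the most.
            best_id, best_overlap = None, 0
            for cite_id, info in citations.items():
                overlap = len(words & set(info['title'].lower().split()))
                if overlap > best_overlap:
                    best_id, best_overlap = cite_id, overlap
            tagged.append(sentence + (f" {best_id}" if best_id else ""))
        return '. '.join(tagged) + '.'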
modules/formatter.py CHANGED
@@ -1,8 +1,6 @@
 class OutputFormatter:
     def format_response(self, analysis_result, search_results):
-        """
-        Format the final response with proper structure
-        """
+        """ Format the final response with proper structure """
         if isinstance(analysis_result, tuple):
             analysis, citations = analysis_result
         else:
@@ -16,12 +14,12 @@ class OutputFormatter:
         if search_results:
             formatted_output += "## Sources\n"
             for i, result in enumerate(search_results):
-                formatted_output += f"{i+1}. [{result.get('title', 'Untitled')}]({result.get('url', '')})\n"
+                formatted_output += f"{i+1}. [{result.get('title', 'Untitled')}]({result.get('url', '#')})\n"
 
         # Add citation details if available
         if citations:
             formatted_output += "\n## Detailed Citations\n"
             for cite_id, info in citations.items():
-                formatted_output += f"- {cite_id} **{info['title']}** - {info['source']}: {info['url']}\n"
+                formatted_output += f"- {cite_id} {info['title']} - {info['source']}: {info['url']}\n"
 
         return formatted_output
modules/input_handler.py CHANGED
@@ -1,23 +1,22 @@
 class InputHandler:
     def process_query(self, query):
-        """
-        Process and validate user input
-        """
+        """ Process and validate user input """
         # Clean and normalize query
         cleaned_query = query.strip()
 
         # Add context if needed
         if len(cleaned_query) < 5:
             raise ValueError("Query too short. Please provide more details.")
 
         return cleaned_query
 
     def extract_keywords(self, query):
-        """
-        Extract important keywords from query
-        """
+        """ Extract important keywords from query """
         # Simple keyword extraction (could be enhanced with NLP)
-        stop_words = {'the', 'is', 'at', 'which', 'on', 'in', 'for', 'of', 'with', 'by'}
+        stop_words = {
+            'the', 'is', 'at', 'which', 'on', 'in', 'for', 'of', 'with', 'by',
+            'dan', 'di', 'ke', 'dari', 'pada', 'untuk', 'oleh', 'sebagai'
+        }
         words = query.lower().split()
         keywords = [word for word in words if word not in stop_words]
         return keywords
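Per the comment, extract_keywords could be enhanced with NLP. Even without a new dependency, regex tokenization plus frequency ranking already improves on a bare split(), which leaves punctuation glued to words. A sketch (function name illustrative):

    import re
    from collections import Counter

    def extract_keywords_ranked(query, stop_words, top_k=8):
        words = re.findall(r"\w+", query.lower())  # strips punctuation that split() keeps
        counts = Counter(w for w in words if w not in stop_words and len(w) > 2)
        return [word for word, _ in counts.most_common(top_k)]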
modules/retriever.py CHANGED
@@ -4,11 +4,9 @@ import logging
 class Retriever:
     def __init__(self, api_key):
         self.client = TavilyClient(api_key=api_key)
 
     def search(self, query, max_results=5):
-        """
-        Search for relevant content using Tavily API
-        """
+        """ Search for relevant content using Tavily API """
         try:
             response = self.client.search(
                 query=query,
@@ -23,13 +21,10 @@ class Retriever:
             return []
 
     def get_related_queries(self, query):
-        """
-        Generate related search queries
-        """
+        """ Generate related search queries """
         # This could be enhanced with LLM-based query expansion
         return [
             f"{query} research paper",
             f"{query} latest developments",
             f"{query} pros and cons"
         ]
-
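get_related_queries still uses fixed templates; the comment points at LLM-based expansion. A sketch reusing the OpenAI-compatible client pattern from analyzer.py (passing the client in, and the model name, are assumptions borrowed from that module, not part of this commit):

    def expand_queries(client, query, n=3):
        # Ask the chat model for n alternative phrasings, one per line.
        response = client.chat.completions.create(
            model="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
            messages=[{"role": "user", "content": f"Suggest {n} alternative web search queries for: {query}. One per line, no numbering."}],
            max_tokens=120,
        )
        lines = response.choices[0].message.content.splitlines()
        return [line.strip() for line in lines if line.strip()][:n]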
modules/server_cache.py CHANGED
@@ -18,8 +18,9 @@ class RedisServerStatusCache:
             self.fallback_cache = {}
             self.use_redis = False
             logging.warning(f"Redis not available, using in-memory cache: {e}")
+
         self.default_ttl = default_ttl
 
     def get(self, server_key):
         try:
             if self.use_redis and self.redis:
@@ -40,7 +41,7 @@ class RedisServerStatusCache:
         except Exception as e:
             logging.error(f"Cache get error: {e}")
             return None
 
     def set(self, server_key, status, ttl=None):
         try:
             ttl = ttl or self.default_ttl
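For reference, the Analyzer pairs set() with a 60-second TTL, so repeated readiness checks inside that window never touch the endpoint. A usage sketch (the key name is illustrative):

    cache = RedisServerStatusCache()
    cache.set("hf-endpoint", True, ttl=60)  # remember the endpoint is up for one minute
    status = cache.get("hf-endpoint")       # True now; None again once the TTL expires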
modules/status_logger.py CHANGED
@@ -39,6 +39,6 @@ def log_analysis_result(query, success, message=""):
         with open(ANALYSIS_LOG_FILE, 'a') as f:
             if not file_exists:
                 f.write("timestamp,query,result,message\n")
-            f.write(f"{timestamp},\"{query}\",{result_str},\"{message}\"\n")
+            f.write(f'{timestamp},"{query}",{result_str},"{message}"\n')
     except Exception as e:
         logging.error(f"Failed to log analysis result: {e}")
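The rewritten f-string drops the backslash escapes but still produces a malformed row whenever query or message contains a double quote or newline; the stdlib csv writer handles that quoting automatically. A sketch of the equivalent append using the names already in scope (an alternative, not what the commit does):

    import csv

    with open(ANALYSIS_LOG_FILE, 'a', newline='') as f:
        writer = csv.writer(f)
        if not file_exists:
            writer.writerow(["timestamp", "query", "result", "message"])
        writer.writerow([timestamp, query, result_str, message])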
requirements.txt CHANGED
@@ -6,4 +6,10 @@ requests>=2.31.0
 pytest==8.3.3
 redis>=4.0.0
 pandas>=1.5.0
-matplotlib>=3.5.0
+matplotlib>=3.5.0
+duckduckgo-search>=6.1.0
+googletrans==4.0.0-rc1
+PyPDF2>=3.0.0
+Pillow>=10.0.0
+pytesseract>=0.3.10
+datasets>=2.14.0
version.json ADDED
@@ -0,0 +1,6 @@
+
+{
+  "version": "133",
+  "description": "Enhanced assistant with file upload, DDG search, multi-language support",
+  "date": "2025-04-05"
+}