LiamKhoaLe committed
Commit d34d874 · Parent: 607bc14

Upd report planning and CoT reasoning. Upd dynamic state toggle

Files changed (6)
  1. helpers/models.py +5 -0
  2. routes/chats.py +70 -10
  3. routes/reports.py +314 -82
  4. routes/search.py +5 -1
  5. static/script.js +100 -2
  6. static/styles.css +29 -0
helpers/models.py CHANGED
@@ -52,4 +52,9 @@ class ReportResponse(BaseModel):
     report_markdown: str
     sources: List[Dict[str, Any]]
 
+class StatusUpdateResponse(BaseModel):
+    status: str
+    message: str
+    progress: Optional[int] = None
+
 
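The new `StatusUpdateResponse` model is the wire format both status-polling endpoints return. A minimal sketch of how a store entry maps onto it, assuming Pydantic v2 (`model_dump()`; on v1 it would be `.dict()`); the example values are illustrative, not from the commit:

```python
# Sketch of the new response model in isolation, assuming Pydantic v2.
from typing import Optional

from pydantic import BaseModel


class StatusUpdateResponse(BaseModel):
    status: str
    message: str
    progress: Optional[int] = None  # None when a phase has no meaningful percentage


# A store entry as written by update_chat_status()/update_report_status();
# **-unpacking validates it, which is exactly what the status endpoints do.
entry = {"status": "searching", "message": "Searching information...", "progress": 40}
print(StatusUpdateResponse(**entry).model_dump())
# {'status': 'searching', 'message': 'Searching information...', 'progress': 40}
```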
routes/chats.py CHANGED
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional
 from fastapi import Form, HTTPException
 
 from helpers.setup import app, rag, logger, embedder, captioner, gemini_rotator, nvidia_rotator
-from helpers.models import ChatMessageResponse, ChatHistoryResponse, MessageResponse, ChatAnswerResponse
+from helpers.models import ChatMessageResponse, ChatHistoryResponse, MessageResponse, ChatAnswerResponse, StatusUpdateResponse
 from utils.service.common import trim_text
 from .search import build_web_context
 from utils.api.router import select_model, generate_answer_with_model
@@ -91,6 +91,25 @@ async def delete_chat_history(user_id: str, project_id: str):
         raise HTTPException(500, detail=f"Failed to clear chat history: {str(e)}")
 
 
+# In-memory status tracking for real-time updates
+chat_status_store = {}
+
+@app.get("/chat/status/{session_id}", response_model=StatusUpdateResponse)
+async def get_chat_status(session_id: str):
+    """Get current status of a chat processing session"""
+    status = chat_status_store.get(session_id, {"status": "idle", "message": "Ready", "progress": 0})
+    return StatusUpdateResponse(**status)
+
+
+def update_chat_status(session_id: str, status: str, message: str, progress: int = None):
+    """Update chat processing status"""
+    chat_status_store[session_id] = {
+        "status": status,
+        "message": message,
+        "progress": progress
+    }
+
+
 # ────────────────────────────── RAG Chat and Helpers ──────────────────────────────
 async def _generate_query_variations(question: str, nvidia_rotator) -> List[str]:
     """
@@ -176,13 +195,21 @@ async def chat(
     question: str = Form(...),
     k: int = Form(6),
     use_web: int = Form(0),
-    max_web: int = Form(30)
+    max_web: int = Form(30),
+    session_id: str = Form(None)
 ):
     import asyncio
+    import uuid
+
+    # Generate session ID if not provided
+    if not session_id:
+        session_id = str(uuid.uuid4())
+
     try:
-        return await asyncio.wait_for(_chat_impl(user_id, project_id, question, k, use_web=use_web, max_web=max_web), timeout=120.0)
+        return await asyncio.wait_for(_chat_impl(user_id, project_id, question, k, use_web=use_web, max_web=max_web, session_id=session_id), timeout=120.0)
     except asyncio.TimeoutError:
         logger.error("[CHAT] Chat request timed out after 120 seconds")
+        update_chat_status(session_id, "error", "Request timed out", 0)
         return ChatAnswerResponse(
             answer="Sorry, the request took too long to process. Please try again with a simpler question.",
             sources=[],
@@ -196,13 +223,18 @@ async def _chat_impl(
     question: str,
     k: int,
     use_web: int = 0,
-    max_web: int = 30
+    max_web: int = 30,
+    session_id: str = None
 ):
     import sys
     from memo.core import get_memory_system
     from utils.api.router import NVIDIA_SMALL  # reuse default name
     memory = get_memory_system()
     logger.info("[CHAT] User Q/chat: %s", trim_text(question, 15).replace("\n", " "))
+
+    # Update status: Receiving request
+    if session_id:
+        update_chat_status(session_id, "receiving", "Receiving request...", 5)
 
     mentioned = set([m.group(0).strip() for m in re.finditer(r"\b[^\s/\\]+?\.(?:pdf|docx|doc)\b", question, re.IGNORECASE)])
     if mentioned:
@@ -307,8 +339,17 @@ async def _chat_impl(
     semantic_related = "\n\n".join(top) if top else ""
 
     logger.info(f"[CHAT] Starting enhanced vector search with relevant_files={relevant_files}")
+
+    # Update status: Processing data (LLM generating query variations)
+    if session_id:
+        update_chat_status(session_id, "processing", "Processing data...", 15)
+
     enhanced_queries = await _generate_query_variations(question, nvidia_rotator)
     logger.info(f"[CHAT] Generated {len(enhanced_queries)} query variations")
+
+    # Update status: Planning action (planning search strategy)
+    if session_id:
+        update_chat_status(session_id, "planning", "Planning action...", 25)
     all_hits = []
     search_strategies = ["flat", "hybrid", "local"]
     for strategy in search_strategies:
@@ -413,11 +454,16 @@ async def _chat_impl(
     web_context_block = ""
     web_sources_meta: List[Dict[str, Any]] = []
     if use_web:
+        # Update status: Searching information (web search)
+        if session_id:
+            update_chat_status(session_id, "searching", "Searching information...", 40)
         try:
-            # Use planner to avoid redundant fetching and improve coverage
-            web_context_block, web_sources_meta = await plan_and_build_web_context(
-                question, max_web=max_web, per_query=6, top_k=12, dedup_threshold=0.90
-            )
+            # Create status callback for web search
+            def web_status_callback(status, message, progress):
+                if session_id:
+                    update_chat_status(session_id, status, message, progress)
+
+            web_context_block, web_sources_meta = await build_web_context(question, max_web=max_web, top_k=10, status_callback=web_status_callback)
         except Exception as e:
             logger.warning(f"[CHAT] Web augmentation failed: {e}")
 
@@ -447,10 +493,19 @@ async def _chat_impl(
     if web_context_block:
         composed_context += "\n\nWEB_CONTEXT:\n" + web_context_block
 
+    # Update status: Thinking solution
+    if session_id:
+        update_chat_status(session_id, "thinking", "Thinking solution...", 60)
+
     user_prompt = f"QUESTION:\n{question}\n\nCONTEXT:\n{composed_context}"
     selection = select_model(question=question, context=composed_context)
     logger.info(f"Model selection: {selection}")
     logger.info(f"[CHAT] Generating answer with {selection['provider']} {selection['model']}")
+
+    # Update status: Generating answer
+    if session_id:
+        update_chat_status(session_id, "generating", "Generating answer...", 80)
+
     try:
         answer = await generate_answer_with_model(
             selection=selection,
@@ -491,6 +546,10 @@ async def _chat_impl(
             "score": float(s.get("score", 0.0)),
             "kind": "web"
         })
+    # Update status: Complete
+    if session_id:
+        update_chat_status(session_id, "complete", "Answer ready", 100)
+
     logger.info("LLM answer (trimmed): %s", trim_text(answer, 200).replace("\n", " "))
     return ChatAnswerResponse(answer=answer, sources=sources_meta, relevant_files=relevant_files)
 
@@ -557,7 +616,8 @@ async def chat_with_search(
     project_id: str = Form(...),
     question: str = Form(...),
     k: int = Form(6),
-    max_web: int = Form(30)
+    max_web: int = Form(30),
+    session_id: str = Form(None)
 ):
     """Answer using local documents and up to 30 web sources, with URL citations."""
     from memo.core import get_memory_system
@@ -565,7 +625,7 @@ async def chat_with_search(
     logger.info("[CHAT] User Q/chat.search: %s", trim_text(question, 20).replace("\n", " "))
 
     # 1) Reuse local RAG retrieval
-    local_resp = await _chat_impl(user_id, project_id, question, k)
+    local_resp = await _chat_impl(user_id, project_id, question, k, use_web=1, max_web=max_web, session_id=session_id)
 
     # 2) Web search and fetching via shared utilities
     web_context, web_sources_meta = await build_web_context(question, max_web=max_web, top_k=10)
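Taken together, the chat route now exposes a simple polling protocol: the client sends a `session_id` with the POST and reads progress from `GET /chat/status/{session_id}` until the status reaches `complete` or `error`. A hypothetical Python client illustrating that flow (the base URL, form values, and use of `requests` are assumptions for the sketch, not part of the commit):

```python
# Hypothetical client for the polling protocol added in this commit:
# POST /chat with a session_id, poll GET /chat/status/{session_id} in parallel.
import threading
import time
import uuid

import requests

BASE = "http://localhost:8000"  # assumption: local dev server


def ask(question: str) -> dict:
    session_id = str(uuid.uuid4())
    done = threading.Event()

    def poll():
        while not done.is_set():
            s = requests.get(f"{BASE}/chat/status/{session_id}").json()
            p = s.get("progress") or 0
            print(f"[{p:>3}%] {s['message']}")
            if s["status"] in ("complete", "error"):
                break
            time.sleep(0.5)  # matches the 500 ms poll interval in script.js

    t = threading.Thread(target=poll, daemon=True)
    t.start()
    try:
        resp = requests.post(f"{BASE}/chat", data={
            "user_id": "u1", "project_id": "p1", "question": question,
            "k": 6, "use_web": 1, "max_web": 30, "session_id": session_id,
        }, timeout=130)  # slightly above the server's 120 s timeout
    finally:
        done.set()
    t.join(timeout=1)
    return resp.json()
```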
routes/reports.py CHANGED
@@ -6,11 +6,28 @@ from fastapi import Form, HTTPException
 
 from helpers.setup import app, rag, logger, embedder, gemini_rotator, nvidia_rotator
 from .search import build_web_context
-from helpers.models import ReportResponse
+from helpers.models import ReportResponse, StatusUpdateResponse
 from utils.service.common import trim_text
 from utils.api.router import select_model, generate_answer_with_model
 
 
+# In-memory status tracking for report generation
+report_status_store = {}
+
+@app.get("/report/status/{session_id}", response_model=StatusUpdateResponse)
+async def get_report_status(session_id: str):
+    """Get current status of a report generation session"""
+    status = report_status_store.get(session_id, {"status": "idle", "message": "Ready", "progress": 0})
+    return StatusUpdateResponse(**status)
+
+def update_report_status(session_id: str, status: str, message: str, progress: int = None):
+    """Update report generation status"""
+    report_status_store[session_id] = {
+        "status": status,
+        "message": message,
+        "progress": progress
+    }
+
 @app.post("/report", response_model=ReportResponse)
 async def generate_report(
     user_id: str = Form(...),
@@ -20,9 +37,18 @@ async def generate_report(
     report_words: int = Form(1200),
     instructions: str = Form(""),
     use_web: int = Form(0),
-    max_web: int = Form(20)
+    max_web: int = Form(20),
+    session_id: str = Form(None)
 ):
+    import uuid
+    if not session_id:
+        session_id = str(uuid.uuid4())
+
     logger.info("[REPORT] User Q/report: %s", trim_text(instructions, 15).replace("\n", " "))
+
+    # Update status: Receiving request
+    update_report_status(session_id, "receiving", "Receiving request...", 5)
+
     files_list = rag.list_files(user_id=user_id, project_id=project_id)
     filenames_ci = {f.get("filename", "").lower(): f.get("filename") for f in files_list}
     eff_name = filenames_ci.get(filename.lower(), filename)
@@ -55,91 +81,40 @@ async def generate_report(
     web_context_block = ""
     web_sources_meta: List[Dict] = []
     if use_web:
+        # Create status callback for web search
+        def web_status_callback(status, message, progress):
+            update_report_status(session_id, status, message, progress)
+
         web_context_block, web_sources_meta = await build_web_context(
-            instructions or query_text, max_web=max_web, top_k=12
+            instructions or query_text, max_web=max_web, top_k=12, status_callback=web_status_callback
         )
     file_summary = doc_sum.get("summary", "")
 
-    from utils.api.router import GEMINI_MED, GEMINI_PRO
-    if instructions.strip():
-        filter_sys = (
-            "You are an expert content analyst. Given the user's specific instructions and the document content, "
-            "identify which sections/chunks are MOST relevant to their request. "
-            "Each chunk is prefixed with [CHUNK_ID: <id>] - use these exact IDs in your response. "
-            "Return a JSON object with this structure: {\"relevant_chunks\": [\"<chunk_id_1>\", \"<chunk_id_2>\"], \"focus_areas\": [\"key topic 1\", \"key topic 2\"]}"
-        )
-        filter_user = f"USER_INSTRUCTIONS: {instructions}\n\nDOCUMENT_SUMMARY: {file_summary}\n\nAVAILABLE_CHUNKS:\n{context_text}\n\nIdentify only the chunks that directly address the user's specific request."
-        try:
-            selection_filter = {"provider": "gemini", "model": os.getenv("GEMINI_MED", "gemini-2.5-flash")}
-            filter_response = await generate_answer_with_model(selection_filter, filter_sys, filter_user, gemini_rotator, nvidia_rotator)
-            logger.info(f"[REPORT] Raw filter response: {filter_response}")
-            import json as _json
-            try:
-                # Extract JSON from markdown code blocks if present
-                json_text = filter_response.strip()
-                if json_text.startswith('```json'):
-                    # Remove markdown code block formatting
-                    json_text = json_text[7:]  # Remove ```json
-                    if json_text.endswith('```'):
-                        json_text = json_text[:-3]  # Remove ```
-                    json_text = json_text.strip()
-                elif json_text.startswith('```'):
-                    # Remove generic code block formatting
-                    json_text = json_text[3:]  # Remove ```
-                    if json_text.endswith('```'):
-                        json_text = json_text[:-3]  # Remove ```
-                    json_text = json_text.strip()
-
-                filter_data = _json.loads(json_text)
-                relevant_chunk_ids = filter_data.get("relevant_chunks", [])
-                focus_areas = filter_data.get("focus_areas", [])
-                logger.info(f"[REPORT] Content filtering identified {len(relevant_chunk_ids)} relevant chunks: {relevant_chunk_ids} and focus areas: {focus_areas}")
-                if relevant_chunk_ids and hits:
-                    filtered_hits = [h for h in hits if str(h["doc"].get("_id", "")) in relevant_chunk_ids]
-                    if filtered_hits:
-                        hits = filtered_hits
-                        logger.info(f"[REPORT] Filtered context from {len(hits)} chunks to {len(filtered_hits)} relevant chunks")
-                    else:
-                        logger.warning(f"[REPORT] No matching chunks found for IDs: {relevant_chunk_ids}")
-                else:
-                    logger.warning(f"[REPORT] No relevant chunk IDs returned or no hits available")
-            except _json.JSONDecodeError as e:
-                logger.warning(f"[REPORT] Could not parse filter response, using all chunks. JSON error: {e}. Response: {filter_response}")
-        except Exception as e:
-            logger.warning(f"[REPORT] Content filtering failed: {e}")
-
-    sys_outline = (
-        "You are an expert technical writer. Create a focused, hierarchical outline for a report based on the user's specific instructions and the MATERIALS. "
-        "The outline should directly address what the user asked for. Output as Markdown bullet list only. Keep it within about {} words."
-    ).format(max(100, outline_words))
-    instruction_context = f"USER_REQUEST: {instructions}\n\n" if instructions.strip() else ""
-    user_outline = f"{instruction_context}MATERIALS:\n\n[FILE_SUMMARY from {eff_name}]\n{file_summary}\n\n[DOC_CONTEXT]\n{context_text}\n\n[WEB_CONTEXT]\n{web_context_block}"
-    try:
-        selection_outline = {"provider": "gemini", "model": os.getenv("GEMINI_MED", "gemini-2.5-flash")}
-        outline_md = await generate_answer_with_model(selection_outline, sys_outline, user_outline, gemini_rotator, nvidia_rotator)
-    except Exception as e:
-        logger.warning(f"Report outline failed: {e}")
-        outline_md = "# Report Outline\n\n- Introduction\n- Key Topics\n- Conclusion"
-
-    instruction_focus = f"FOCUS ON: {instructions}\n\n" if instructions.strip() else ""
-    sys_report = (
-        "You are an expert report writer. Write a focused, comprehensive Markdown report that directly addresses the user's specific request. "
-        "Using the OUTLINE and MATERIALS:\n"
-        "- Structure the report to answer exactly what the user asked for\n"
-        "- Use clear section headings\n"
-        "- Keep content factual and grounded in the provided materials\n"
-        f"- Include brief citations like (source: {eff_name}, topic) - use the actual filename provided\n"
-        "- If the user asked for a specific section/topic, focus heavily on that\n"
-        f"- Target length ~{max(600, report_words)} words\n"
-        "- Ensure the report directly fulfills the user's request"
-    )
-    user_report = f"{instruction_focus}OUTLINE:\n{outline_md}\n\nMATERIALS:\n[FILE_SUMMARY from {eff_name}]\n{file_summary}\n\n[DOC_CONTEXT]\n{context_text}\n\n[WEB_CONTEXT]\n{web_context_block}"
-    try:
-        selection_report = {"provider": "gemini", "model": os.getenv("GEMINI_PRO", "gemini-2.5-pro")}
-        report_md = await generate_answer_with_model(selection_report, sys_report, user_report, gemini_rotator, nvidia_rotator)
-    except Exception as e:
-        logger.error(f"Report generation failed: {e}")
-        report_md = outline_md + "\n\n" + file_summary
+    # Step 1: Chain of Thought Planning with NVIDIA
+    logger.info("[REPORT] Starting CoT planning phase")
+    update_report_status(session_id, "planning", "Planning action...", 25)
+    cot_plan = await generate_cot_plan(instructions, file_summary, context_text, web_context_block, nvidia_rotator)
+
+    # Step 2: Execute detailed subtasks based on CoT plan
+    logger.info("[REPORT] Executing detailed subtasks")
+    update_report_status(session_id, "processing", "Processing data...", 40)
+    detailed_analysis = await execute_detailed_subtasks(cot_plan, context_text, web_context_block, eff_name, nvidia_rotator)
+
+    # Step 3: Synthesize comprehensive report from detailed analysis
+    logger.info("[REPORT] Synthesizing comprehensive report")
+    update_report_status(session_id, "thinking", "Thinking solution...", 60)
+    comprehensive_report = await synthesize_comprehensive_report(
+        instructions, cot_plan, detailed_analysis, eff_name, report_words, gemini_rotator, nvidia_rotator
+    )
+
+    # Update status: Generating answer (final report generation)
+    update_report_status(session_id, "generating", "Generating answer...", 80)
+
+    # Update status: Complete
+    update_report_status(session_id, "complete", "Report ready", 100)
+
+    # Use the comprehensive report from CoT approach
+    report_md = comprehensive_report
     # Merge local and web sources
     merged_sources = list(sources_meta) + [
         {"filename": s.get("url"), "topic_name": s.get("topic_name"), "score": s.get("score"), "kind": "web"}
@@ -177,3 +152,260 @@ async def generate_report_pdf(
         raise
 
 
+# ────────────────────────────── Chain of Thought Report Generation ──────────────────
+
+async def generate_cot_plan(instructions: str, file_summary: str, context_text: str, web_context: str, nvidia_rotator) -> Dict[str, Any]:
+    """Generate a detailed Chain of Thought plan for report generation using NVIDIA."""
+    sys_prompt = """You are an expert research analyst and report planner. Given a user's request and available materials, create a comprehensive plan for generating a detailed report.
+
+Your task is to:
+1. Analyze the user's request and identify key requirements
+2. Break down the report into logical sections and subtasks
+3. Identify what specific information needs to be extracted from each source
+4. Plan the reasoning flow and argument structure
+5. Determine the depth and rigor needed for each section
+
+Return a JSON object with this structure:
+{
+    "analysis": {
+        "user_intent": "What the user really wants to know",
+        "key_requirements": ["requirement1", "requirement2"],
+        "complexity_level": "basic|intermediate|advanced",
+        "focus_areas": ["area1", "area2", "area3"]
+    },
+    "report_structure": {
+        "sections": [
+            {
+                "title": "Section Title",
+                "purpose": "Why this section is needed",
+                "subtasks": [
+                    {
+                        "task": "Specific task description",
+                        "reasoning": "Why this task is important",
+                        "sources_needed": ["local", "web", "both"],
+                        "depth": "surface|detailed|comprehensive"
+                    }
+                ]
+            }
+        ]
+    },
+    "reasoning_flow": [
+        "Step 1: Start with...",
+        "Step 2: Then analyze...",
+        "Step 3: Finally synthesize..."
+    ]
+}"""
+
+    user_prompt = f"""USER REQUEST: {instructions}
+
+AVAILABLE MATERIALS:
+FILE SUMMARY: {file_summary}
+
+DOCUMENT CONTEXT: {context_text[:2000]}...
+
+WEB CONTEXT: {web_context[:2000]}...
+
+Create a detailed plan for generating a comprehensive report that addresses the user's request."""
+
+    try:
+        selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
+        response = await generate_answer_with_model(selection, sys_prompt, user_prompt, None, nvidia_rotator)
+
+        # Parse JSON response
+        import json
+        json_text = response.strip()
+        if json_text.startswith('```json'):
+            json_text = json_text[7:-3].strip()
+        elif json_text.startswith('```'):
+            json_text = json_text[3:-3].strip()
+
+        plan = json.loads(json_text)
+        logger.info(f"[REPORT] CoT plan generated with {len(plan.get('report_structure', {}).get('sections', []))} sections")
+        return plan
+
+    except Exception as e:
+        logger.warning(f"[REPORT] CoT planning failed: {e}")
+        # Fallback plan
+        return {
+            "analysis": {
+                "user_intent": instructions,
+                "key_requirements": ["comprehensive analysis"],
+                "complexity_level": "intermediate",
+                "focus_areas": ["main topics"]
+            },
+            "report_structure": {
+                "sections": [
+                    {
+                        "title": "Introduction",
+                        "purpose": "Provide overview and context",
+                        "subtasks": [{"task": "Summarize key points", "reasoning": "Set foundation", "sources_needed": ["local"], "depth": "detailed"}]
+                    },
+                    {
+                        "title": "Main Analysis",
+                        "purpose": "Address user's specific request",
+                        "subtasks": [{"task": "Detailed analysis", "reasoning": "Core content", "sources_needed": ["both"], "depth": "comprehensive"}]
+                    },
+                    {
+                        "title": "Conclusion",
+                        "purpose": "Synthesize findings",
+                        "subtasks": [{"task": "Summarize key insights", "reasoning": "Provide closure", "sources_needed": ["local"], "depth": "detailed"}]
+                    }
+                ]
+            },
+            "reasoning_flow": ["Analyze materials", "Extract key insights", "Synthesize findings"]
+        }
+
+
+async def execute_detailed_subtasks(cot_plan: Dict[str, Any], context_text: str, web_context: str, filename: str, nvidia_rotator) -> Dict[str, Any]:
+    """Execute detailed analysis for each subtask identified in the CoT plan."""
+    detailed_analysis = {}
+
+    for section in cot_plan.get("report_structure", {}).get("sections", []):
+        section_title = section.get("title", "Unknown Section")
+        section_analysis = {
+            "title": section_title,
+            "purpose": section.get("purpose", ""),
+            "subtask_results": []
+        }
+
+        for subtask in section.get("subtasks", []):
+            task = subtask.get("task", "")
+            reasoning = subtask.get("reasoning", "")
+            sources_needed = subtask.get("sources_needed", ["local"])
+            depth = subtask.get("depth", "detailed")
+
+            # Generate detailed analysis for this subtask
+            subtask_result = await analyze_subtask(
+                task, reasoning, sources_needed, depth, context_text, web_context, filename, nvidia_rotator
+            )
+
+            section_analysis["subtask_results"].append({
+                "task": task,
+                "reasoning": reasoning,
+                "depth": depth,
+                "analysis": subtask_result
+            })
+
+        detailed_analysis[section_title] = section_analysis
+
+    logger.info(f"[REPORT] Completed detailed analysis for {len(detailed_analysis)} sections")
+    return detailed_analysis
+
+
+async def analyze_subtask(task: str, reasoning: str, sources_needed: List[str], depth: str,
+                          context_text: str, web_context: str, filename: str, nvidia_rotator) -> str:
+    """Analyze a specific subtask with appropriate depth and source selection."""
+
+    # Select appropriate context based on sources_needed
+    selected_context = ""
+    if "local" in sources_needed and "web" in sources_needed:
+        selected_context = f"DOCUMENT CONTEXT:\n{context_text}\n\nWEB CONTEXT:\n{web_context}"
+    elif "local" in sources_needed:
+        selected_context = f"DOCUMENT CONTEXT:\n{context_text}"
+    elif "web" in sources_needed:
+        selected_context = f"WEB CONTEXT:\n{web_context}"
+
+    # Adjust prompt based on depth requirement
+    depth_instructions = {
+        "surface": "Provide a brief, high-level analysis",
+        "detailed": "Provide a thorough, well-reasoned analysis with specific examples",
+        "comprehensive": "Provide an exhaustive, rigorous analysis with deep insights and multiple perspectives"
+    }
+
+    sys_prompt = f"""You are an expert analyst performing detailed research. Your task is to {task}.
+
+REASONING: {reasoning}
+
+DEPTH REQUIREMENT: {depth_instructions.get(depth, "Provide detailed analysis")}
+
+Focus on:
+- Extracting specific, relevant information
+- Providing clear explanations and insights
+- Supporting claims with evidence from the materials
+- Maintaining analytical rigor and objectivity
+- Being comprehensive yet concise
+
+Return only the analysis, no meta-commentary."""
+
+    user_prompt = f"""TASK: {task}
+
+MATERIALS:
+{selected_context}
+
+Perform the analysis as specified."""
+
+    try:
+        selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
+        analysis = await generate_answer_with_model(selection, sys_prompt, user_prompt, None, nvidia_rotator)
+        return analysis.strip()
+
+    except Exception as e:
+        logger.warning(f"[REPORT] Subtask analysis failed for '{task}': {e}")
+        return f"Analysis for '{task}' could not be completed due to processing error."
+
+
+async def synthesize_comprehensive_report(instructions: str, cot_plan: Dict[str, Any],
+                                          detailed_analysis: Dict[str, Any], filename: str,
+                                          report_words: int, gemini_rotator, nvidia_rotator) -> str:
+    """Synthesize the detailed analysis into a comprehensive, well-structured report."""
+
+    # Prepare synthesis materials
+    analysis_summary = ""
+    for section_title, section_data in detailed_analysis.items():
+        analysis_summary += f"\n## {section_title}\n"
+        analysis_summary += f"Purpose: {section_data.get('purpose', '')}\n\n"
+
+        for subtask_result in section_data.get("subtask_results", []):
+            analysis_summary += f"### {subtask_result.get('task', '')}\n"
+            analysis_summary += f"{subtask_result.get('analysis', '')}\n\n"
+
+    reasoning_flow = cot_plan.get("reasoning_flow", [])
+    flow_text = "\n".join([f"{i+1}. {step}" for i, step in enumerate(reasoning_flow)])
+
+    sys_prompt = f"""You are an expert report writer synthesizing detailed analysis into a comprehensive report.
+
+Your task is to create a well-structured, professional report that:
+1. Follows the planned reasoning flow: {flow_text}
+2. Integrates all detailed analyses seamlessly
+3. Maintains logical flow and coherence
+4. Provides clear, actionable insights
+5. Uses proper academic/professional formatting
+6. Targets approximately {report_words} words
+
+Structure the report with:
+- Clear section headings
+- Logical progression of ideas
+- Smooth transitions between sections
+- Proper citations and references
+- Executive summary or key takeaways
+- Conclusion with actionable insights
+
+Write in a professional, analytical tone suitable for business or academic contexts."""
+
+    user_prompt = f"""USER REQUEST: {instructions}
+
+DETAILED ANALYSIS TO SYNTHESIZE:
+{analysis_summary}
+
+REASONING FLOW TO FOLLOW:
+{flow_text}
+
+Create a comprehensive report that addresses the user's request by synthesizing all the detailed analysis above."""
+
+    try:
+        # Use Gemini Pro for final synthesis (better for long-form content)
+        selection = {"provider": "gemini", "model": "gemini-2.5-pro"}
+        report = await generate_answer_with_model(selection, sys_prompt, user_prompt, gemini_rotator, nvidia_rotator)
+
+        logger.info(f"[REPORT] Comprehensive report synthesized, length: {len(report)} characters")
+        return report
+
+    except Exception as e:
+        logger.error(f"[REPORT] Report synthesis failed: {e}")
+        # Fallback: simple concatenation
+        fallback_report = f"# Report: {instructions}\n\n"
+        fallback_report += analysis_summary
+        fallback_report += f"\n\n## Conclusion\n\nThis report addresses: {instructions}"
+        return fallback_report
+
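The three new helpers form a plan, fan-out, synthesize pipeline: `generate_cot_plan` produces a JSON plan, `execute_detailed_subtasks` walks its sections and subtasks, and `synthesize_comprehensive_report` merges the results. A condensed, runnable sketch of that control flow with stubs in place of the LLM calls (only the dict shapes are taken from the diff; the driver itself is hypothetical):

```python
# Condensed sketch of the report pipeline's control flow: plan -> subtasks -> merge.
import asyncio
from typing import Any, Dict


async def generate_cot_plan(instructions: str) -> Dict[str, Any]:
    # Stub: the real helper asks an NVIDIA model for this JSON plan.
    return {
        "report_structure": {"sections": [
            {"title": "Main Analysis",
             "subtasks": [{"task": "Detailed analysis", "depth": "comprehensive"}]},
            {"title": "Conclusion",
             "subtasks": [{"task": "Summarize key insights", "depth": "detailed"}]},
        ]},
        "reasoning_flow": ["Analyze materials", "Synthesize findings"],
    }


async def analyze_subtask(task: str) -> str:
    # Stub: the real helper prompts the model with depth/source instructions.
    return f"(analysis of: {task})"


async def run_pipeline(instructions: str) -> str:
    plan = await generate_cot_plan(instructions)              # Step 1: plan
    analysis: Dict[str, list] = {}
    for section in plan["report_structure"]["sections"]:      # Step 2: subtasks
        results = [await analyze_subtask(st["task"]) for st in section["subtasks"]]
        analysis[section["title"]] = results
    # Step 3: the real code hands `analysis` plus reasoning_flow to Gemini Pro;
    # here we just concatenate to show the data flow.
    return "\n".join(f"## {title}\n" + "\n".join(parts)
                     for title, parts in analysis.items())


print(asyncio.run(run_pipeline("Summarize the document")))
```

Because each subtask is independent, the per-section loop could also run the analyses concurrently (e.g. with `asyncio.gather`); the committed code awaits them sequentially.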
routes/search.py CHANGED
@@ -598,7 +598,7 @@ async def calculate_comprehensive_score(content: str, user_query: str, url: str,
     return max(0.0, min(1.0, comprehensive_score))
 
 
-async def build_web_context(question: str, max_web: int = 30, top_k: int = 10) -> Tuple[str, List[Dict[str, Any]]]:
+async def build_web_context(question: str, max_web: int = 30, top_k: int = 10, status_callback=None) -> Tuple[str, List[Dict[str, Any]]]:
     """
     Intelligent web search and content processing:
     1. Extract intelligent search keywords
@@ -609,6 +609,8 @@ async def build_web_context(question: str, max_web: int = 30, top_k: int = 10) -> Tuple[str, List[Dict[str, Any]]]:
     t0 = time.perf_counter()
 
     # Step 1: Extract intelligent search keywords
+    if status_callback:
+        status_callback("searching", "Searching information...", 45)
     keywords = await extract_search_keywords(question, nvidia_rotator)
     logger.info(f"[SEARCH] Extracted keywords: {keywords}")
 
@@ -623,6 +625,8 @@ async def build_web_context(question: str, max_web: int = 30, top_k: int = 10) -> Tuple[str, List[Dict[str, Any]]]:
         return "", []
 
     # Step 3: Process each source with NVIDIA agent
+    if status_callback:
+        status_callback("processing", "Processing data...", 50)
     processing_tasks = []
     for result in search_results:
         task = fetch_and_process_content(result["url"], result["title"], question, nvidia_rotator)
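`build_web_context()` treats `status_callback` as a plain `(status, message, progress)` callable, so any caller can observe search progress the same way the routes do. A small sketch of wiring a callback to a session store, mirroring the usage in routes/chats.py and routes/reports.py (the store and factory names here are hypothetical):

```python
# Any callable taking (status, message, progress) can observe search progress.
# `session_store` and `make_callback` are illustrative names, not from the diff.
session_store: dict = {}


def make_callback(session_id: str):
    def callback(status: str, message: str, progress: int) -> None:
        session_store[session_id] = {
            "status": status,
            "message": message,
            "progress": progress,
        }
    return callback


# Usage inside an async route (sketch):
#     web_ctx, sources = await build_web_context(
#         question, max_web=10, top_k=5, status_callback=make_callback(session_id)
#     )
```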
static/script.js CHANGED
@@ -499,13 +499,19 @@
     // Save user message to chat history
     await saveChatMessage(user.user_id, currentProject.project_id, 'user', question);
 
-    // Add thinking message
-    const thinkingMsg = appendMessage('thinking', 'Thinking...');
+    // Generate session ID for status tracking
+    const sessionId = 'chat_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
+
+    // Add thinking message with dynamic status
+    const thinkingMsg = appendMessage('thinking', 'Receiving request...');
 
     // Disable input during processing
     questionInput.disabled = true;
     sendBtn.disabled = true;
     showButtonLoading(sendBtn, true);
+
+    // Start status polling
+    const statusInterval = startStatusPolling(sessionId, thinkingMsg);
 
     try {
       // Branch: if report mode is active → call /report with textarea as instructions
@@ -519,6 +525,7 @@
       form.append('outline_words', '200');
       form.append('report_words', '1200');
       form.append('instructions', question);
+      form.append('session_id', sessionId);
       // If Search is toggled on, enable web augmentation for report
       const useWeb = searchLink && searchLink.classList.contains('active');
       if (useWeb) {
@@ -542,6 +549,7 @@
       formData.append('project_id', currentProject.project_id);
       formData.append('question', question);
       formData.append('k', '6');
+      formData.append('session_id', sessionId);
       // If Search is toggled on, enable web augmentation
       const useWeb = searchLink && searchLink.classList.contains('active');
       if (useWeb) {
@@ -573,6 +581,10 @@
       appendMessage('assistant', errorMsg);
       await saveChatMessage(user.user_id, currentProject.project_id, 'assistant', errorMsg);
     } finally {
+      // Stop status polling
+      if (statusInterval) {
+        clearInterval(statusInterval);
+      }
       // Re-enable input
       questionInput.disabled = false;
       sendBtn.disabled = false;
@@ -1072,4 +1084,90 @@
   }, { threshold: 0.1 });
 
   document.querySelectorAll('.reveal').forEach(el => observer.observe(el));
+
+  // Status polling function for real-time updates
+  function startStatusPolling(sessionId, thinkingMsg) {
+    const isReportMode = isReportModeActive();
+    const statusEndpoint = isReportMode ? `/report/status/${sessionId}` : `/chat/status/${sessionId}`;
+
+    const interval = setInterval(async () => {
+      try {
+        const response = await fetch(statusEndpoint);
+        if (response.ok) {
+          const status = await response.json();
+          updateThinkingMessage(thinkingMsg, status.message, status.progress);
+
+          // Stop polling when complete or error
+          if (status.status === 'complete' || status.status === 'error') {
+            clearInterval(interval);
+          }
+        }
+      } catch (error) {
+        console.warn('Status polling failed:', error);
+      }
+    }, 500); // Poll every 500ms
+
+    return interval;
+  }
+
+  function updateThinkingMessage(thinkingMsg, message, progress) {
+    if (thinkingMsg && thinkingMsg.querySelector) {
+      const progressBar = thinkingMsg.querySelector('.progress-bar');
+      const statusText = thinkingMsg.querySelector('.status-text');
+
+      if (statusText) {
+        statusText.textContent = message;
+      }
+
+      if (progressBar && progress !== undefined) {
+        progressBar.style.width = `${progress}%`;
+      }
+    }
+  }
+
+  // Enhanced thinking message with progress bar
+  function appendMessage(role, text, isReport = false) {
+    const messageDiv = document.createElement('div');
+    messageDiv.className = `msg ${role}`;
+
+    if (role === 'thinking') {
+      messageDiv.innerHTML = `
+        <div class="thinking-container">
+          <div class="status-text">${text}</div>
+          <div class="progress-container">
+            <div class="progress-bar" style="width: 0%"></div>
+          </div>
+        </div>
+      `;
+    } else if (role === 'assistant') {
+      // Render Markdown for assistant messages
+      try {
+        // Use marked library to convert Markdown to HTML
+        const htmlContent = marked.parse(text);
+        messageDiv.innerHTML = htmlContent;
+
+        // Add copy buttons to code blocks
+        addCopyButtonsToCodeBlocks(messageDiv);
+
+        // Add download PDF button for reports
+        if (isReport) {
+          addDownloadPdfButton(messageDiv, text);
+        }
+      } catch (e) {
+        // Fallback to plain text if Markdown parsing fails
+        messageDiv.textContent = text;
+      }
+    } else {
+      messageDiv.textContent = text;
+    }
+
+    messages.appendChild(messageDiv);
+
+    // Scroll to bottom
+    requestAnimationFrame(() => {
+      messageDiv.scrollIntoView({ behavior: 'smooth', block: 'end' });
+    });
+
+    return messageDiv;
+  }
 })();
static/styles.css CHANGED
@@ -766,6 +766,35 @@
   font-style: italic;
 }
 
+/* ────────────────────────────── Thinking Container Styles ────────────────────────────── */
+.thinking-container {
+  display: flex;
+  flex-direction: column;
+  gap: 0.75rem;
+}
+
+.status-text {
+  font-style: italic;
+  color: var(--text-secondary);
+  font-size: 0.95rem;
+}
+
+.progress-container {
+  width: 100%;
+  height: 4px;
+  background: var(--border);
+  border-radius: 2px;
+  overflow: hidden;
+}
+
+.progress-bar {
+  height: 100%;
+  background: var(--gradient-accent);
+  border-radius: 2px;
+  transition: width 0.3s ease;
+  width: 0%;
+}
+
 /* Markdown content styling */
 .msg.assistant h1,
 .msg.assistant h2,