LiamKhoaLe committed
Commit d34d874 · Parent: 607bc14

Upd report planning and CoT reasoning. Upd dynamic state toggle

Files changed (6)
  1. helpers/models.py +5 -0
  2. routes/chats.py +70 -10
  3. routes/reports.py +314 -82
  4. routes/search.py +5 -1
  5. static/script.js +100 -2
  6. static/styles.css +29 -0
helpers/models.py CHANGED
@@ -52,4 +52,9 @@ class ReportResponse(BaseModel):
     report_markdown: str
     sources: List[Dict[str, Any]]
 
+class StatusUpdateResponse(BaseModel):
+    status: str
+    message: str
+    progress: Optional[int] = None
+
 
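The new `StatusUpdateResponse` model is the wire format both status-polling endpoints return. A minimal sketch of how a store entry maps onto it, assuming Pydantic v2 (`model_dump()`; on v1 it would be `.dict()`); the example values are illustrative, not from the commit:

```python
# Sketch of the new response model in isolation, assuming Pydantic v2.
from typing import Optional

from pydantic import BaseModel


class StatusUpdateResponse(BaseModel):
    status: str
    message: str
    progress: Optional[int] = None  # None when a phase has no meaningful percentage


# A store entry as written by update_chat_status()/update_report_status();
# **-unpacking validates it, which is exactly what the status endpoints do.
entry = {"status": "searching", "message": "Searching information...", "progress": 40}
print(StatusUpdateResponse(**entry).model_dump())
# {'status': 'searching', 'message': 'Searching information...', 'progress': 40}
```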
routes/chats.py CHANGED
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional
 from fastapi import Form, HTTPException
 
 from helpers.setup import app, rag, logger, embedder, captioner, gemini_rotator, nvidia_rotator
-from helpers.models import ChatMessageResponse, ChatHistoryResponse, MessageResponse, ChatAnswerResponse
+from helpers.models import ChatMessageResponse, ChatHistoryResponse, MessageResponse, ChatAnswerResponse, StatusUpdateResponse
 from utils.service.common import trim_text
 from .search import build_web_context
 from utils.api.router import select_model, generate_answer_with_model
@@ -91,6 +91,25 @@ async def delete_chat_history(user_id: str, project_id: str):
         raise HTTPException(500, detail=f"Failed to clear chat history: {str(e)}")
 
 
+# In-memory status tracking for real-time updates
+chat_status_store = {}
+
+@app.get("/chat/status/{session_id}", response_model=StatusUpdateResponse)
+async def get_chat_status(session_id: str):
+    """Get current status of a chat processing session"""
+    status = chat_status_store.get(session_id, {"status": "idle", "message": "Ready", "progress": 0})
+    return StatusUpdateResponse(**status)
+
+
+def update_chat_status(session_id: str, status: str, message: str, progress: int = None):
+    """Update chat processing status"""
+    chat_status_store[session_id] = {
+        "status": status,
+        "message": message,
+        "progress": progress
+    }
+
+
 # ────────────────────────────── RAG Chat and Helpers ──────────────────────────────
 async def _generate_query_variations(question: str, nvidia_rotator) -> List[str]:
     """
@@ -176,13 +195,21 @@ async def chat(
     question: str = Form(...),
     k: int = Form(6),
     use_web: int = Form(0),
-    max_web: int = Form(30)
+    max_web: int = Form(30),
+    session_id: str = Form(None)
 ):
     import asyncio
+    import uuid
+
+    # Generate session ID if not provided
+    if not session_id:
+        session_id = str(uuid.uuid4())
+
     try:
-        return await asyncio.wait_for(_chat_impl(user_id, project_id, question, k, use_web=use_web, max_web=max_web), timeout=120.0)
+        return await asyncio.wait_for(_chat_impl(user_id, project_id, question, k, use_web=use_web, max_web=max_web, session_id=session_id), timeout=120.0)
     except asyncio.TimeoutError:
         logger.error("[CHAT] Chat request timed out after 120 seconds")
+        update_chat_status(session_id, "error", "Request timed out", 0)
         return ChatAnswerResponse(
             answer="Sorry, the request took too long to process. Please try again with a simpler question.",
             sources=[],
@@ -196,13 +223,18 @@ async def _chat_impl(
     question: str,
     k: int,
     use_web: int = 0,
-    max_web: int = 30
+    max_web: int = 30,
+    session_id: str = None
 ):
     import sys
     from memo.core import get_memory_system
     from utils.api.router import NVIDIA_SMALL  # reuse default name
     memory = get_memory_system()
     logger.info("[CHAT] User Q/chat: %s", trim_text(question, 15).replace("\n", " "))
+
+    # Update status: Receiving request
+    if session_id:
+        update_chat_status(session_id, "receiving", "Receiving request...", 5)
 
     mentioned = set([m.group(0).strip() for m in re.finditer(r"\b[^\s/\\]+?\.(?:pdf|docx|doc)\b", question, re.IGNORECASE)])
     if mentioned:
@@ -307,8 +339,17 @@ async def _chat_impl(
     semantic_related = "\n\n".join(top) if top else ""
 
     logger.info(f"[CHAT] Starting enhanced vector search with relevant_files={relevant_files}")
+
+    # Update status: Processing data (LLM generating query variations)
+    if session_id:
+        update_chat_status(session_id, "processing", "Processing data...", 15)
+
     enhanced_queries = await _generate_query_variations(question, nvidia_rotator)
     logger.info(f"[CHAT] Generated {len(enhanced_queries)} query variations")
+
+    # Update status: Planning action (planning search strategy)
+    if session_id:
+        update_chat_status(session_id, "planning", "Planning action...", 25)
     all_hits = []
     search_strategies = ["flat", "hybrid", "local"]
     for strategy in search_strategies:
@@ -413,11 +454,16 @@ async def _chat_impl(
     web_context_block = ""
     web_sources_meta: List[Dict[str, Any]] = []
     if use_web:
+        # Update status: Searching information (web search)
+        if session_id:
+            update_chat_status(session_id, "searching", "Searching information...", 40)
         try:
-            # Use planner to avoid redundant fetching and improve coverage
-            web_context_block, web_sources_meta = await plan_and_build_web_context(
-                question, max_web=max_web, per_query=6, top_k=12, dedup_threshold=0.90
-            )
+            # Create status callback for web search
+            def web_status_callback(status, message, progress):
+                if session_id:
+                    update_chat_status(session_id, status, message, progress)
+
+            web_context_block, web_sources_meta = await build_web_context(question, max_web=max_web, top_k=10, status_callback=web_status_callback)
         except Exception as e:
             logger.warning(f"[CHAT] Web augmentation failed: {e}")
 
@@ -447,10 +493,19 @@ async def _chat_impl(
     if web_context_block:
         composed_context += "\n\nWEB_CONTEXT:\n" + web_context_block
 
+    # Update status: Thinking solution
+    if session_id:
+        update_chat_status(session_id, "thinking", "Thinking solution...", 60)
+
     user_prompt = f"QUESTION:\n{question}\n\nCONTEXT:\n{composed_context}"
     selection = select_model(question=question, context=composed_context)
     logger.info(f"Model selection: {selection}")
     logger.info(f"[CHAT] Generating answer with {selection['provider']} {selection['model']}")
+
+    # Update status: Generating answer
+    if session_id:
+        update_chat_status(session_id, "generating", "Generating answer...", 80)
+
     try:
         answer = await generate_answer_with_model(
             selection=selection,
@@ -491,6 +546,10 @@ async def _chat_impl(
             "score": float(s.get("score", 0.0)),
             "kind": "web"
         })
+    # Update status: Complete
+    if session_id:
+        update_chat_status(session_id, "complete", "Answer ready", 100)
+
     logger.info("LLM answer (trimmed): %s", trim_text(answer, 200).replace("\n", " "))
     return ChatAnswerResponse(answer=answer, sources=sources_meta, relevant_files=relevant_files)
 
@@ -557,7 +616,8 @@ async def chat_with_search(
     project_id: str = Form(...),
     question: str = Form(...),
     k: int = Form(6),
-    max_web: int = Form(30)
+    max_web: int = Form(30),
+    session_id: str = Form(None)
 ):
     """Answer using local documents and up to 30 web sources, with URL citations."""
     from memo.core import get_memory_system
@@ -565,7 +625,7 @@ async def chat_with_search(
     logger.info("[CHAT] User Q/chat.search: %s", trim_text(question, 20).replace("\n", " "))
 
     # 1) Reuse local RAG retrieval
-    local_resp = await _chat_impl(user_id, project_id, question, k)
+    local_resp = await _chat_impl(user_id, project_id, question, k, use_web=1, max_web=max_web, session_id=session_id)
 
     # 2) Web search and fetching via shared utilities
     web_context, web_sources_meta = await build_web_context(question, max_web=max_web, top_k=10)
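Taken together, the chat route now exposes a simple polling protocol: the client sends a `session_id` with the POST and reads progress from `GET /chat/status/{session_id}` until the status reaches `complete` or `error`. A hypothetical Python client illustrating that flow (the base URL, form values, and use of `requests` are assumptions for the sketch, not part of the commit):

```python
# Hypothetical client for the polling protocol added in this commit:
# POST /chat with a session_id, poll GET /chat/status/{session_id} in parallel.
import threading
import time
import uuid

import requests

BASE = "http://localhost:8000"  # assumption: local dev server


def ask(question: str) -> dict:
    session_id = str(uuid.uuid4())
    done = threading.Event()

    def poll():
        while not done.is_set():
            s = requests.get(f"{BASE}/chat/status/{session_id}").json()
            p = s.get("progress") or 0
            print(f"[{p:>3}%] {s['message']}")
            if s["status"] in ("complete", "error"):
                break
            time.sleep(0.5)  # matches the 500 ms poll interval in script.js

    t = threading.Thread(target=poll, daemon=True)
    t.start()
    try:
        resp = requests.post(f"{BASE}/chat", data={
            "user_id": "u1", "project_id": "p1", "question": question,
            "k": 6, "use_web": 1, "max_web": 30, "session_id": session_id,
        }, timeout=130)  # slightly above the server's 120 s timeout
    finally:
        done.set()
    t.join(timeout=1)
    return resp.json()
```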
routes/reports.py CHANGED
@@ -6,11 +6,28 @@ from fastapi import Form, HTTPException
 
 from helpers.setup import app, rag, logger, embedder, gemini_rotator, nvidia_rotator
 from .search import build_web_context
-from helpers.models import ReportResponse
+from helpers.models import ReportResponse, StatusUpdateResponse
 from utils.service.common import trim_text
 from utils.api.router import select_model, generate_answer_with_model
 
 
+# In-memory status tracking for report generation
+report_status_store = {}
+
+@app.get("/report/status/{session_id}", response_model=StatusUpdateResponse)
+async def get_report_status(session_id: str):
+    """Get current status of a report generation session"""
+    status = report_status_store.get(session_id, {"status": "idle", "message": "Ready", "progress": 0})
+    return StatusUpdateResponse(**status)
+
+def update_report_status(session_id: str, status: str, message: str, progress: int = None):
+    """Update report generation status"""
+    report_status_store[session_id] = {
+        "status": status,
+        "message": message,
+        "progress": progress
+    }
+
 @app.post("/report", response_model=ReportResponse)
 async def generate_report(
     user_id: str = Form(...),
@@ -20,9 +37,18 @@ async def generate_report(
     report_words: int = Form(1200),
     instructions: str = Form(""),
     use_web: int = Form(0),
-    max_web: int = Form(20)
+    max_web: int = Form(20),
+    session_id: str = Form(None)
 ):
+    import uuid
+    if not session_id:
+        session_id = str(uuid.uuid4())
+
     logger.info("[REPORT] User Q/report: %s", trim_text(instructions, 15).replace("\n", " "))
+
+    # Update status: Receiving request
+    update_report_status(session_id, "receiving", "Receiving request...", 5)
+
     files_list = rag.list_files(user_id=user_id, project_id=project_id)
     filenames_ci = {f.get("filename", "").lower(): f.get("filename") for f in files_list}
     eff_name = filenames_ci.get(filename.lower(), filename)
@@ -55,91 +81,40 @@ async def generate_report(
     web_context_block = ""
     web_sources_meta: List[Dict] = []
     if use_web:
+        # Create status callback for web search
+        def web_status_callback(status, message, progress):
+            update_report_status(session_id, status, message, progress)
+
         web_context_block, web_sources_meta = await build_web_context(
-            instructions or query_text, max_web=max_web, top_k=12
+            instructions or query_text, max_web=max_web, top_k=12, status_callback=web_status_callback
         )
     file_summary = doc_sum.get("summary", "")
 
-    from utils.api.router import GEMINI_MED, GEMINI_PRO
-    if instructions.strip():
-        filter_sys = (
-            "You are an expert content analyst. Given the user's specific instructions and the document content, "
-            "identify which sections/chunks are MOST relevant to their request. "
-            "Each chunk is prefixed with [CHUNK_ID: <id>] - use these exact IDs in your response. "
-            "Return a JSON object with this structure: {\"relevant_chunks\": [\"<chunk_id_1>\", \"<chunk_id_2>\"], \"focus_areas\": [\"key topic 1\", \"key topic 2\"]}"
-        )
-        filter_user = f"USER_INSTRUCTIONS: {instructions}\n\nDOCUMENT_SUMMARY: {file_summary}\n\nAVAILABLE_CHUNKS:\n{context_text}\n\nIdentify only the chunks that directly address the user's specific request."
-        try:
-            selection_filter = {"provider": "gemini", "model": os.getenv("GEMINI_MED", "gemini-2.5-flash")}
-            filter_response = await generate_answer_with_model(selection_filter, filter_sys, filter_user, gemini_rotator, nvidia_rotator)
-            logger.info(f"[REPORT] Raw filter response: {filter_response}")
-            import json as _json
-            try:
-                # Extract JSON from markdown code blocks if present
-                json_text = filter_response.strip()
-                if json_text.startswith('```json'):
-                    # Remove markdown code block formatting
-                    json_text = json_text[7:]  # Remove ```json
-                    if json_text.endswith('```'):
-                        json_text = json_text[:-3]  # Remove ```
-                    json_text = json_text.strip()
-                elif json_text.startswith('```'):
-                    # Remove generic code block formatting
-                    json_text = json_text[3:]  # Remove ```
-                    if json_text.endswith('```'):
-                        json_text = json_text[:-3]  # Remove ```
-                    json_text = json_text.strip()
-
-                filter_data = _json.loads(json_text)
-                relevant_chunk_ids = filter_data.get("relevant_chunks", [])
-                focus_areas = filter_data.get("focus_areas", [])
-                logger.info(f"[REPORT] Content filtering identified {len(relevant_chunk_ids)} relevant chunks: {relevant_chunk_ids} and focus areas: {focus_areas}")
-                if relevant_chunk_ids and hits:
-                    filtered_hits = [h for h in hits if str(h["doc"].get("_id", "")) in relevant_chunk_ids]
-                    if filtered_hits:
-                        hits = filtered_hits
-                        logger.info(f"[REPORT] Filtered context from {len(hits)} chunks to {len(filtered_hits)} relevant chunks")
-                    else:
-                        logger.warning(f"[REPORT] No matching chunks found for IDs: {relevant_chunk_ids}")
-                else:
-                    logger.warning(f"[REPORT] No relevant chunk IDs returned or no hits available")
-            except _json.JSONDecodeError as e:
-                logger.warning(f"[REPORT] Could not parse filter response, using all chunks. JSON error: {e}. Response: {filter_response}")
-        except Exception as e:
-            logger.warning(f"[REPORT] Content filtering failed: {e}")
-
-    sys_outline = (
-        "You are an expert technical writer. Create a focused, hierarchical outline for a report based on the user's specific instructions and the MATERIALS. "
-        "The outline should directly address what the user asked for. Output as Markdown bullet list only. Keep it within about {} words."
-    ).format(max(100, outline_words))
-    instruction_context = f"USER_REQUEST: {instructions}\n\n" if instructions.strip() else ""
-    user_outline = f"{instruction_context}MATERIALS:\n\n[FILE_SUMMARY from {eff_name}]\n{file_summary}\n\n[DOC_CONTEXT]\n{context_text}\n\n[WEB_CONTEXT]\n{web_context_block}"
-    try:
-        selection_outline = {"provider": "gemini", "model": os.getenv("GEMINI_MED", "gemini-2.5-flash")}
-        outline_md = await generate_answer_with_model(selection_outline, sys_outline, user_outline, gemini_rotator, nvidia_rotator)
-    except Exception as e:
-        logger.warning(f"Report outline failed: {e}")
-        outline_md = "# Report Outline\n\n- Introduction\n- Key Topics\n- Conclusion"
-
-    instruction_focus = f"FOCUS ON: {instructions}\n\n" if instructions.strip() else ""
-    sys_report = (
-        "You are an expert report writer. Write a focused, comprehensive Markdown report that directly addresses the user's specific request. "
-        "Using the OUTLINE and MATERIALS:\n"
-        "- Structure the report to answer exactly what the user asked for\n"
-        "- Use clear section headings\n"
-        "- Keep content factual and grounded in the provided materials\n"
-        f"- Include brief citations like (source: {eff_name}, topic) - use the actual filename provided\n"
-        "- If the user asked for a specific section/topic, focus heavily on that\n"
-        f"- Target length ~{max(600, report_words)} words\n"
-        "- Ensure the report directly fulfills the user's request"
-    )
-    user_report = f"{instruction_focus}OUTLINE:\n{outline_md}\n\nMATERIALS:\n[FILE_SUMMARY from {eff_name}]\n{file_summary}\n\n[DOC_CONTEXT]\n{context_text}\n\n[WEB_CONTEXT]\n{web_context_block}"
-    try:
-        selection_report = {"provider": "gemini", "model": os.getenv("GEMINI_PRO", "gemini-2.5-pro")}
-        report_md = await generate_answer_with_model(selection_report, sys_report, user_report, gemini_rotator, nvidia_rotator)
-    except Exception as e:
-        logger.error(f"Report generation failed: {e}")
-        report_md = outline_md + "\n\n" + file_summary
+    # Step 1: Chain of Thought Planning with NVIDIA
+    logger.info("[REPORT] Starting CoT planning phase")
+    update_report_status(session_id, "planning", "Planning action...", 25)
+    cot_plan = await generate_cot_plan(instructions, file_summary, context_text, web_context_block, nvidia_rotator)
+
+    # Step 2: Execute detailed subtasks based on CoT plan
+    logger.info("[REPORT] Executing detailed subtasks")
+    update_report_status(session_id, "processing", "Processing data...", 40)
+    detailed_analysis = await execute_detailed_subtasks(cot_plan, context_text, web_context_block, eff_name, nvidia_rotator)
+
+    # Step 3: Synthesize comprehensive report from detailed analysis
+    logger.info("[REPORT] Synthesizing comprehensive report")
+    update_report_status(session_id, "thinking", "Thinking solution...", 60)
+    comprehensive_report = await synthesize_comprehensive_report(
+        instructions, cot_plan, detailed_analysis, eff_name, report_words, gemini_rotator, nvidia_rotator
+    )
+
+    # Update status: Generating answer (final report generation)
+    update_report_status(session_id, "generating", "Generating answer...", 80)
+
+    # Update status: Complete
+    update_report_status(session_id, "complete", "Report ready", 100)
+
+    # Use the comprehensive report from CoT approach
+    report_md = comprehensive_report
     # Merge local and web sources
     merged_sources = list(sources_meta) + [
         {"filename": s.get("url"), "topic_name": s.get("topic_name"), "score": s.get("score"), "kind": "web"}
@@ -177,3 +152,260 @@ async def generate_report_pdf(
         raise
 
 
+# ────────────────────────────── Chain of Thought Report Generation ──────────────────
+
+async def generate_cot_plan(instructions: str, file_summary: str, context_text: str, web_context: str, nvidia_rotator) -> Dict[str, Any]:
+    """Generate a detailed Chain of Thought plan for report generation using NVIDIA."""
+    sys_prompt = """You are an expert research analyst and report planner. Given a user's request and available materials, create a comprehensive plan for generating a detailed report.
+
+Your task is to:
+1. Analyze the user's request and identify key requirements
+2. Break down the report into logical sections and subtasks
+3. Identify what specific information needs to be extracted from each source
+4. Plan the reasoning flow and argument structure
+5. Determine the depth and rigor needed for each section
+
+Return a JSON object with this structure:
+{
+    "analysis": {
+        "user_intent": "What the user really wants to know",
+        "key_requirements": ["requirement1", "requirement2"],
+        "complexity_level": "basic|intermediate|advanced",
+        "focus_areas": ["area1", "area2", "area3"]
+    },
+    "report_structure": {
+        "sections": [
+            {
+                "title": "Section Title",
+                "purpose": "Why this section is needed",
+                "subtasks": [
+                    {
+                        "task": "Specific task description",
+                        "reasoning": "Why this task is important",
+                        "sources_needed": ["local", "web", "both"],
+                        "depth": "surface|detailed|comprehensive"
+                    }
+                ]
+            }
+        ]
+    },
+    "reasoning_flow": [
+        "Step 1: Start with...",
+        "Step 2: Then analyze...",
+        "Step 3: Finally synthesize..."
+    ]
+}"""
+
+    user_prompt = f"""USER REQUEST: {instructions}
+
+AVAILABLE MATERIALS:
+FILE SUMMARY: {file_summary}
+
+DOCUMENT CONTEXT: {context_text[:2000]}...
+
+WEB CONTEXT: {web_context[:2000]}...
+
+Create a detailed plan for generating a comprehensive report that addresses the user's request."""
+
+    try:
+        selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
+        response = await generate_answer_with_model(selection, sys_prompt, user_prompt, None, nvidia_rotator)
+
+        # Parse JSON response
+        import json
+        json_text = response.strip()
+        if json_text.startswith('```json'):
+            json_text = json_text[7:-3].strip()
+        elif json_text.startswith('```'):
+            json_text = json_text[3:-3].strip()
+
+        plan = json.loads(json_text)
+        logger.info(f"[REPORT] CoT plan generated with {len(plan.get('report_structure', {}).get('sections', []))} sections")
+        return plan
+
+    except Exception as e:
+        logger.warning(f"[REPORT] CoT planning failed: {e}")
+        # Fallback plan
+        return {
+            "analysis": {
+                "user_intent": instructions,
+                "key_requirements": ["comprehensive analysis"],
+                "complexity_level": "intermediate",
+                "focus_areas": ["main topics"]
+            },
+            "report_structure": {
+                "sections": [
+                    {
+                        "title": "Introduction",
+                        "purpose": "Provide overview and context",
+                        "subtasks": [{"task": "Summarize key points", "reasoning": "Set foundation", "sources_needed": ["local"], "depth": "detailed"}]
+                    },
+                    {
+                        "title": "Main Analysis",
+                        "purpose": "Address user's specific request",
+                        "subtasks": [{"task": "Detailed analysis", "reasoning": "Core content", "sources_needed": ["both"], "depth": "comprehensive"}]
+                    },
+                    {
+                        "title": "Conclusion",
+                        "purpose": "Synthesize findings",
+                        "subtasks": [{"task": "Summarize key insights", "reasoning": "Provide closure", "sources_needed": ["local"], "depth": "detailed"}]
+                    }
+                ]
+            },
+            "reasoning_flow": ["Analyze materials", "Extract key insights", "Synthesize findings"]
+        }
+
+
+async def execute_detailed_subtasks(cot_plan: Dict[str, Any], context_text: str, web_context: str, filename: str, nvidia_rotator) -> Dict[str, Any]:
+    """Execute detailed analysis for each subtask identified in the CoT plan."""
+    detailed_analysis = {}
+
+    for section in cot_plan.get("report_structure", {}).get("sections", []):
+        section_title = section.get("title", "Unknown Section")
+        section_analysis = {
+            "title": section_title,
+            "purpose": section.get("purpose", ""),
+            "subtask_results": []
+        }
+
+        for subtask in section.get("subtasks", []):
+            task = subtask.get("task", "")
+            reasoning = subtask.get("reasoning", "")
+            sources_needed = subtask.get("sources_needed", ["local"])
+            depth = subtask.get("depth", "detailed")
+
+            # Generate detailed analysis for this subtask
+            subtask_result = await analyze_subtask(
+                task, reasoning, sources_needed, depth, context_text, web_context, filename, nvidia_rotator
+            )
+
+            section_analysis["subtask_results"].append({
+                "task": task,
+                "reasoning": reasoning,
+                "depth": depth,
+                "analysis": subtask_result
+            })
+
+        detailed_analysis[section_title] = section_analysis
+
+    logger.info(f"[REPORT] Completed detailed analysis for {len(detailed_analysis)} sections")
+    return detailed_analysis
+
+
+async def analyze_subtask(task: str, reasoning: str, sources_needed: List[str], depth: str,
+                          context_text: str, web_context: str, filename: str, nvidia_rotator) -> str:
+    """Analyze a specific subtask with appropriate depth and source selection."""
+
+    # Select appropriate context based on sources_needed
+    selected_context = ""
+    if "local" in sources_needed and "web" in sources_needed:
+        selected_context = f"DOCUMENT CONTEXT:\n{context_text}\n\nWEB CONTEXT:\n{web_context}"
+    elif "local" in sources_needed:
+        selected_context = f"DOCUMENT CONTEXT:\n{context_text}"
+    elif "web" in sources_needed:
+        selected_context = f"WEB CONTEXT:\n{web_context}"
+
+    # Adjust prompt based on depth requirement
+    depth_instructions = {
+        "surface": "Provide a brief, high-level analysis",
+        "detailed": "Provide a thorough, well-reasoned analysis with specific examples",
+        "comprehensive": "Provide an exhaustive, rigorous analysis with deep insights and multiple perspectives"
+    }
+
+    sys_prompt = f"""You are an expert analyst performing detailed research. Your task is to {task}.
+
+REASONING: {reasoning}
+
+DEPTH REQUIREMENT: {depth_instructions.get(depth, "Provide detailed analysis")}
+
+Focus on:
+- Extracting specific, relevant information
+- Providing clear explanations and insights
+- Supporting claims with evidence from the materials
+- Maintaining analytical rigor and objectivity
+- Being comprehensive yet concise
+
+Return only the analysis, no meta-commentary."""
+
+    user_prompt = f"""TASK: {task}
+
+MATERIALS:
+{selected_context}
+
+Perform the analysis as specified."""
+
+    try:
+        selection = {"provider": "nvidia", "model": "meta/llama-3.1-8b-instruct"}
+        analysis = await generate_answer_with_model(selection, sys_prompt, user_prompt, None, nvidia_rotator)
+        return analysis.strip()
+
+    except Exception as e:
+        logger.warning(f"[REPORT] Subtask analysis failed for '{task}': {e}")
+        return f"Analysis for '{task}' could not be completed due to processing error."
+
+
+async def synthesize_comprehensive_report(instructions: str, cot_plan: Dict[str, Any],
+                                          detailed_analysis: Dict[str, Any], filename: str,
+                                          report_words: int, gemini_rotator, nvidia_rotator) -> str:
+    """Synthesize the detailed analysis into a comprehensive, well-structured report."""
+
+    # Prepare synthesis materials
+    analysis_summary = ""
+    for section_title, section_data in detailed_analysis.items():
+        analysis_summary += f"\n## {section_title}\n"
+        analysis_summary += f"Purpose: {section_data.get('purpose', '')}\n\n"
+
+        for subtask_result in section_data.get("subtask_results", []):
+            analysis_summary += f"### {subtask_result.get('task', '')}\n"
+            analysis_summary += f"{subtask_result.get('analysis', '')}\n\n"
+
+    reasoning_flow = cot_plan.get("reasoning_flow", [])
+    flow_text = "\n".join([f"{i+1}. {step}" for i, step in enumerate(reasoning_flow)])
+
+    sys_prompt = f"""You are an expert report writer synthesizing detailed analysis into a comprehensive report.
+
+Your task is to create a well-structured, professional report that:
+1. Follows the planned reasoning flow: {flow_text}
+2. Integrates all detailed analyses seamlessly
+3. Maintains logical flow and coherence
+4. Provides clear, actionable insights
+5. Uses proper academic/professional formatting
+6. Targets approximately {report_words} words
+
+Structure the report with:
+- Clear section headings
+- Logical progression of ideas
+- Smooth transitions between sections
+- Proper citations and references
+- Executive summary or key takeaways
+- Conclusion with actionable insights
+
+Write in a professional, analytical tone suitable for business or academic contexts."""
+
+    user_prompt = f"""USER REQUEST: {instructions}
+
+DETAILED ANALYSIS TO SYNTHESIZE:
+{analysis_summary}
+
+REASONING FLOW TO FOLLOW:
+{flow_text}
+
+Create a comprehensive report that addresses the user's request by synthesizing all the detailed analysis above."""
+
+    try:
+        # Use Gemini Pro for final synthesis (better for long-form content)
+        selection = {"provider": "gemini", "model": "gemini-2.5-pro"}
+        report = await generate_answer_with_model(selection, sys_prompt, user_prompt, gemini_rotator, nvidia_rotator)
+
+        logger.info(f"[REPORT] Comprehensive report synthesized, length: {len(report)} characters")
+        return report
+
+    except Exception as e:
+        logger.error(f"[REPORT] Report synthesis failed: {e}")
+        # Fallback: simple concatenation
+        fallback_report = f"# Report: {instructions}\n\n"
+        fallback_report += analysis_summary
+        fallback_report += f"\n\n## Conclusion\n\nThis report addresses: {instructions}"
+        return fallback_report
+
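The three new helpers form a plan, fan-out, synthesize pipeline: `generate_cot_plan` produces a JSON plan, `execute_detailed_subtasks` walks its sections and subtasks, and `synthesize_comprehensive_report` merges the results. A condensed, runnable sketch of that control flow with stubs in place of the LLM calls (only the dict shapes are taken from the diff; the driver itself is hypothetical):

```python
# Condensed sketch of the report pipeline's control flow: plan -> subtasks -> merge.
import asyncio
from typing import Any, Dict


async def generate_cot_plan(instructions: str) -> Dict[str, Any]:
    # Stub: the real helper asks an NVIDIA model for this JSON plan.
    return {
        "report_structure": {"sections": [
            {"title": "Main Analysis",
             "subtasks": [{"task": "Detailed analysis", "depth": "comprehensive"}]},
            {"title": "Conclusion",
             "subtasks": [{"task": "Summarize key insights", "depth": "detailed"}]},
        ]},
        "reasoning_flow": ["Analyze materials", "Synthesize findings"],
    }


async def analyze_subtask(task: str) -> str:
    # Stub: the real helper prompts the model with depth/source instructions.
    return f"(analysis of: {task})"


async def run_pipeline(instructions: str) -> str:
    plan = await generate_cot_plan(instructions)              # Step 1: plan
    analysis: Dict[str, list] = {}
    for section in plan["report_structure"]["sections"]:      # Step 2: subtasks
        results = [await analyze_subtask(st["task"]) for st in section["subtasks"]]
        analysis[section["title"]] = results
    # Step 3: the real code hands `analysis` plus reasoning_flow to Gemini Pro;
    # here we just concatenate to show the data flow.
    return "\n".join(f"## {title}\n" + "\n".join(parts)
                     for title, parts in analysis.items())


print(asyncio.run(run_pipeline("Summarize the document")))
```

Because each subtask is independent, the per-section loop could also run the analyses concurrently (e.g. with `asyncio.gather`); the committed code awaits them sequentially.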
routes/search.py CHANGED
@@ -598,7 +598,7 @@ async def calculate_comprehensive_score(content: str, user_query: str, url: str,
     return max(0.0, min(1.0, comprehensive_score))
 
 
-async def build_web_context(question: str, max_web: int = 30, top_k: int = 10) -> Tuple[str, List[Dict[str, Any]]]:
+async def build_web_context(question: str, max_web: int = 30, top_k: int = 10, status_callback=None) -> Tuple[str, List[Dict[str, Any]]]:
     """
     Intelligent web search and content processing:
     1. Extract intelligent search keywords
@@ -609,6 +609,8 @@ async def build_web_context(question: str, max_web: int = 30, top_k: int = 10) -> Tuple[str, List[Dict[str, Any]]]:
     t0 = time.perf_counter()
 
     # Step 1: Extract intelligent search keywords
+    if status_callback:
+        status_callback("searching", "Searching information...", 45)
     keywords = await extract_search_keywords(question, nvidia_rotator)
     logger.info(f"[SEARCH] Extracted keywords: {keywords}")
 
@@ -623,6 +625,8 @@ async def build_web_context(question: str, max_web: int = 30, top_k: int = 10) -> Tuple[str, List[Dict[str, Any]]]:
         return "", []
 
     # Step 3: Process each source with NVIDIA agent
+    if status_callback:
+        status_callback("processing", "Processing data...", 50)
     processing_tasks = []
     for result in search_results:
         task = fetch_and_process_content(result["url"], result["title"], question, nvidia_rotator)
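`build_web_context()` treats `status_callback` as a plain `(status, message, progress)` callable, so any caller can observe search progress the same way the routes do. A small sketch of wiring a callback to a session store, mirroring the usage in routes/chats.py and routes/reports.py (the store and factory names here are hypothetical):

```python
# Any callable taking (status, message, progress) can observe search progress.
# `session_store` and `make_callback` are illustrative names, not from the diff.
session_store: dict = {}


def make_callback(session_id: str):
    def callback(status: str, message: str, progress: int) -> None:
        session_store[session_id] = {
            "status": status,
            "message": message,
            "progress": progress,
        }
    return callback


# Usage inside an async route (sketch):
#     web_ctx, sources = await build_web_context(
#         question, max_web=10, top_k=5, status_callback=make_callback(session_id)
#     )
```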
static/script.js CHANGED
@@ -499,13 +499,19 @@
     // Save user message to chat history
     await saveChatMessage(user.user_id, currentProject.project_id, 'user', question);
 
-    // Add thinking message
-    const thinkingMsg = appendMessage('thinking', 'Thinking...');
+    // Generate session ID for status tracking
+    const sessionId = 'chat_' + Date.now() + '_' + Math.random().toString(36).substr(2, 9);
+
+    // Add thinking message with dynamic status
+    const thinkingMsg = appendMessage('thinking', 'Receiving request...');
 
     // Disable input during processing
     questionInput.disabled = true;
     sendBtn.disabled = true;
     showButtonLoading(sendBtn, true);
+
+    // Start status polling
+    const statusInterval = startStatusPolling(sessionId, thinkingMsg);
 
     try {
       // Branch: if report mode is active → call /report with textarea as instructions
@@ -519,6 +525,7 @@
       form.append('outline_words', '200');
       form.append('report_words', '1200');
       form.append('instructions', question);
+      form.append('session_id', sessionId);
       // If Search is toggled on, enable web augmentation for report
       const useWeb = searchLink && searchLink.classList.contains('active');
       if (useWeb) {
@@ -542,6 +549,7 @@
       formData.append('project_id', currentProject.project_id);
       formData.append('question', question);
       formData.append('k', '6');
+      formData.append('session_id', sessionId);
       // If Search is toggled on, enable web augmentation
       const useWeb = searchLink && searchLink.classList.contains('active');
       if (useWeb) {
@@ -573,6 +581,10 @@
       appendMessage('assistant', errorMsg);
       await saveChatMessage(user.user_id, currentProject.project_id, 'assistant', errorMsg);
     } finally {
+      // Stop status polling
+      if (statusInterval) {
+        clearInterval(statusInterval);
+      }
       // Re-enable input
       questionInput.disabled = false;
       sendBtn.disabled = false;
@@ -1072,4 +1084,90 @@
   }, { threshold: 0.1 });
 
   document.querySelectorAll('.reveal').forEach(el => observer.observe(el));
+
+  // Status polling function for real-time updates
+  function startStatusPolling(sessionId, thinkingMsg) {
+    const isReportMode = isReportModeActive();
+    const statusEndpoint = isReportMode ? `/report/status/${sessionId}` : `/chat/status/${sessionId}`;
+
+    const interval = setInterval(async () => {
+      try {
+        const response = await fetch(statusEndpoint);
+        if (response.ok) {
+          const status = await response.json();
+          updateThinkingMessage(thinkingMsg, status.message, status.progress);
+
+          // Stop polling when complete or error
+          if (status.status === 'complete' || status.status === 'error') {
+            clearInterval(interval);
+          }
+        }
+      } catch (error) {
+        console.warn('Status polling failed:', error);
+      }
+    }, 500); // Poll every 500ms
+
+    return interval;
+  }
+
+  function updateThinkingMessage(thinkingMsg, message, progress) {
+    if (thinkingMsg && thinkingMsg.querySelector) {
+      const progressBar = thinkingMsg.querySelector('.progress-bar');
+      const statusText = thinkingMsg.querySelector('.status-text');
+
+      if (statusText) {
+        statusText.textContent = message;
+      }
+
+      if (progressBar && progress !== undefined) {
+        progressBar.style.width = `${progress}%`;
+      }
+    }
+  }
+
+  // Enhanced thinking message with progress bar
+  function appendMessage(role, text, isReport = false) {
+    const messageDiv = document.createElement('div');
+    messageDiv.className = `msg ${role}`;
+
+    if (role === 'thinking') {
+      messageDiv.innerHTML = `
+        <div class="thinking-container">
+          <div class="status-text">${text}</div>
+          <div class="progress-container">
+            <div class="progress-bar" style="width: 0%"></div>
+          </div>
+        </div>
+      `;
+    } else if (role === 'assistant') {
+      // Render Markdown for assistant messages
+      try {
+        // Use marked library to convert Markdown to HTML
+        const htmlContent = marked.parse(text);
+        messageDiv.innerHTML = htmlContent;
+
+        // Add copy buttons to code blocks
+        addCopyButtonsToCodeBlocks(messageDiv);
+
+        // Add download PDF button for reports
+        if (isReport) {
+          addDownloadPdfButton(messageDiv, text);
+        }
+      } catch (e) {
+        // Fallback to plain text if Markdown parsing fails
+        messageDiv.textContent = text;
+      }
+    } else {
+      messageDiv.textContent = text;
+    }
+
+    messages.appendChild(messageDiv);
+
+    // Scroll to bottom
+    requestAnimationFrame(() => {
+      messageDiv.scrollIntoView({ behavior: 'smooth', block: 'end' });
+    });
+
+    return messageDiv;
+  }
 })();
static/styles.css CHANGED
@@ -766,6 +766,35 @@
   font-style: italic;
 }
 
+/* ────────────────────────────── Thinking Container Styles ────────────────────────────── */
+.thinking-container {
+  display: flex;
+  flex-direction: column;
+  gap: 0.75rem;
+}
+
+.status-text {
+  font-style: italic;
+  color: var(--text-secondary);
+  font-size: 0.95rem;
+}
+
+.progress-container {
+  width: 100%;
+  height: 4px;
+  background: var(--border);
+  border-radius: 2px;
+  overflow: hidden;
+}
+
+.progress-bar {
+  height: 100%;
+  background: var(--gradient-accent);
+  border-radius: 2px;
+  transition: width 0.3s ease;
+  width: 0%;
+}
+
 /* Markdown content styling */
 .msg.assistant h1,
 .msg.assistant h2,