LiamKhoaLe committed
Commit 00e332b · 1 Parent(s): e2a914f

Upd frontend handler
__pycache__/app.cpython-311.pyc ADDED
Binary file (67.5 kB)
 
app.py CHANGED
@@ -635,11 +635,45 @@ async def generate_report(
 
     # Chain-of-thought style two-step with Gemini
     from utils.router import GEMINI_MED, GEMINI_PRO
+
+    # Step 1: Content filtering and relevance assessment based on user instructions
+    if instructions.strip():
+        filter_sys = (
+            "You are an expert content analyst. Given the user's specific instructions and the document content, "
+            "identify which sections/chunks are MOST relevant to their request. "
+            "Return a JSON object with this structure: {\"relevant_chunks\": [\"chunk_id_1\", \"chunk_id_2\"], \"focus_areas\": [\"key topic 1\", \"key topic 2\"]}"
+        )
+        filter_user = f"USER_INSTRUCTIONS: {instructions}\n\nDOCUMENT_SUMMARY: {file_summary}\n\nAVAILABLE_CHUNKS:\n{context_text}\n\nIdentify only the chunks that directly address the user's specific request."
+
+        try:
+            selection_filter = {"provider": "gemini", "model": os.getenv("GEMINI_MED", "gemini-2.5-flash")}
+            filter_response = await generate_answer_with_model(selection_filter, filter_sys, filter_user, gemini_rotator, nvidia_rotator)
+            # Try to parse the filter response to get relevant chunks
+            import json
+            try:
+                filter_data = json.loads(filter_response)
+                relevant_chunk_ids = filter_data.get("relevant_chunks", [])
+                focus_areas = filter_data.get("focus_areas", [])
+                logger.info(f"[REPORT] Content filtering identified {len(relevant_chunk_ids)} relevant chunks and focus areas: {focus_areas}")
+                # Filter context to only relevant chunks
+                if relevant_chunk_ids and hits:
+                    filtered_hits = [h for h in hits if str(h["doc"].get("_id", "")) in relevant_chunk_ids]
+                    if filtered_hits:
+                        hits = filtered_hits
+                        logger.info(f"[REPORT] Filtered context from {len(hits)} chunks to {len(filtered_hits)} relevant chunks")
+            except json.JSONDecodeError:
+                logger.warning(f"[REPORT] Could not parse filter response, using all chunks: {filter_response}")
+        except Exception as e:
+            logger.warning(f"[REPORT] Content filtering failed: {e}")
+
+    # Step 2: Create focused outline based on user instructions
     sys_outline = (
-        "You are an expert technical writer. Create a concise, hierarchical outline for a report based on the MATERIALS. "
-        "Output as Markdown bullet list only. Keep it within about {} words."
+        "You are an expert technical writer. Create a focused, hierarchical outline for a report based on the user's specific instructions and the MATERIALS. "
+        "The outline should directly address what the user asked for. Output as Markdown bullet list only. Keep it within about {} words."
     ).format(max(100, outline_words))
-    user_outline = f"MATERIALS:\n\n[FILE_SUMMARY]\n{file_summary}\n\n[DOC_CONTEXT]\n{context_text}\n\nINSTRUCTIONS (if any):\n{instructions}"
+
+    instruction_context = f"USER_REQUEST: {instructions}\n\n" if instructions.strip() else ""
+    user_outline = f"{instruction_context}MATERIALS:\n\n[FILE_SUMMARY]\n{file_summary}\n\n[DOC_CONTEXT]\n{context_text}"
 
     try:
         # Step 1: Outline with Flash/Med
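
The filtering step in this hunk trusts the model to emit strict JSON and quietly falls back to the full context when parsing fails. A minimal, self-contained sketch of that parse-and-fallback pattern (the helper name filter_hits_by_response and the toy hit records are illustrative, not from this repo):

import json
import logging

logger = logging.getLogger(__name__)

def filter_hits_by_response(filter_response: str, hits: list) -> list:
    """Keep only hits whose _id appears in the model's relevant_chunks list.

    Falls back to the original hits when the response is not valid JSON
    or when it names no known chunk ids, mirroring the commit's fail-open choice.
    """
    try:
        filter_data = json.loads(filter_response)
    except json.JSONDecodeError:
        logger.warning("Unparseable filter response; keeping all chunks")
        return hits
    relevant_ids = set(filter_data.get("relevant_chunks", []))
    filtered = [h for h in hits if str(h["doc"].get("_id", "")) in relevant_ids]
    return filtered or hits

hits = [{"doc": {"_id": "c1"}}, {"doc": {"_id": "c2"}}]
print(filter_hits_by_response('{"relevant_chunks": ["c2"]}', hits))  # only the c2 hit survives
print(filter_hits_by_response("not json", hits))                     # both hits survive

One caveat when reading the hunk itself: it logs the "from" count after reassigning hits, so both numbers in that log line end up being the filtered count.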
@@ -649,12 +683,20 @@ async def generate_report(
         logger.warning(f"Report outline failed: {e}")
         outline_md = "# Report Outline\n\n- Introduction\n- Key Topics\n- Conclusion"
 
+    # Step 3: Generate focused report based on user instructions and filtered content
+    instruction_focus = f"FOCUS ON: {instructions}\n\n" if instructions.strip() else ""
     sys_report = (
-        "You are an expert report writer. Using the OUTLINE and MATERIALS, write a comprehensive Markdown report. "
-        "- Use section headings\n- Keep it factual and grounded in the materials\n- Include brief citations like (source: filename, topic). "
-        f"Target length ~{max(600, report_words)} words."
+        "You are an expert report writer. Write a focused, comprehensive Markdown report that directly addresses the user's specific request. "
+        "Using the OUTLINE and MATERIALS:\n"
+        "- Structure the report to answer exactly what the user asked for\n"
+        "- Use clear section headings\n"
+        "- Keep content factual and grounded in the provided materials\n"
+        "- Include brief citations like (source: filename, topic)\n"
+        "- If the user asked for a specific section/topic, focus heavily on that\n"
+        f"- Target length ~{max(600, report_words)} words\n"
+        "- Ensure the report directly fulfills the user's request"
     )
-    user_report = f"OUTLINE:\n{outline_md}\n\nMATERIALS:\n[FILE_SUMMARY]\n{file_summary}\n\n[DOC_CONTEXT]\n{context_text}\n\nINSTRUCTIONS (if any):\n{instructions}"
+    user_report = f"{instruction_focus}OUTLINE:\n{outline_md}\n\nMATERIALS:\n[FILE_SUMMARY]\n{file_summary}\n\n[DOC_CONTEXT]\n{context_text}"
 
     try:
         selection_report = {"provider": "gemini", "model": os.getenv("GEMINI_PRO", "gemini-2.5-pro")}
@@ -672,6 +714,24 @@ async def chat(
     project_id: str = Form(...),
     question: str = Form(...),
     k: int = Form(6)
+):
+    # Add timeout protection to prevent hanging
+    import asyncio
+    try:
+        return await asyncio.wait_for(_chat_impl(user_id, project_id, question, k), timeout=120.0)
+    except asyncio.TimeoutError:
+        logger.error("[CHAT] Chat request timed out after 120 seconds")
+        return ChatAnswerResponse(
+            answer="Sorry, the request took too long to process. Please try again with a simpler question.",
+            sources=[],
+            relevant_files=[]
+        )
+
+async def _chat_impl(
+    user_id: str,
+    project_id: str,
+    question: str,
+    k: int
 ):
     """
     RAG chat that answers ONLY from uploaded materials.
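
The new endpoint body above is the standard asyncio.wait_for pattern: the public handler awaits the real implementation under a deadline and converts asyncio.TimeoutError into an ordinary, user-readable response. A runnable sketch under assumed names (_work stands in for _chat_impl, and a 2-second budget stands in for the commit's 120 seconds):

import asyncio

async def _work(question: str) -> str:
    # Stand-in for _chat_impl: pretend the pipeline takes 5 seconds.
    await asyncio.sleep(5)
    return f"answer to {question!r}"

async def chat(question: str) -> str:
    # Bound the whole request; on timeout, answer gracefully instead of hanging.
    try:
        return await asyncio.wait_for(_work(question), timeout=2.0)
    except asyncio.TimeoutError:
        return "Sorry, the request took too long to process."

print(asyncio.run(chat("hello")))  # prints the timeout apology after ~2s

Splitting chat into a thin wrapper plus _chat_impl keeps the FastAPI Form(...) signature on the public route while letting the implementation take plain arguments, which is what makes the wait_for call straightforward to write.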
@@ -737,9 +797,13 @@ async def chat(
         logger.info(f"[CHAT] Normalized mentions to stored filenames: {mentioned_normalized}")
 
     # 1b) Ask NVIDIA to mark relevance per file
-    relevant_map = await files_relevance(question, files_list, nvidia_rotator)
-    relevant_files = [fn for fn, ok in relevant_map.items() if ok]
-    logger.info(f"[CHAT] NVIDIA relevant files: {relevant_files}")
+    try:
+        relevant_map = await files_relevance(question, files_list, nvidia_rotator)
+        relevant_files = [fn for fn, ok in relevant_map.items() if ok]
+        logger.info(f"[CHAT] NVIDIA relevant files: {relevant_files}")
+    except Exception as e:
+        logger.warning(f"[CHAT] NVIDIA relevance failed, defaulting to all files: {e}")
+        relevant_files = [f.get("filename") for f in files_list if f.get("filename")]
 
     # 1c) Ensure any explicitly mentioned files in the question are included
     # This safeguards against model misclassification
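
This hunk makes per-file relevance classification fail open: if the NVIDIA call raises, every stored filename is treated as relevant, so the vector search below still has a corpus to filter against. A small sketch of the pattern, with a hypothetical classify() standing in for files_relevance():

import asyncio
import logging

logger = logging.getLogger(__name__)

async def classify(question: str, files_list: list) -> dict:
    # Hypothetical stand-in for files_relevance(); raise to simulate an outage.
    raise RuntimeError("relevance service unavailable")

async def pick_relevant_files(question: str, files_list: list) -> list:
    try:
        relevant_map = await classify(question, files_list)
        return [fn for fn, ok in relevant_map.items() if ok]
    except Exception as e:
        # Fail open: default to every known filename so retrieval still runs.
        logger.warning("Relevance failed, defaulting to all files: %s", e)
        return [f.get("filename") for f in files_list if f.get("filename")]

files = [{"filename": "a.pdf"}, {"filename": "b.md"}, {}]
print(asyncio.run(pick_relevant_files("q", files)))  # ['a.pdf', 'b.md']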
@@ -778,6 +842,7 @@ async def chat(
         recent_related = ""
 
     # 3) RAG vector search (restricted to relevant files if any)
+    logger.info(f"[CHAT] Starting vector search with relevant_files={relevant_files}")
     q_vec = embedder.embed([question])[0]
     hits = rag.vector_search(
         user_id=user_id,
@@ -786,6 +851,7 @@ async def chat(
         k=k,
         filenames=relevant_files if relevant_files else None
     )
+    logger.info(f"[CHAT] Vector search returned {len(hits) if hits else 0} hits")
     if not hits:
         logger.info(f"[CHAT] No hits with relevance filter. relevant_files={relevant_files}")
         # Retry 1: if we have explicit mentions, try restricting only to them
@@ -840,6 +906,7 @@ async def chat(
             sources=[],
             relevant_files=relevant_files or mentioned_normalized
         )
+    # If we get here, we have hits, so continue with normal flow
     # Compose context
     contexts = []
     sources_meta = []
@@ -887,6 +954,7 @@ async def chat(
     selection = select_model(question=question, context=composed_context)
     logger.info(f"Model selection: {selection}")
     # Generate answer with model
+    logger.info(f"[CHAT] Generating answer with {selection['provider']} {selection['model']}")
     try:
         answer = await generate_answer_with_model(
             selection=selection,
@@ -895,6 +963,7 @@ async def chat(
             gemini_rotator=gemini_rotator,
             nvidia_rotator=nvidia_rotator
         )
+        logger.info(f"[CHAT] Answer generated successfully, length: {len(answer)}")
     except Exception as e:
         logger.error(f"LLM error: {e}")
         answer = "I had trouble contacting the language model provider just now. Please try again."
 
utils/__pycache__/logger.cpython-311.pyc ADDED
Binary file (2.38 kB)

utils/__pycache__/parser.cpython-311.pyc ADDED
Binary file (4.23 kB)

utils/__pycache__/rotator.cpython-311.pyc ADDED
Binary file (4.7 kB)