Commit 00e332b
Parent(s): e2a914f
Upd frontend handler
__pycache__/app.cpython-311.pyc
ADDED
Binary file (67.5 kB).
app.py
CHANGED
@@ -635,11 +635,45 @@ async def generate_report(
 
     # Chain-of-thought style two-step with Gemini
     from utils.router import GEMINI_MED, GEMINI_PRO
+
+    # Step 1: Content filtering and relevance assessment based on user instructions
+    if instructions.strip():
+        filter_sys = (
+            "You are an expert content analyst. Given the user's specific instructions and the document content, "
+            "identify which sections/chunks are MOST relevant to their request. "
+            "Return a JSON object with this structure: {\"relevant_chunks\": [\"chunk_id_1\", \"chunk_id_2\"], \"focus_areas\": [\"key topic 1\", \"key topic 2\"]}"
+        )
+        filter_user = f"USER_INSTRUCTIONS: {instructions}\n\nDOCUMENT_SUMMARY: {file_summary}\n\nAVAILABLE_CHUNKS:\n{context_text}\n\nIdentify only the chunks that directly address the user's specific request."
+
+        try:
+            selection_filter = {"provider": "gemini", "model": os.getenv("GEMINI_MED", "gemini-2.5-flash")}
+            filter_response = await generate_answer_with_model(selection_filter, filter_sys, filter_user, gemini_rotator, nvidia_rotator)
+            # Try to parse the filter response to get relevant chunks
+            import json
+            try:
+                filter_data = json.loads(filter_response)
+                relevant_chunk_ids = filter_data.get("relevant_chunks", [])
+                focus_areas = filter_data.get("focus_areas", [])
+                logger.info(f"[REPORT] Content filtering identified {len(relevant_chunk_ids)} relevant chunks and focus areas: {focus_areas}")
+                # Filter context to only relevant chunks
+                if relevant_chunk_ids and hits:
+                    filtered_hits = [h for h in hits if str(h["doc"].get("_id", "")) in relevant_chunk_ids]
+                    if filtered_hits:
+                        hits = filtered_hits
+                        logger.info(f"[REPORT] Filtered context from {len(hits)} chunks to {len(filtered_hits)} relevant chunks")
+            except json.JSONDecodeError:
+                logger.warning(f"[REPORT] Could not parse filter response, using all chunks: {filter_response}")
+        except Exception as e:
+            logger.warning(f"[REPORT] Content filtering failed: {e}")
+
+    # Step 2: Create focused outline based on user instructions
     sys_outline = (
-        "You are an expert technical writer. Create a
-        "Output as Markdown bullet list only. Keep it within about {} words."
+        "You are an expert technical writer. Create a focused, hierarchical outline for a report based on the user's specific instructions and the MATERIALS. "
+        "The outline should directly address what the user asked for. Output as Markdown bullet list only. Keep it within about {} words."
     ).format(max(100, outline_words))
-
+
+    instruction_context = f"USER_REQUEST: {instructions}\n\n" if instructions.strip() else ""
+    user_outline = f"{instruction_context}MATERIALS:\n\n[FILE_SUMMARY]\n{file_summary}\n\n[DOC_CONTEXT]\n{context_text}"
 
     try:
         # Step 1: Outline with Flash/Med
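
The filter step above feeds the raw model reply straight into json.loads and falls back to using all chunks whenever parsing fails. Models frequently wrap JSON in Markdown code fences, so a slightly more tolerant parser would rescue those replies. A minimal, self-contained sketch, not part of this commit; the helper name and the empty-result fallback are assumptions:

    import json
    import re

    def parse_filter_response(text: str) -> dict:
        """Best-effort parse of a model reply that should contain one JSON object."""
        candidate = text.strip()
        # Strip ```json ... ``` or ``` ... ``` fences if the model added them.
        candidate = re.sub(r"^```(?:json)?\s*|\s*```$", "", candidate)
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            # Last resort: grab the first {...} block embedded in surrounding prose.
            match = re.search(r"\{.*\}", candidate, re.DOTALL)
            if match:
                try:
                    return json.loads(match.group(0))
                except json.JSONDecodeError:
                    pass
        return {"relevant_chunks": [], "focus_areas": []}

    reply = '```json\n{"relevant_chunks": ["c1"], "focus_areas": ["pricing"]}\n```'
    print(parse_filter_response(reply)["relevant_chunks"])  # ['c1']

Something along these lines could sit in front of the existing json.loads call without changing the fallback behaviour when the reply really is unusable.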
@@ -649,12 +683,20 @@ async def generate_report(
         logger.warning(f"Report outline failed: {e}")
         outline_md = "# Report Outline\n\n- Introduction\n- Key Topics\n- Conclusion"
 
+    # Step 3: Generate focused report based on user instructions and filtered content
+    instruction_focus = f"FOCUS ON: {instructions}\n\n" if instructions.strip() else ""
     sys_report = (
-        "You are an expert report writer.
-        "
-
+        "You are an expert report writer. Write a focused, comprehensive Markdown report that directly addresses the user's specific request. "
+        "Using the OUTLINE and MATERIALS:\n"
+        "- Structure the report to answer exactly what the user asked for\n"
+        "- Use clear section headings\n"
+        "- Keep content factual and grounded in the provided materials\n"
+        "- Include brief citations like (source: filename, topic)\n"
+        "- If the user asked for a specific section/topic, focus heavily on that\n"
+        f"- Target length ~{max(600, report_words)} words\n"
+        "- Ensure the report directly fulfills the user's request"
     )
-    user_report = f"OUTLINE:\n{outline_md}\n\nMATERIALS:\n[FILE_SUMMARY]\n{file_summary}\n\n[DOC_CONTEXT]\n{context_text}"
+    user_report = f"{instruction_focus}OUTLINE:\n{outline_md}\n\nMATERIALS:\n[FILE_SUMMARY]\n{file_summary}\n\n[DOC_CONTEXT]\n{context_text}"
 
     try:
         selection_report = {"provider": "gemini", "model": os.getenv("GEMINI_PRO", "gemini-2.5-pro")}
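
Together, the two hunks above implement the "chain-of-thought style two-step" named in the code comment: a lighter model drafts an outline, then a stronger model writes the report with that outline folded into its prompt. A stripped-down sketch of the same flow with a stubbed provider call; the function and model names here are illustrative, not the app's generate_answer_with_model API:

    import asyncio

    async def call_llm(model: str, system: str, user: str) -> str:
        # Stand-in for a real provider call; the interface is assumed, not the app's API.
        await asyncio.sleep(0)
        return f"[{model}] {system[:30]}... -> {user[:30]}..."

    async def two_step_report(materials: str) -> str:
        # Step 1: a lighter model drafts an outline from the materials.
        outline = await call_llm(
            "gemini-2.5-flash",
            "Create a hierarchical Markdown outline of the MATERIALS.",
            f"MATERIALS:\n{materials}",
        )
        # Step 2: a stronger model writes the report, grounded in outline plus materials.
        return await call_llm(
            "gemini-2.5-pro",
            "Write a Markdown report that follows the OUTLINE and cites the MATERIALS.",
            f"OUTLINE:\n{outline}\n\nMATERIALS:\n{materials}",
        )

    print(asyncio.run(two_step_report("Quarterly sales notes ...")))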
@@ -672,6 +714,24 @@ async def chat(
     project_id: str = Form(...),
     question: str = Form(...),
     k: int = Form(6)
+):
+    # Add timeout protection to prevent hanging
+    import asyncio
+    try:
+        return await asyncio.wait_for(_chat_impl(user_id, project_id, question, k), timeout=120.0)
+    except asyncio.TimeoutError:
+        logger.error("[CHAT] Chat request timed out after 120 seconds")
+        return ChatAnswerResponse(
+            answer="Sorry, the request took too long to process. Please try again with a simpler question.",
+            sources=[],
+            relevant_files=[]
+        )
+
+async def _chat_impl(
+    user_id: str,
+    project_id: str,
+    question: str,
+    k: int
 ):
     """
     RAG chat that answers ONLY from uploaded materials.
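
This hunk splits the endpoint into a thin wrapper plus _chat_impl so the whole pipeline runs under asyncio.wait_for and a stalled retrieval or provider call cannot hang the request indefinitely. The same pattern in isolation, with illustrative names and a deliberately short timeout so the fallback path is visible:

    import asyncio

    async def slow_answer(question: str) -> str:
        await asyncio.sleep(2)  # pretend retrieval and generation take a while
        return f"Answer to: {question}"

    async def answer_with_timeout(question: str, timeout: float = 1.0) -> str:
        try:
            # wait_for cancels the inner coroutine and raises TimeoutError past the budget.
            return await asyncio.wait_for(slow_answer(question), timeout=timeout)
        except asyncio.TimeoutError:
            return "Sorry, the request took too long to process."

    print(asyncio.run(answer_with_timeout("What changed in Q3?")))  # falls back after 1s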
@@ -737,9 +797,13 @@ async def chat(
     logger.info(f"[CHAT] Normalized mentions to stored filenames: {mentioned_normalized}")
 
     # 1b) Ask NVIDIA to mark relevance per file
-
-
-
+    try:
+        relevant_map = await files_relevance(question, files_list, nvidia_rotator)
+        relevant_files = [fn for fn, ok in relevant_map.items() if ok]
+        logger.info(f"[CHAT] NVIDIA relevant files: {relevant_files}")
+    except Exception as e:
+        logger.warning(f"[CHAT] NVIDIA relevance failed, defaulting to all files: {e}")
+        relevant_files = [f.get("filename") for f in files_list if f.get("filename")]
 
     # 1c) Ensure any explicitly mentioned files in the question are included
     # This safeguards against model misclassification
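
The new relevance block treats the files_relevance result as a {filename: bool} map and, if the call fails, falls back to every file rather than aborting the chat. A small sketch of that contract with a stubbed classifier; the real signature and return shape may differ:

    import asyncio
    from typing import Dict, List

    async def classify_relevance(question: str, filenames: List[str]) -> Dict[str, bool]:
        # Stand-in for the per-file relevance call; assumed to return {filename: bool}.
        return {fn: "report" in fn for fn in filenames}

    async def pick_relevant(question: str, filenames: List[str]) -> List[str]:
        try:
            relevance = await classify_relevance(question, filenames)
            return [fn for fn, ok in relevance.items() if ok]
        except Exception:
            # If the classifier fails, search every file instead of failing the chat.
            return list(filenames)

    files = ["report_q3.pdf", "notes.txt"]
    print(asyncio.run(pick_relevant("summarize the Q3 report", files)))  # ['report_q3.pdf']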
@@ -778,6 +842,7 @@ async def chat(
     recent_related = ""
 
     # 3) RAG vector search (restricted to relevant files if any)
+    logger.info(f"[CHAT] Starting vector search with relevant_files={relevant_files}")
     q_vec = embedder.embed([question])[0]
     hits = rag.vector_search(
         user_id=user_id,
@@ -786,6 +851,7 @@ async def chat(
         k=k,
         filenames=relevant_files if relevant_files else None
     )
+    logger.info(f"[CHAT] Vector search returned {len(hits) if hits else 0} hits")
     if not hits:
         logger.info(f"[CHAT] No hits with relevance filter. relevant_files={relevant_files}")
         # Retry 1: if we have explicit mentions, try restricting only to them
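
Combined with the retries referenced in the surrounding context lines, the search flow amounts to: query with the relevance filter, fall back to explicitly mentioned files, then drop the filter entirely rather than return nothing. A simplified sketch of that ladder with a stubbed search function; the real rag.vector_search call takes more arguments than shown here:

    from typing import List, Optional

    def search(query_vec: List[float], filenames: Optional[List[str]] = None) -> List[dict]:
        # Stand-in for rag.vector_search; returns a hit only for the unrestricted query
        # so the fallback path below is exercised.
        return [] if filenames else [{"doc": {"filename": "notes.txt"}, "score": 0.42}]

    def search_with_fallback(query_vec: List[float],
                             relevant: List[str],
                             mentioned: List[str]) -> List[dict]:
        # First pass: restrict to files the relevance step marked as useful.
        hits = search(query_vec, filenames=relevant or None)
        if not hits and mentioned:
            # Retry 1: restrict only to files the user explicitly mentioned.
            hits = search(query_vec, filenames=mentioned)
        if not hits:
            # Retry 2: drop the filter entirely rather than returning nothing.
            hits = search(query_vec, filenames=None)
        return hits

    print(len(search_with_fallback([0.1, 0.2], ["report_q3.pdf"], [])))  # 1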
@@ -840,6 +906,7 @@ async def chat(
             sources=[],
             relevant_files=relevant_files or mentioned_normalized
         )
+    # If we get here, we have hits, so continue with normal flow
     # Compose context
     contexts = []
     sources_meta = []
@@ -887,6 +954,7 @@ async def chat(
     selection = select_model(question=question, context=composed_context)
     logger.info(f"Model selection: {selection}")
     # Generate answer with model
+    logger.info(f"[CHAT] Generating answer with {selection['provider']} {selection['model']}")
     try:
         answer = await generate_answer_with_model(
             selection=selection,
@@ -895,6 +963,7 @@ async def chat(
             gemini_rotator=gemini_rotator,
             nvidia_rotator=nvidia_rotator
         )
+        logger.info(f"[CHAT] Answer generated successfully, length: {len(answer)}")
     except Exception as e:
         logger.error(f"LLM error: {e}")
         answer = "I had trouble contacting the language model provider just now. Please try again."
utils/__pycache__/logger.cpython-311.pyc
ADDED
Binary file (2.38 kB).

utils/__pycache__/parser.cpython-311.pyc
ADDED
Binary file (4.23 kB).

utils/__pycache__/rotator.cpython-311.pyc
ADDED
Binary file (4.7 kB).