Spaces:

chinmayjha
/

context-ai

Sleeping

App Files Community

chinmayjha committed on Oct 10

Commit

150cd80

unverified ·

1 Parent(s): 0e61755

Improve RAG agent response quality and UX

Browse files

- De-duplicate conversation insights to avoid repetition across chunks
- Group chunks by document ID and show insights only once per conversation
- Update summarizer prompt to generate cleaner answers without customer names
- Add inline citations [Doc X] in answers with numbered sources section
- Format sources with clean spacing (no bold/italic markdown)
- Include conversation insights (summary + key findings) in sources
- Add progress indicators to Gradio UI for better user feedback
- Reduce retrieved documents from 10 to 5 for faster responses
- Switch from XML to compact text format for token optimization
- Add conversation context from multiple chunks when available

Files changed (7) hide show

Makefile +7 -1
configs/compute_rag_vector_index_openai_contextual.yaml +15 -12
configs/compute_rag_vector_index_openai_contextual_simple.yaml +2 -2
src/second_brain_online/application/agents/agents.py +2 -2
src/second_brain_online/application/agents/tools/mongodb_retriever.py +133 -52
src/second_brain_online/application/agents/tools/summarizer.py +76 -31
src/second_brain_online/application/ui/custom_gradio_ui.py +16 -4

Makefile CHANGED Viewed

@@ -10,7 +10,7 @@ export PYTHONPATH = .
 # --- Default Values ---
 CHECK_DIRS := .
-RETRIEVER_CONFIG ?= configs/compute_rag_vector_index_openai_contextual_simple.yaml
 # --- Utilities ---
@@ -53,6 +53,12 @@ evaluate_agent: check-config
 run_conversation_analysis_ui: # Launch Conversation Analysis Dashboard
 	uv run python conversation_analysis_app.py
 # --- QA ---
 format-fix:

 # --- Default Values ---
 CHECK_DIRS := .
+RETRIEVER_CONFIG ?= configs/compute_rag_vector_index_openai_contextual.yaml
 # --- Utilities ---
 run_conversation_analysis_ui: # Launch Conversation Analysis Dashboard
 	uv run python conversation_analysis_app.py
+run_customer_profile_dashboard: # Launch Customer Profile Analysis Dashboard
+	uv run python -m tools.customer_profile_app
+run_user_interaction_dashboard: # Launch User Interaction Analysis Dashboard
+	uv run python tools/user_interaction_ui.py
 # --- QA ---
 format-fix:

configs/compute_rag_vector_index_openai_contextual.yaml CHANGED Viewed

@@ -1,17 +1,20 @@
 parameters:
-  extract_collection_name: raw
-  fetch_limit: 30
-  load_collection_name: rag
-  content_quality_score_threshold: 0.6
-  retriever_type: contextual
-  embedding_model_id: text-embedding-3-small
   embedding_model_type: openai
   embedding_model_dim: 1536
-  chunk_size: 3072
   contextual_summarization_type: contextual
-  contextual_agent_model_id: gpt-4o
-  contextual_agent_max_characters: 128
   mock: false
-  processing_batch_size: 2
-  processing_max_workers: 2
-  device: mps # or cuda (for Nvidia GPUs) or mps (for Apple M1/M2/M3 chips)

+# RAG Configuration for Conversation Data (Agent UI)
+# This config matches the settings used to create rag_conversations collection
 parameters:
+  extract_collection_name: test_conversation_documents
+  fetch_limit: 0
+  load_collection_name: rag_conversations  # Query conversation data
+  content_quality_score_threshold: 0.0
+  retriever_type: contextual  # Hybrid vector + full-text search
+  embedding_model_id: text-embedding-3-small  # Must match offline pipeline
   embedding_model_type: openai
   embedding_model_dim: 1536
+  chunk_size: 640  # Match offline pipeline
   contextual_summarization_type: contextual
+  contextual_agent_model_id: gpt-4o-mini
+  contextual_agent_max_characters: 200
   mock: false
+  processing_batch_size: 5
+  processing_max_workers: 4
+  device: mps

configs/compute_rag_vector_index_openai_contextual_simple.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 parameters:
-  extract_collection_name: test_intercom_data
   fetch_limit: 200
-  load_collection_name: rag_intercom
   content_quality_score_threshold: 0.6
   retriever_type: contextual
   embedding_model_id: text-embedding-3-small

 parameters:
+  extract_collection_name: test_conversation_documents
   fetch_limit: 200
+  load_collection_name: rag_conversations
   content_quality_score_threshold: 0.6
   retriever_type: contextual
   embedding_model_id: text-embedding-3-small

src/second_brain_online/application/agents/agents.py CHANGED Viewed

@@ -61,9 +61,9 @@ class AgentWrapper:
         )
         agent = ToolCallingAgent(
-            tools=[what_can_i_do, retriever_tool],  # Remove summarizer - it's redundant
             model=model,
-            max_steps=2,  # Reduce steps since we removed summarizer
             verbosity_level=2,
         )

         )
         agent = ToolCallingAgent(
+            tools=[what_can_i_do, retriever_tool, summarizer_tool],
             model=model,
+            max_steps=3,  # Retrieval → answer_with_sources → final_answer (pass-through)
             verbosity_level=2,
         )

src/second_brain_online/application/agents/tools/mongodb_retriever.py CHANGED Viewed

@@ -5,8 +5,10 @@ import yaml
 from loguru import logger
 from opik import opik_context, track
 from smolagents import Tool
 from second_brain_online.application.rag import get_retriever
 class MongoDBRetrieverTool(Tool):
@@ -33,6 +35,11 @@ class MongoDBRetrieverTool(Tool):
         self.config_path = config_path
         self.retriever = self.__load_retriever(config_path)
     def __load_retriever(self, config_path: Path):
         config = yaml.safe_load(config_path.read_text())
@@ -42,13 +49,58 @@ class MongoDBRetrieverTool(Tool):
             embedding_model_id=config["embedding_model_id"],
             embedding_model_type=config["embedding_model_type"],
             retriever_type=config["retriever_type"],
-            k=5,
             device=config["device"],
             enable_reranking=config.get("enable_reranking", False),
             rerank_model_name=config.get("rerank_model_name", "cross-encoder/ms-marco-MiniLM-L-2-v2"),
             stage1_limit=config.get("stage1_limit", 50),
-            final_k=config.get("final_k", 10),
         )
     @track(name="MongoDBRetrieverTool.forward")
     def forward(self, query: str) -> str:
@@ -78,62 +130,91 @@ class MongoDBRetrieverTool(Tool):
             query = self.__parse_query(query)
             relevant_docs = self.retriever.invoke(query)
-            formatted_docs = []
             for i, doc in enumerate(relevant_docs, 1):
-                # Extract metadata
-                title = doc.metadata.get("title", "Untitled")
-                datetime = doc.metadata.get("datetime", "unknown")
-                contextual_summary = doc.metadata.get("contextual_summary", "")
-                marketing_insights = doc.metadata.get("marketing_insights", {})
-                content = doc.page_content.strip()
-                # Format marketing insights if available
-                marketing_insights_text = ""
-                if marketing_insights:
-                    marketing_insights_text = "\n<marketing_insights>\n"
-                    # Add quotes
-                    quotes = marketing_insights.get("quotes", [])
-                    if quotes:
-                        marketing_insights_text += "<quotes>\n"
-                        for quote in quotes:
-                            marketing_insights_text += f"- \"{quote.get('quote', '')}\" (Sentiment: {quote.get('sentiment', 'Unknown')})\n"
-                        marketing_insights_text += "</quotes>\n"
-                    # Add key findings
-                    findings = marketing_insights.get("key_findings", [])
-                    if findings:
-                        marketing_insights_text += "<key_findings>\n"
-                        for finding in findings:
-                            marketing_insights_text += f"- {finding.get('finding', '')} (Impact: {finding.get('impact', 'Unknown')})\n"
-                        marketing_insights_text += "</key_findings>\n"
-                    marketing_insights_text += "</marketing_insights>\n"
-                # Create optimized document structure - truncate content to avoid token overload
-                content_preview = content[:500] + "..." if len(content) > 500 else content
-                formatted_docs.append(
-                    f"""
-<document id="{i}">
-<title>{title}</title>
-<date>{datetime}</date>
-<contextual_summary>
-{contextual_summary}
-</contextual_summary>
-{marketing_insights_text}
-<content>
-{content_preview}
-</content>
-</document>
-"""
-                )
             result = "\n".join(formatted_docs)
-            result = f"""
-<search_results>
 {result}
-</search_results>
-When using context from any document, reference the document title and date for attribution.
 """
             return result
         except Exception:

 from loguru import logger
 from opik import opik_context, track
 from smolagents import Tool
+from pymongo import MongoClient
 from second_brain_online.application.rag import get_retriever
+from second_brain_online.config import settings
 class MongoDBRetrieverTool(Tool):
         self.config_path = config_path
         self.retriever = self.__load_retriever(config_path)
+        # Setup MongoDB client for fetching conversation insights
+        self.mongodb_client = MongoClient(settings.MONGODB_URI)
+        self.database = self.mongodb_client[settings.MONGODB_DATABASE_NAME]
+        self.conversation_docs_collection = self.database["test_conversation_documents"]
     def __load_retriever(self, config_path: Path):
         config = yaml.safe_load(config_path.read_text())
             embedding_model_id=config["embedding_model_id"],
             embedding_model_type=config["embedding_model_type"],
             retriever_type=config["retriever_type"],
+            k=5,  # Reduced from 10 to 5 for faster processing
             device=config["device"],
             enable_reranking=config.get("enable_reranking", False),
             rerank_model_name=config.get("rerank_model_name", "cross-encoder/ms-marco-MiniLM-L-2-v2"),
             stage1_limit=config.get("stage1_limit", 50),
+            final_k=config.get("final_k", 5),  # Reduced from 10 to 5
         )
+    def __fetch_conversation_insights(self, document_ids: list[str]) -> dict:
+        """
+        Fetch conversation_insights and metadata for the given document IDs from test_conversation_documents.
+        Args:
+            document_ids: List of document IDs to fetch insights for
+        Returns:
+            Dictionary mapping document_id -> {conversation_insights, url, source, user_id}
+        """
+        insights_map = {}
+        not_found_count = 0
+        # Fetch documents from MongoDB with additional metadata
+        cursor = self.conversation_docs_collection.find(
+            {"id": {"$in": document_ids}},
+            {
+                "id": 1,
+                "conversation_insights": 1,
+                "metadata.url": 1,
+                "metadata.source": 1,
+                "metadata.user_id": 1
+            }
+        )
+        for doc in cursor:
+            doc_id = doc.get("id")
+            insights = doc.get("conversation_insights")
+            metadata = doc.get("metadata", {})
+            if insights:
+                insights_map[doc_id] = {
+                    "conversation_insights": insights,
+                    "url": metadata.get("url"),
+                    "source": metadata.get("source"),
+                    "user_id": metadata.get("user_id")
+                }
+        # Track mismatches
+        not_found_count = len(document_ids) - len(insights_map)
+        if not_found_count > 0:
+            logger.warning(f"Could not find conversation_insights for {not_found_count} out of {len(document_ids)} document IDs")
+        return insights_map
     @track(name="MongoDBRetrieverTool.forward")
     def forward(self, query: str) -> str:
             query = self.__parse_query(query)
             relevant_docs = self.retriever.invoke(query)
+            # Step 1: Extract unique document IDs from chunks
+            document_ids = []
+            for doc in relevant_docs:
+                doc_id = doc.metadata.get("id")
+                if doc_id:
+                    document_ids.append(doc_id)
+            # Step 2: Fetch conversation insights for unique IDs
+            unique_doc_ids = list(set(document_ids))  # De-duplicate
+            insights_map = self.__fetch_conversation_insights(unique_doc_ids)
+            # Step 3: Group chunks by document ID to avoid duplicating insights
+            docs_by_id = {}
+            skipped_chunks = 0
             for i, doc in enumerate(relevant_docs, 1):
+                doc_id = doc.metadata.get("id")
+                # Skip chunks without conversation insights
+                if not doc_id or doc_id not in insights_map:
+                    skipped_chunks += 1
+                    logger.debug(f"Skipping chunk {i} - no conversation insights available for doc_id: {doc_id}")
+                    continue
+                # Group chunks by document ID
+                if doc_id not in docs_by_id:
+                    docs_by_id[doc_id] = {
+                        "title": doc.metadata.get("title", "Untitled"),
+                        "datetime": doc.metadata.get("datetime", "unknown"),
+                        "source": insights_map[doc_id].get("source", "Unknown Source"),
+                        "url": insights_map[doc_id].get("url", ""),
+                        "user_id": insights_map[doc_id].get("user_id", ""),
+                        "insights": insights_map[doc_id]["conversation_insights"],
+                        "chunks": []
+                    }
+                # Add this chunk's contextual summary to the document
+                docs_by_id[doc_id]["chunks"].append(doc.metadata.get("contextual_summary", ""))
+            # Step 4: Format unique documents with their insights
+            formatted_docs = []
+            for doc_num, (doc_id, doc_info) in enumerate(docs_by_id.items(), 1):
+                doc_text = f"=== DOCUMENT {doc_num} ===\n"
+                doc_text += f"Title: {doc_info['title']}\n"
+                doc_text += f"Date: {doc_info['datetime']}\n"
+                doc_text += f"Source: {doc_info['source']} | ID: {doc_id}"
+                if doc_info['user_id']:
+                    doc_text += f" | User: {doc_info['user_id']}"
+                if doc_info['url']:
+                    doc_text += f"\nURL: {doc_info['url']}"
+                # Add all chunk contexts from this conversation
+                doc_text += f"\n\nCONTEXT (from {len(doc_info['chunks'])} chunk(s)):\n"
+                for chunk_idx, chunk_context in enumerate(doc_info['chunks'], 1):
+                    doc_text += f"{chunk_idx}. {chunk_context}\n"
+                # Add conversation insights (only once per conversation)
+                insights = doc_info['insights']
+                summary = insights.get("summary", "")
+                if summary:
+                    doc_text += f"\nINSIGHTS SUMMARY: {summary}\n"
+                # Add key findings
+                key_findings = insights.get("key_findings", [])
+                if key_findings:
+                    doc_text += "\nKEY FINDINGS:\n"
+                    for finding in key_findings:
+                        insight_type = finding.get("insight_type", "Unknown")
+                        finding_text = finding.get("finding", "")
+                        impact = finding.get("impact", "Unknown")
+                        doc_text += f"- [{insight_type}/{impact}] {finding_text}\n"
+                doc_text += "\n---\n"
+                formatted_docs.append(doc_text)
+            # Log statistics
+            logger.info(f"Retrieved {len(relevant_docs)} chunks from {len(docs_by_id)} unique conversations, skipped {skipped_chunks} without insights")
             result = "\n".join(formatted_docs)
+            result = f"""SEARCH RESULTS
+===============
 {result}
+When using context, reference the document title, date, and ID for attribution.
 """
             return result
         except Exception:

src/second_brain_online/application/agents/tools/summarizer.py CHANGED Viewed

@@ -61,41 +61,77 @@ class HuggingFaceEndpointSummarizerTool(Tool):
 class OpenAISummarizerTool(Tool):
-    name = "openai_summarizer"
-    description = """Use this tool to summarize search results in XML format. This tool is especially useful when you need to analyze multiple documents from search results. The tool will parse XML search results, identify topics that are directly relevant to the user's query, and create a focused summary with document references. It filters out irrelevant topics to ensure the summary directly answers the user's question."""
     inputs = {
-        "text": {
             "type": "string",
-            "description": """The text to summarize.""",
         }
     }
     output_type = "string"
-    SYSTEM_PROMPT = """You are an expert document analyst specialized in query-focused summarization.
-Your task is to analyze search results and create a focused summary that directly answers the user's question.
-When you receive XML search results, you should:
-1. Parse ALL documents from the XML structure
-2. Identify topics that are directly relevant to the user's query
-3. Filter out irrelevant topics that don't relate to the question
-4. Group related information by relevant topics
-5. Extract key insights that directly answer the user's question
-6. Include document references with titles and dates when available
-Analysis Guidelines:
-- Focus on information that directly answers the user's question
-- Only include topics that are relevant to the query
-- Use specific document titles and dates from the XML metadata when available
-- Ignore irrelevant information like cookie policies, privacy policies, HTTP errors, etc.
-- Create a well-structured, readable summary
-- Group similar topics together when appropriate
-Document content:
 {content}
-Generate a focused summary that directly answers the user's question, organized by relevant topics with document references. Exclude any topics that don't directly relate to the question."""
     def __init__(self, *args, **kwargs) -> None:
         super().__init__(*args, **kwargs)
@@ -105,22 +141,31 @@ Generate a focused summary that directly answers the user's question, organized
             api_key=settings.OPENAI_API_KEY,
         )
-    @track
-    def forward(self, text: str) -> str:
         result = self.__client.chat.completions.create(
             model=settings.OPENAI_MODEL_ID,
             messages=[
                 {
                     "role": "system",
-                    "content": "You are an expert document analyst specialized in query-focused topic-based summarization. You excel at parsing XML search results, identifying relevant topics, and creating structured summaries with proper document references."
                 },
                 {
                     "role": "user",
-                    "content": self.SYSTEM_PROMPT.format(content=text),
                 },
             ],
-            temperature=0.1,  # Lower temperature for more consistent, focused output
-            max_tokens=2000,  # Increased token limit for more detailed summaries
         )
         return result.choices[0].message.content

 class OpenAISummarizerTool(Tool):
+    name = "answer_with_sources"
+    description = """Use this tool to generate the complete final answer to the user's question based on search results.
+After retrieving documents with mongodb_vector_search_retriever, use this tool to synthesize a comprehensive answer with a Sources section.
+CRITICAL: This tool's output is the complete answer - after getting results from this tool, you MUST call the built-in final_answer tool and pass this output EXACTLY as-is without any modifications."""
     inputs = {
+        "search_results": {
             "type": "string",
+            "description": """The complete search results from mongodb_vector_search_retriever to analyze and synthesize into an answer. Pass the ENTIRE output from the retriever tool.""",
         }
     }
     output_type = "string"
+    SYSTEM_PROMPT = """Based on the search results below, create a comprehensive answer to the user's question.
 {content}
+Create a two-part response:
+1. **ANSWER** (with inline citations):
+   - Focus on the core issues, concerns, or highlights identified
+   - DO NOT mention specific customer names or personal identifiers
+   - Group related insights by topic with bullet points
+   - Be concise and general, highlighting the problem/concern rather than individuals
+   - Add INLINE CITATIONS at the end of each point using format: [Doc X]
+   - Number each unique document sequentially (Doc 1, Doc 2, etc.)
+   Example:
+   • Organizations are planning phone number porting transitions, but custom porting is expensive (~$1,000) and should be done in bulk [Doc 1]
+   • Questions about additional license requirements for integrations ($45 per user) [Doc 1]
+   • Ringtone volume issues in embedded Salesforce app [Doc 2]
+2. **📚 Sources** (at the end):
+   - List ONLY UNIQUE documents (de-duplicate by Document ID)
+   - Number each unique source to match the inline citations (Doc 1, Doc 2, etc.)
+   - Format URLs as markdown links: [View Chat](url) or [View Recording](url)
+   For EACH unique document, use this EXACT structure with proper spacing and NO bold/italic formatting:
+   Doc X: [Title (Date)]
+   Source: [Type] | Document ID: [ID] | [Hyperlinked URL if available] | [User ID if available]
+   Summary: [One-line summary of the conversation]
+   Key Findings:
+   - [Type/Impact] Finding text here
+   - [Type/Impact] Finding text here
+   Example:
+   Doc 1: JustCall Checkin (2025-10-07)
+   Source: Justcall Meeting Recordings | Document ID: 4f6f9cee4f
+   Summary: Discussion about phone number porting timeline and costs
+   Key Findings:
+   - [Technical Issue/High] Custom porting is expensive at $1,000 per request
+   - [Feature Request/Medium] Need bulk porting option to reduce costs
+   Doc 2: Intercom Conversation (2025-10-05)
+   Source: Intercom Chats | Document ID: 7a6678783fea06d | [View Chat](https://app.intercom.com/...) | User ID: 432830
+   Summary: Customer requesting billing discount due to service interruption
+   Key Findings:
+   - [Pricing Concern/High] Request for discount due to porting delays
+   - [Policy Gap/Medium] No current policy for inactivity-based discounts
+Provide a focused answer with inline citations followed by the well-formatted Sources section with conversation insights."""
     def __init__(self, *args, **kwargs) -> None:
         super().__init__(*args, **kwargs)
             api_key=settings.OPENAI_API_KEY,
         )
+    def forward(self, search_results: str) -> str:
+        """Generate final answer with sources based on search results.
+        Args:
+            search_results: The complete search results to analyze (includes the original query)
+        Returns:
+            Complete answer with Sources section
+        """
         result = self.__client.chat.completions.create(
             model=settings.OPENAI_MODEL_ID,
             messages=[
                 {
                     "role": "system",
+                    "content": "You are an expert analyst. Answer the user's question based on the search results provided. Create a comprehensive answer with a Sources section."
                 },
                 {
                     "role": "user",
+                    "content": self.SYSTEM_PROMPT.format(content=search_results),
                 },
             ],
+            temperature=0.0,  # Deterministic output
+            max_tokens=1500,  # Reduced for faster response
+            timeout=45.0,  # Reduced timeout
         )
         return result.choices[0].message.content

src/second_brain_online/application/ui/custom_gradio_ui.py CHANGED Viewed

@@ -128,13 +128,15 @@ class CustomGradioUI:
             self.submit_btn.click(
                 fn=self.process_query,
                 inputs=[self.query_input],
-                outputs=[self.answer_output, self.sources_output, self.tools_output, self.debug_output, self.conversation_table]
             )
             self.query_input.submit(
                 fn=self.process_query,
                 inputs=[self.query_input],
-                outputs=[self.answer_output, self.sources_output, self.tools_output, self.debug_output, self.conversation_table]
             )
             # Conversation search handlers
@@ -150,16 +152,24 @@ class CustomGradioUI:
                 outputs=[self.conversation_search, self.conversation_table]
             )
-    def process_query(self, query: str) -> Tuple[str, str, str, str, pd.DataFrame]:
         """Process the user query and return formatted response components."""
         if not query.strip():
             # Clear all outputs when query is empty
             return "", "", "", "", self.load_conversations()
         try:
-            # Run the agent
             result = self.agent.run(query)
             # Parse the result with agent logs
             agent_logs = getattr(self.agent, 'logs', []) if hasattr(self.agent, 'logs') else []
             answer, sources, tools_used = self.parse_agent_response(result, agent_logs)
@@ -187,8 +197,10 @@ class CustomGradioUI:
             debug_text = str(result)
             # Filter conversations based on sources used
             filtered_conversations = self.filter_conversations_by_sources(sources)
             return answer_html, sources_html, tools_html, debug_text, filtered_conversations
         except Exception as e:

             self.submit_btn.click(
                 fn=self.process_query,
                 inputs=[self.query_input],
+                outputs=[self.answer_output, self.sources_output, self.tools_output, self.debug_output, self.conversation_table],
+                show_progress="full"  # Show progress indicator
             )
             self.query_input.submit(
                 fn=self.process_query,
                 inputs=[self.query_input],
+                outputs=[self.answer_output, self.sources_output, self.tools_output, self.debug_output, self.conversation_table],
+                show_progress="full"  # Show progress indicator
             )
             # Conversation search handlers
                 outputs=[self.conversation_search, self.conversation_table]
             )
+    def process_query(self, query: str, progress=gr.Progress()) -> Tuple[str, str, str, str, pd.DataFrame]:
         """Process the user query and return formatted response components."""
         if not query.strip():
             # Clear all outputs when query is empty
             return "", "", "", "", self.load_conversations()
         try:
+            # Show progress indicator with descriptive message
+            progress(0, desc="🔍 Starting query processing...")
+            # Run the agent (this takes 30-60 seconds)
+            # Use None for indeterminate progress during long operation
+            progress(None, desc="🔍 Searching knowledge base and retrieving documents...")
             result = self.agent.run(query)
+            # Quick post-processing steps
+            progress(0.8, desc="✨ Formatting answer and sources...")
             # Parse the result with agent logs
             agent_logs = getattr(self.agent, 'logs', []) if hasattr(self.agent, 'logs') else []
             answer, sources, tools_used = self.parse_agent_response(result, agent_logs)
             debug_text = str(result)
             # Filter conversations based on sources used
+            progress(0.95, desc="📊 Updating conversation list...")
             filtered_conversations = self.filter_conversations_by_sources(sources)
+            progress(1.0, desc="✅ Complete!")
             return answer_html, sources_html, tools_html, debug_text, filtered_conversations
         except Exception as e: