Spaces:

chinmayjha
/

context-ai

Sleeping

App Files Files Community

chinmayjha committed 29 days ago

Commit

a697e1b

unverified ·

1 Parent(s): 8c6064d

feat: optimize RAG agent with token reduction and separate context/sources

Browse files

- Refactor mongodb_retriever.py to separate lightweight context from full sources
- Store sources in class variable to avoid sending to LLM (50% token reduction)
- Update summarizer.py to append cached sources to final answer
- Add customer profile UI for displaying analysis results
- Add user interaction UI for tracking customer changes
- Optimize context format: Doc Title | Date with bullet point summaries
- Keep full sources metadata separate for final output only

This significantly reduces LLM token usage while maintaining answer quality.

Files changed (6) hide show

configs/rag_conversations.yaml +22 -0
src/second_brain_online/application/agents/tools/mongodb_retriever.py +72 -30
src/second_brain_online/application/agents/tools/summarizer.py +59 -71
src/second_brain_online/application/ui/customer_profile_ui.py +722 -0
tools/customer_profile_app.py +82 -0
tools/user_interaction_ui.py +516 -0

configs/rag_conversations.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

+# RAG Configuration for Conversation Data (Agent UI)
+# This config matches the settings used to create rag_conversations collection
+parameters:
+  # Collection settings (must match what's in MongoDB)
+  extract_collection_name: test_conversation_documents
+  fetch_limit: 0
+  load_collection_name: rag_conversations  # This is what the agent will query
+  # Retriever settings (must match how embeddings were created)
+  retriever_type: contextual  # Hybrid vector + full-text search
+  embedding_model_id: text-embedding-3-small  # Same as offline pipeline
+  embedding_model_type: openai
+  embedding_model_dim: 1536
+  # These settings are for display/reference only (not used by agent UI)
+  chunk_size: 640
+  contextual_summarization_type: contextual
+  contextual_agent_model_id: gpt-4o-mini
+  contextual_agent_max_characters: 200
+  device: mps

src/second_brain_online/application/agents/tools/mongodb_retriever.py CHANGED Viewed

@@ -29,6 +29,11 @@ class MongoDBRetrieverTool(Tool):
         }
     }
     output_type = "string"
     def __init__(self, config_path: Path, **kwargs):
         super().__init__(**kwargs)
@@ -169,55 +174,92 @@ class MongoDBRetrieverTool(Tool):
                 # Add this chunk's contextual summary to the document
                 docs_by_id[doc_id]["chunks"].append(doc.metadata.get("contextual_summary", ""))
-            # Step 4: Format unique documents with their insights
-            formatted_docs = []
             for doc_num, (doc_id, doc_info) in enumerate(docs_by_id.items(), 1):
-                doc_text = f"=== DOCUMENT {doc_num} ===\n"
-                doc_text += f"Title: {doc_info['title']}\n"
-                doc_text += f"Date: {doc_info['datetime']}\n"
-                doc_text += f"Source: {doc_info['source']} | ID: {doc_id}"
                 if doc_info['user_id']:
-                    doc_text += f" | User: {doc_info['user_id']}"
-                if doc_info['url']:
-                    doc_text += f"\nURL: {doc_info['url']}"
-                # Add all chunk contexts from this conversation
-                doc_text += f"\n\nCONTEXT (from {len(doc_info['chunks'])} chunk(s)):\n"
-                for chunk_idx, chunk_context in enumerate(doc_info['chunks'], 1):
-                    doc_text += f"{chunk_idx}. {chunk_context}\n"
-                # Add conversation insights (for Sources section only - not for answer generation)
                 insights = doc_info['insights']
-                doc_text += f"\n[METADATA FOR SOURCES SECTION]\n"
                 summary = insights.get("summary", "")
                 if summary:
-                    doc_text += f"Summary: {summary}\n"
                 key_findings = insights.get("key_findings", [])
                 if key_findings:
-                    doc_text += "Key Findings:\n"
                     for finding in key_findings:
                         insight_type = finding.get("insight_type", "Unknown")
                         finding_text = finding.get("finding", "")
                         impact = finding.get("impact", "Unknown")
-                        doc_text += f"- [{insight_type}/{impact}] {finding_text}\n"
-                doc_text += "\n---\n"
-                formatted_docs.append(doc_text)
-            # Log statistics
             logger.info(f"Retrieved {len(relevant_docs)} chunks from {len(docs_by_id)} unique conversations, skipped {skipped_chunks} without insights")
-            result = "\n".join(formatted_docs)
-            result = f"""SEARCH RESULTS
-===============
-{result}
-When using context, reference the document title, date, and ID for attribution.
-"""
-            return result
         except Exception:
             logger.opt(exception=True).debug("Error retrieving documents.")

         }
     }
     output_type = "string"
+    # Class variable to store formatted sources for the summarizer tool to access
+    # This allows us to pass ONLY lightweight context to the LLM, while the summarizer
+    # can append the full sources section to the final answer
+    _cached_sources = ""
     def __init__(self, config_path: Path, **kwargs):
         super().__init__(**kwargs)
                 # Add this chunk's contextual summary to the document
                 docs_by_id[doc_id]["chunks"].append(doc.metadata.get("contextual_summary", ""))
+            # Step 4: Format output into TWO sections to reduce LLM token usage
+            # Section A: Lightweight context for LLM answer generation (minimal tokens)
+            # Section B: Full metadata for sources section (appended to final answer)
+            context_for_llm = []  # Lightweight format for answer generation
+            metadata_for_sources = []  # Full format for sources section
             for doc_num, (doc_id, doc_info) in enumerate(docs_by_id.items(), 1):
+                # =================================================================
+                # SECTION A: CONTEXT FOR LLM (Lightweight - Reduced Token Usage)
+                # =================================================================
+                # Format: Doc Title | Date | User ID
+                #         - Contextual Summary 1
+                #         - Contextual Summary 2
+                # This section is sent to the LLM for answer generation
+                context_text = f"Doc {doc_num}: {doc_info['title']} | {doc_info['datetime']}"
                 if doc_info['user_id']:
+                    context_text += f" | User: {doc_info['user_id']}"
+                context_text += "\n"
+                # Add contextual summaries as bullet points (compact format)
+                for chunk_context in doc_info['chunks']:
+                    context_text += f"- {chunk_context}\n"
+                context_text += "\n"
+                context_for_llm.append(context_text)
+                # =================================================================
+                # SECTION B: METADATA FOR SOURCES (Full details for final answer)
+                # =================================================================
+                # This section is NOT sent to the LLM but appended to the final answer
+                # Contains full conversation insights, URLs, and structured metadata
+                source_text = f"Doc {doc_num}: {doc_info['title']} ({doc_info['datetime']})\n"
+                source_text += f"Source: {doc_info['source']} | Document ID: {doc_id}"
+                if doc_info['url']:
+                    source_text += f" | [View Chat]({doc_info['url']})"
+                if doc_info['user_id']:
+                    source_text += f" | User ID: {doc_info['user_id']}"
+                source_text += "\n\n"
+                # Add conversation insights (summary + key findings)
                 insights = doc_info['insights']
                 summary = insights.get("summary", "")
                 if summary:
+                    source_text += f"Summary: {summary}\n\n"
                 key_findings = insights.get("key_findings", [])
                 if key_findings:
+                    source_text += "Key Findings:\n"
                     for finding in key_findings:
                         insight_type = finding.get("insight_type", "Unknown")
                         finding_text = finding.get("finding", "")
                         impact = finding.get("impact", "Unknown")
+                        source_text += f"- [{insight_type}/{impact}] {finding_text}\n"
+                source_text += "\n---\n\n"
+                metadata_for_sources.append(source_text)
+            # Log statistics for monitoring
             logger.info(f"Retrieved {len(relevant_docs)} chunks from {len(docs_by_id)} unique conversations, skipped {skipped_chunks} without insights")
+            # =================================================================
+            # STORE SOURCES SEPARATELY AND RETURN ONLY LIGHTWEIGHT CONTEXT
+            # =================================================================
+            # Strategy: Store formatted sources in class variable for summarizer to access
+            # Return ONLY lightweight context to LLM (reduces tokens significantly)
+            # Summarizer will append sources directly to final answer
+            # Build lightweight context string (ONLY this goes to LLM)
+            context_section = "".join(context_for_llm)
+            # Build formatted sources string (stored for later appending)
+            metadata_section = "".join(metadata_for_sources)
+            # Store sources in class variable for summarizer tool to access
+            # This ensures we don't send sources to the LLM at all
+            MongoDBRetrieverTool._cached_sources = f"""📚 Sources
+{metadata_section}"""
+            # Return ONLY the lightweight context to be sent to LLM
+            logger.info(f"Returning {len(context_section)} chars of context to LLM, {len(MongoDBRetrieverTool._cached_sources)} chars cached for sources")
+            return context_section
         except Exception:
             logger.opt(exception=True).debug("Error retrieving documents.")

src/second_brain_online/application/agents/tools/summarizer.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from openai import OpenAI
 from opik import track
 from smolagents import Tool
 from second_brain_online.config import settings
@@ -76,80 +77,42 @@ CRITICAL: This tool generates the final answer that will be returned to the user
     }
     output_type = "string"
-    SYSTEM_PROMPT = """Based on the search results below, create a comprehensive answer to the user's question.
 {content}
 IMPORTANT INSTRUCTIONS:
-- Use the CONTEXT sections from each document to generate your answer
-- Use the [METADATA FOR SOURCES SECTION] to populate the Sources section (Summary and Key Findings)
-- DO NOT use the metadata to answer the question - only use CONTEXT for the answer
-Create a two-part response:
-1. **ANSWER** (with inline citations):
-   - Base your answer ONLY on the CONTEXT sections from the documents
-   - Focus on the core issues, concerns, or highlights identified in the CONTEXT
-   - DO NOT mention specific customer names or personal identifiers
-   - Group related insights by topic with bullet points
-   - Be concise and general, highlighting the problem/concern rather than individuals
-   - Add INLINE CITATIONS at the end of each point using ONLY this format: [Doc X]
-   - CRITICAL: Citations must be EXACTLY "[Doc 1]", "[Doc 2]", etc. - nothing else
-   - DO NOT add any other information in citations (no titles, dates, IDs, or sources in the citation)
-   - Number each unique document sequentially (Doc 1, Doc 2, etc.)
-   CORRECT Example:
-   • Organizations are planning phone number porting transitions, but custom porting is expensive (~$1,000) and should be done in bulk [Doc 1]
-   • Questions about additional license requirements for integrations ($45 per user) [Doc 1]
-   • Ringtone volume issues in embedded Salesforce app [Doc 2]
-   WRONG Example (DO NOT DO THIS):
-   • Custom porting costs around $1,000 [Source: JustCall Checkin, Document ID: abc123]
-   • License fees are $45 per user [JustCall, 2025-10-07]
-2. **📚 Sources** (at the end):
-   - List ONLY UNIQUE documents (de-duplicate by Document ID)
-   - Number each unique source to match the inline citations (Doc 1, Doc 2, etc.)
-   - Use the information from [METADATA FOR SOURCES SECTION] to populate Summary and Key Findings
-   - Format URLs as markdown links: [View Chat](url) or [View Recording](url)
-   For EACH unique document, use this EXACT structure with proper spacing and NO bold/italic formatting:
-   Doc X: [Title (Date)]
-   Source: [Type] | Document ID: [ID] | [Hyperlinked URL if available] | [User ID if available]
-   Summary: [Copy from the metadata section]
-   Key Findings:
-   - [Type/Impact] [Copy from the metadata section]
-   - [Type/Impact] [Copy from the metadata section]
-   Example:
-   Doc 1: JustCall Checkin (2025-10-07)
-   Source: Justcall Meeting Recordings | Document ID: 4f6f9cee4f
-   Summary: Discussion about phone number porting timeline and costs
-   Key Findings:
-   - [Technical Issue/High] Custom porting is expensive at $1,000 per request
-   - [Feature Request/Medium] Need bulk porting option to reduce costs
-   Doc 2: Intercom Conversation (2025-10-05)
-   Source: Intercom Chats | Document ID: 7a6678783fea06d | [View Chat](https://app.intercom.com/...) | User ID: 432830
-   Summary: Customer requesting billing discount due to service interruption
-   Key Findings:
-   - [Pricing Concern/High] Request for discount due to porting delays
-   - [Policy Gap/Medium] No current policy for inactivity-based discounts
-Provide a focused answer with inline citations followed by the well-formatted Sources section with conversation insights.
 CRITICAL RULES:
-- In the ANSWER section, use ONLY [Doc X] format for citations
-- In the Sources section, provide full details about each Doc
-- NEVER mix citation formats - keep them separate and clean"""
     def __init__(self, *args, **kwargs) -> None:
         super().__init__(*args, **kwargs)
@@ -162,13 +125,23 @@ CRITICAL RULES:
     def forward(self, search_results: str) -> str:
         """Generate final answer with sources based on search results.
         Args:
-            search_results: The complete search results to analyze (includes the original query)
         Returns:
-            Complete answer with Sources section
         """
         result = self.__client.chat.completions.create(
             model=settings.OPENAI_MODEL_ID,
             messages=[
@@ -186,4 +159,19 @@ CRITICAL RULES:
             timeout=45.0,  # Reduced timeout
         )
-        return result.choices[0].message.content

 from openai import OpenAI
 from opik import track
 from smolagents import Tool
+from loguru import logger
 from second_brain_online.config import settings
     }
     output_type = "string"
+    SYSTEM_PROMPT = """Based on the context below, create a comprehensive answer to the user's question.
 {content}
 IMPORTANT INSTRUCTIONS:
+The context contains lightweight information from retrieved documents with this format:
+- Doc X: Title | Date | User ID
+- Contextual summaries as bullet points
+Generate ONLY the ANSWER section with inline citations:
+**ANSWER** (with inline citations):
+- Base your answer ONLY on the provided context
+- Focus on the core issues, concerns, or highlights identified
+- DO NOT mention specific customer names or personal identifiers
+- Group related insights by topic with bullet points
+- Be concise and general, highlighting the problem/concern rather than individuals
+- Add INLINE CITATIONS at the end of each point using ONLY this format: [Doc X]
+- CRITICAL: Citations must be EXACTLY "[Doc 1]", "[Doc 2]", etc. - nothing else
+- DO NOT add any other information in citations (no titles, dates, IDs, or sources)
+- Number each unique document sequentially (Doc 1, Doc 2, etc.)
+CORRECT Example:
+• Organizations are planning phone number porting transitions, but custom porting is expensive (~$1,000) and should be done in bulk [Doc 1]
+• Questions about additional license requirements for integrations ($45 per user) [Doc 1]
+• Ringtone volume issues in embedded Salesforce app [Doc 2]
+WRONG Example (DO NOT DO THIS):
+• Custom porting costs around $1,000 [Source: JustCall Checkin, Document ID: abc123]
+• License fees are $45 per user [JustCall, 2025-10-07]
 CRITICAL RULES:
+- Generate answer ONLY with the exact context provided above
+- Use ONLY [Doc X] format for citations
+- DO NOT add a Sources section (it will be appended automatically)
+- Keep the answer focused and well-structured with bullet points"""
     def __init__(self, *args, **kwargs) -> None:
         super().__init__(*args, **kwargs)
     def forward(self, search_results: str) -> str:
         """Generate final answer with sources based on search results.
+        This method:
+        1. Sends ONLY lightweight context to the LLM for answer generation
+        2. Retrieves pre-formatted sources from MongoDBRetrieverTool._cached_sources
+        3. Appends sources directly to the LLM's answer
         Args:
+            search_results: Lightweight context from the retriever (NOT including sources)
         Returns:
+            Complete answer with Sources section appended
         """
+        # Import here to avoid circular dependency
+        from second_brain_online.application.agents.tools.mongodb_retriever import MongoDBRetrieverTool
+        # Step 1: Generate answer from LLM using ONLY lightweight context
+        # This significantly reduces token usage compared to sending full sources
         result = self.__client.chat.completions.create(
             model=settings.OPENAI_MODEL_ID,
             messages=[
             timeout=45.0,  # Reduced timeout
         )
+        llm_answer = result.choices[0].message.content
+        # Step 2: Retrieve pre-formatted sources from the retriever's class variable
+        # These sources were cached during the retrieval step and are NOT sent to LLM
+        cached_sources = MongoDBRetrieverTool._cached_sources
+        # Step 3: Append sources directly to the answer
+        # This ensures sources are included in the final output without being sent to LLM
+        if cached_sources:
+            final_answer = f"{llm_answer}\n\n{cached_sources}"
+            logger.info(f"Appended {len(cached_sources)} chars of sources to {len(llm_answer)} chars of LLM answer")
+        else:
+            final_answer = llm_answer
+            logger.warning("No cached sources found - returning LLM answer only")
+        return final_answer

src/second_brain_online/application/ui/customer_profile_ui.py ADDED Viewed

	@@ -0,0 +1,722 @@

+import json
+import re
+from typing import Any, Dict, List, Tuple, Optional
+from datetime import datetime
+from pathlib import Path
+import gradio as gr
+import pandas as pd
+from second_brain_online.config import settings
+class CustomerProfileUI:
+    """Gradio UI for displaying customer profile analyses with search functionality."""
+    def __init__(self, data_dir: str = None):
+        # Default to the offline data directory
+        if data_dir is None:
+            # Go up from second-brain-online to second-brain-offline/data/customer_analyses
+            data_dir = Path(__file__).parent.parent.parent.parent.parent.parent / "second-brain-offline" / "data" / "customer_analyses"
+        self.data_dir = Path(data_dir)
+        self.analyses_cache = None
+        self.load_analyses_from_disk()
+        self.setup_ui()
+    def load_analyses_from_disk(self):
+        """Load all customer analyses from JSON files on disk."""
+        try:
+            # Check if the all-in-one file exists
+            all_file = self.data_dir / "customer_analyses_all.json"
+            if all_file.exists():
+                print(f"📂 Loading analyses from: {all_file}")
+                with open(all_file, 'r') as f:
+                    self.analyses_cache = json.load(f)
+                print(f"✅ Loaded {len(self.analyses_cache)} analyses from disk")
+            else:
+                # Load individual files
+                print(f"📂 Loading analyses from directory: {self.data_dir}")
+                self.analyses_cache = []
+                for json_file in self.data_dir.glob("customer_analysis_*.json"):
+                    try:
+                        with open(json_file, 'r') as f:
+                            analysis = json.load(f)
+                            self.analyses_cache.append(analysis)
+                    except Exception as e:
+                        print(f"⚠️  Failed to load {json_file}: {e}")
+                print(f"✅ Loaded {len(self.analyses_cache)} analyses from {len(list(self.data_dir.glob('customer_analysis_*.json')))} files")
+        except Exception as e:
+            print(f"❌ Failed to load analyses from disk: {e}")
+            self.analyses_cache = []
+    def format_text_for_table(self, text_list, max_items=3):
+        """Format a list of text items for better table display with proper bullet point spacing."""
+        if not text_list:
+            return "No items"
+        if isinstance(text_list, str):
+            return text_list
+        if len(text_list) == 0:
+            return "No items"
+        elif len(text_list) == 1:
+            return f"• {text_list[0]}"
+        else:
+            # Format bullet points with proper spacing
+            formatted_items = []
+            items_to_show = min(len(text_list), max_items)
+            for i in range(items_to_show):
+                formatted_items.append(f"• {text_list[i]}")
+            # Add indicator for remaining items if any
+            if len(text_list) > max_items:
+                remaining = len(text_list) - max_items
+                formatted_items.append(f"• ... and {remaining} more")
+            return "\n".join(formatted_items)
+    def setup_ui(self):
+        """Build the Gradio Blocks dashboard and store it on ``self.interface``.
+
+        The layout is, top to bottom: title, statistics panel, strategy/priority
+        filter dropdowns, search row, the main analyses table, and a collapsed
+        accordion for the detailed view. Event handlers are wired at the end,
+        once every component exists.
+        """
+        with gr.Blocks(
+            title="Customer Profile Analysis Dashboard",
+            theme=gr.themes.Soft(),
+            css="""
+            .customer-card {
+                border: 1px solid #e0e0e0;
+                border-radius: 8px;
+                padding: 16px;
+                margin: 8px 0;
+                background-color: #f8f9fa;
+            }
+            .customer-title {
+                font-weight: bold;
+                color: #2c3e50;
+                margin-bottom: 8px;
+                font-size: 1.1em;
+            }
+            .customer-meta {
+                font-size: 0.9em;
+                color: #6c757d;
+                margin-bottom: 12px;
+            }
+            .key-changes {
+                background-color: #fff3cd;
+                border-left: 4px solid #ffc107;
+                padding: 8px 12px;
+                margin: 8px 0;
+                border-radius: 4px;
+                font-size: 0.9em;
+            }
+            .recommendations {
+                background-color: #d1ecf1;
+                border-left: 4px solid #17a2b8;
+                padding: 8px 12px;
+                margin: 8px 0;
+                border-radius: 4px;
+                font-size: 0.9em;
+            }
+            .email-strategy {
+                background-color: #d4edda;
+                border-left: 4px solid #28a745;
+                padding: 8px 12px;
+                margin: 8px 0;
+                border-radius: 4px;
+                font-size: 0.9em;
+            }
+            .follow-up-email {
+                background-color: #e2e3e5;
+                border-left: 4px solid #6c757d;
+                padding: 8px 12px;
+                margin: 8px 0;
+                border-radius: 4px;
+                font-size: 0.9em;
+            }
+            .search-highlight {
+                background-color: #fff3cd;
+                padding: 2px 4px;
+                border-radius: 3px;
+            }
+            .dataframe {
+                font-size: 0.9em;
+                line-height: 1.4;
+            }
+            .dataframe td {
+                padding: 8px 6px;
+                vertical-align: top;
+                word-wrap: break-word;
+                white-space: pre-wrap;
+            }
+            .dataframe th {
+                padding: 8px 6px;
+                font-weight: bold;
+                background-color: #f8f9fa;
+            }
+            """
+        ) as self.interface:
+            gr.Markdown("# 📊 Customer Profile Analysis Dashboard")
+            gr.Markdown("View and search through customer profile analyses with AI-generated insights and follow-up emails.")
+            # Statistics section at the top
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Markdown("### 📈 Dashboard Statistics")
+                    # Initial HTML comes from generate_statistics(); the refresh button updates it later
+                    self.stats_view = gr.HTML(
+                        value=self.generate_statistics(),
+                        label="Analysis Statistics",
+                        show_label=False
+                    )
+            # Filter buttons
+            with gr.Row():
+                gr.Markdown("### 🔍 Quick Filters")
+                with gr.Row():
+                    # Dropdown choices are computed once, from the analyses cached at build time
+                    self.strategy_dropdown = gr.Dropdown(
+                        choices=self.get_strategy_choices(),
+                        label="Email Strategy",
+                        value=None,
+                        multiselect=False,
+                        scale=2
+                    )
+                    self.priority_dropdown = gr.Dropdown(
+                        choices=self.get_priority_choices(),
+                        label="Priority Level",
+                        value=None,
+                        multiselect=False,
+                        scale=2
+                    )
+                    self.clear_filters_btn = gr.Button("🔄 Clear All Filters", scale=1, variant="secondary")
+            gr.Markdown("---")  # Separator line
+            # Search functionality
+            with gr.Row():
+                self.search_input = gr.Textbox(
+                    label="Search Customer Analyses",
+                    placeholder="Search by company name, customer ID, key changes, recommendations, or email content...",
+                    scale=4
+                )
+                self.clear_search_btn = gr.Button("Clear Search", scale=1)
+                self.refresh_btn = gr.Button("Refresh Data", scale=1, variant="secondary")
+            # Main data table
+            self.customer_table = gr.Dataframe(
+                headers=["Customer ID", "Company Name", "Analysis Date", "Key Changes", "Recommendations", "Email Strategy"],
+                datatype=["str", "str", "str", "str", "str", "str"],
+                interactive=False,
+                label="Customer Profile Analyses",
+                wrap=True,
+                max_height=600,
+                column_widths=["10%", "20%", "12%", "25%", "25%", "8%"],
+                value=self.load_customer_analyses()
+            )
+            # Detailed view accordion
+            with gr.Accordion("📋 Detailed Analysis View", open=False):
+                self.detailed_view = gr.HTML(
+                    value="Select a row from the table above to view detailed analysis",
+                    label="Detailed Analysis"
+                )
+            # Event handlers
+            # NOTE(review): each handler below receives only its own control's value,
+            # so the search, strategy, and priority filters replace one another
+            # rather than combining.
+            self.search_input.change(
+                fn=self.filter_customer_analyses,
+                inputs=[self.search_input],
+                outputs=[self.customer_table]
+            )
+            self.clear_search_btn.click(
+                fn=self.clear_search,
+                inputs=[],
+                outputs=[self.search_input, self.customer_table]
+            )
+            self.refresh_btn.click(
+                fn=self.refresh_data,
+                inputs=[],
+                outputs=[self.customer_table, self.stats_view]
+            )
+            self.strategy_dropdown.change(
+                fn=self.filter_by_strategy,
+                inputs=[self.strategy_dropdown],
+                outputs=[self.customer_table]
+            )
+            self.priority_dropdown.change(
+                fn=self.filter_by_priority,
+                inputs=[self.priority_dropdown],
+                outputs=[self.customer_table]
+            )
+            # clear_filters returns (None, None, table), matching the three outputs in order
+            self.clear_filters_btn.click(
+                fn=self.clear_filters,
+                inputs=[],
+                outputs=[self.strategy_dropdown, self.priority_dropdown, self.customer_table]
+            )
+            # Selecting a table row renders that analysis into the accordion's HTML view
+            self.customer_table.select(
+                fn=self.show_detailed_analysis,
+                inputs=[self.customer_table],
+                outputs=[self.detailed_view]
+            )
+    def get_strategy_choices(self):
+        """Get unique email strategy choices for dropdown."""
+        if not self.analyses_cache:
+            return []
+        strategies = set()
+        for doc in self.analyses_cache:
+            strategy = doc.get("email_strategy", {}).get("email_type", "Unknown")
+            strategies.add(strategy)
+        return sorted(list(strategies))
+    def get_priority_choices(self):
+        """Get unique priority choices for dropdown."""
+        if not self.analyses_cache:
+            return []
+        priorities = set()
+        for doc in self.analyses_cache:
+            priority = doc.get("email_strategy", {}).get("priority", "Unknown")
+            priorities.add(priority)
+        return sorted(list(priorities))
+    def filter_by_strategy(self, strategy):
+        """Filter analyses by email strategy."""
+        if not strategy:
+            return self.load_customer_analyses()
+        if not self.analyses_cache:
+            return pd.DataFrame(columns=["Customer ID", "Company Name", "Analysis Date", "Key Changes", "Recommendations", "Email Strategy"])
+        try:
+            filtered_docs = []
+            for doc in self.analyses_cache:
+                doc_strategy = doc.get("email_strategy", {}).get("email_type", "Unknown")
+                if doc_strategy == strategy:
+                    filtered_docs.append(doc)
+            return self.format_analyses_for_table(filtered_docs)
+        except Exception as e:
+            print(f"❌ Error filtering by strategy: {e}")
+            return self.load_customer_analyses()
+    def filter_by_priority(self, priority):
+        """Filter analyses by priority level."""
+        if not priority:
+            return self.load_customer_analyses()
+        if not self.analyses_cache:
+            return pd.DataFrame(columns=["Customer ID", "Company Name", "Analysis Date", "Key Changes", "Recommendations", "Email Strategy"])
+        try:
+            filtered_docs = []
+            for doc in self.analyses_cache:
+                doc_priority = doc.get("email_strategy", {}).get("priority", "Unknown")
+                if doc_priority == priority:
+                    filtered_docs.append(doc)
+            return self.format_analyses_for_table(filtered_docs)
+        except Exception as e:
+            print(f"❌ Error filtering by priority: {e}")
+            return self.load_customer_analyses()
+    def clear_filters(self):
+        """Clear all filters and reload data."""
+        return None, None, self.load_customer_analyses()
+    def format_analyses_for_table(self, docs):
+        """Format a list of documents for table display."""
+        data = []
+        for doc in docs:
+            customer_id = str(doc.get("customer_id", "Unknown"))
+            company_name = doc.get("company_name", "Unknown Company")
+            analysis_date = doc.get("analysis_date", "Unknown Date")
+            # Format key changes with proper bullet points
+            key_changes = doc.get("key_changes", [])
+            key_changes_text = self.format_text_for_table(key_changes, max_items=3)
+            # Format recommendations with proper bullet points
+            recommendations = doc.get("recommendations", [])
+            recommendations_text = self.format_text_for_table(recommendations, max_items=3)
+            # Format email strategy
+            email_strategy = doc.get("email_strategy", {})
+            if isinstance(email_strategy, dict):
+                strategy_type = email_strategy.get("email_type", "Unknown")
+                priority = email_strategy.get("priority", "Unknown")
+                email_strategy_text = f"{strategy_type.replace('_', ' ').title()}\n({priority.title()})"
+            else:
+                email_strategy_text = str(email_strategy)[:60] + "..." if len(str(email_strategy)) > 60 else str(email_strategy)
+            data.append([
+                customer_id,
+                company_name,
+                analysis_date,
+                key_changes_text,
+                recommendations_text,
+                email_strategy_text
+            ])
+        return pd.DataFrame(data, columns=["Customer ID", "Company Name", "Analysis Date", "Key Changes", "Recommendations", "Email Strategy"])
+    def load_customer_analyses(self, limit: int = 100) -> pd.DataFrame:
+        """Load customer analyses from disk and format for display."""
+        if not self.analyses_cache:
+            return pd.DataFrame(columns=["Customer ID", "Company Name", "Analysis Date", "Key Changes", "Recommendations", "Email Strategy"])
+        try:
+            return self.format_analyses_for_table(self.analyses_cache[:limit])
+        except Exception as e:
+            print(f"❌ Error loading customer analyses: {e}")
+            return pd.DataFrame(columns=["Customer ID", "Company Name", "Analysis Date", "Key Changes", "Recommendations", "Email Strategy"])
+    def filter_customer_analyses(self, search_term: str) -> pd.DataFrame:
+        """Filter customer analyses based on search term."""
+        if not search_term.strip():
+            return self.load_customer_analyses()
+        if not self.analyses_cache:
+            return pd.DataFrame(columns=["Customer ID", "Company Name", "Analysis Date", "Key Changes", "Recommendations", "Email Strategy"])
+        try:
+            # Filter analyses based on search term (case-insensitive)
+            search_lower = search_term.lower()
+            filtered_docs = []
+            for doc in self.analyses_cache:
+                # Search in various fields
+                if (search_lower in str(doc.get("customer_id", "")).lower() or
+                    search_lower in doc.get("company_name", "").lower() or
+                    any(search_lower in change.lower() for change in doc.get("key_changes", [])) or
+                    any(search_lower in rec.lower() for rec in doc.get("recommendations", [])) or
+                    search_lower in str(doc.get("email_strategy", {}).get("key_messaging", "")).lower() or
+                    search_lower in str(doc.get("follow_up_email", {}).get("subject", "")).lower() or
+                    search_lower in str(doc.get("follow_up_email", {}).get("body", "")).lower()):
+                    filtered_docs.append(doc)
+            return self.format_analyses_for_table(filtered_docs[:100])  # Limit to 100 results
+        except Exception as e:
+            print(f"❌ Error filtering customer analyses: {e}")
+            return self.load_customer_analyses()
+    def clear_search(self):
+        """Clear search input and reload all data."""
+        return "", self.load_customer_analyses()
+    def refresh_data(self):
+        """Refresh the data from disk."""
+        self.load_analyses_from_disk()
+        return self.load_customer_analyses(), self.generate_statistics()
+    def show_detailed_analysis(self, table_data, evt: gr.SelectData):
+        """Show detailed analysis for selected row."""
+        try:
+            if evt.index[0] >= len(table_data):
+                return "Please select a valid row from the table."
+            # Get the row data using iloc for proper pandas indexing
+            row_data = table_data.iloc[evt.index[0]]
+            customer_id = str(row_data.iloc[0])  # Customer ID is the first column
+            # Find document in cache by customer_id
+            if not self.analyses_cache:
+                return "No analyses loaded from disk."
+            doc = None
+            for analysis in self.analyses_cache:
+                if str(analysis.get("customer_id")) == customer_id:
+                    doc = analysis
+                    break
+            if not doc:
+                return f"No detailed data found for customer {customer_id}"
+            # Format detailed analysis
+            html = self.format_detailed_analysis(doc)
+            return html
+        except Exception as e:
+            return f"Error loading detailed analysis: {str(e)}"
+    def format_detailed_analysis(self, doc: dict) -> str:
+        """Format detailed analysis as HTML."""
+        customer_id = doc.get("customer_id", "Unknown")
+        company_name = doc.get("company_name", "Unknown Company")
+        analysis_date = doc.get("analysis_date", "Unknown Date")
+        # Format key changes
+        key_changes = doc.get("key_changes", [])
+        key_changes_html = ""
+        if isinstance(key_changes, list):
+            for i, change in enumerate(key_changes, 1):
+                key_changes_html += f"<li>{change}</li>"
+        else:
+            key_changes_html = f"<li>{key_changes}</li>"
+        # Format recommendations
+        recommendations = doc.get("recommendations", [])
+        recommendations_html = ""
+        if isinstance(recommendations, list):
+            for i, rec in enumerate(recommendations, 1):
+                recommendations_html += f"<li>{rec}</li>"
+        else:
+            recommendations_html = f"<li>{recommendations}</li>"
+        # Format email strategy
+        email_strategy = doc.get("email_strategy", {})
+        strategy_html = ""
+        if isinstance(email_strategy, dict):
+            strategy_type = email_strategy.get("email_type", "Unknown")
+            priority = email_strategy.get("priority", "Unknown")
+            key_messaging = email_strategy.get("key_messaging", "No messaging provided")
+            call_to_action = email_strategy.get("call_to_action", "No call to action")
+            strategy_html = f"""
+            <div class="email-strategy">
+                <strong>Type:</strong> {strategy_type.title()}<br>
+                <strong>Priority:</strong> {priority.title()}<br>
+                <strong>Key Messaging:</strong> {key_messaging}<br>
+                <strong>Call to Action:</strong> {call_to_action}
+            </div>
+            """
+        else:
+            strategy_html = f"<div class='email-strategy'>{email_strategy}</div>"
+        # Format conversation insights
+        conversation_insights = doc.get("conversation_insights", [])
+        insights_html = ""
+        if conversation_insights:
+            insights_html = """
+            <div style="background-color: #f0f4f8; border-left: 4px solid #3b82f6; padding: 12px 16px; margin: 8px 0; border-radius: 4px;">
+                <h4 style="margin: 0 0 12px 0; color: #1e40af;">💬 Recent Conversation Insights</h4>
+            """
+            for i, conv in enumerate(conversation_insights[:5], 1):  # Show up to 5 conversations
+                title = conv.get('title', 'Unknown')
+                source = conv.get('source', 'Unknown')
+                datetime_str = conv.get('datetime', 'Unknown')
+                summary = conv.get('summary', 'No summary available')
+                key_findings = conv.get('key_findings', [])
+                insights_html += f"""
+                <div style="background-color: white; padding: 12px; margin: 8px 0; border-radius: 4px; border: 1px solid #e0e7ff;">
+                    <div style="font-weight: bold; color: #1e40af; margin-bottom: 4px;">
+                        {i}. {title}
+                    </div>
+                    <div style="font-size: 0.85em; color: #6b7280; margin-bottom: 8px;">
+                        {source} | {datetime_str}
+                    </div>
+                    <div style="margin-bottom: 8px;">
+                        <strong>Summary:</strong> {summary}
+                    </div>
+                """
+                if key_findings:
+                    insights_html += '<div style="margin-top: 8px;"><strong>Key Findings:</strong><ul style="margin: 4px 0; padding-left: 20px;">'
+                    for finding in key_findings[:3]:  # Show top 3 findings
+                        finding_text = finding.get('finding', '')
+                        impact = finding.get('impact', '')
+                        insight_type = finding.get('insight_type', '')
+                        # Color code by impact
+                        impact_color = "#dc2626" if impact.lower() == "high" else "#f59e0b" if impact.lower() == "medium" else "#10b981"
+                        insights_html += f"""
+                        <li style="margin: 4px 0;">
+                            <span style="background-color: {impact_color}; color: white; padding: 2px 6px; border-radius: 3px; font-size: 0.75em; font-weight: bold;">
+                                {insight_type}/{impact}
+                            </span>
+                            {finding_text}
+                        </li>
+                        """
+                    insights_html += '</ul></div>'
+                insights_html += '</div>'
+            insights_html += '</div>'
+        else:
+            insights_html = """
+            <div style="background-color: #fef3c7; border-left: 4px solid #f59e0b; padding: 12px 16px; margin: 8px 0; border-radius: 4px;">
+                <strong>ℹ️ No conversation insights available for this customer</strong>
+            </div>
+            """
+        # Format follow-up email
+        follow_up_email = doc.get("follow_up_email", {})
+        email_html = ""
+        if isinstance(follow_up_email, dict):
+            subject = follow_up_email.get("subject", "No Subject")
+            body = follow_up_email.get("body", "No body content")
+            call_to_action = follow_up_email.get("call_to_action", "No call to action")
+            priority = follow_up_email.get("priority", "Unknown")
+            # Color code priority
+            priority_color = "#dc2626" if priority.lower() == "high" else "#f59e0b" if priority.lower() == "medium" else "#10b981"
+            email_html = f"""
+            <div class="follow-up-email">
+                <strong>Subject:</strong> {subject}<br>
+                <strong>Priority:</strong> <span style="background-color: {priority_color}; color: white; padding: 2px 8px; border-radius: 3px; font-weight: bold;">{priority.upper()}</span><br>
+                <strong>Body:</strong><br>
+                <div style="margin-left: 20px; margin-top: 8px; white-space: pre-wrap;">{body}</div>
+                <br><strong>Call to Action:</strong> {call_to_action}
+            </div>
+            """
+        else:
+            email_html = f"<div class='follow-up-email'>{follow_up_email}</div>"
+        html = f"""
+        <div class="customer-card">
+            <div class="customer-title">{company_name} (ID: {customer_id})</div>
+            <div class="customer-meta">
+                <strong>Analysis Date:</strong> {analysis_date}
+            </div>
+            <h4>🔍 Key Changes</h4>
+            <div class="key-changes">
+                <ul>{key_changes_html}</ul>
+            </div>
+            <h4>💡 Recommendations</h4>
+            <div class="recommendations">
+                <ul>{recommendations_html}</ul>
+            </div>
+            <h4>💬 Conversation Insights</h4>
+            {insights_html}
+            <h4>📧 Email Strategy</h4>
+            {strategy_html}
+            <h4>📨 Follow-up Email</h4>
+            {email_html}
+        </div>
+        """
+        return html
+    def generate_statistics(self) -> str:
+        """Generate statistics about the customer analyses."""
+        if not self.analyses_cache:
+            return "No analyses loaded from disk."
+        try:
+            # Get total count
+            total_count = len(self.analyses_cache)
+            # Get email strategy distribution
+            strategy_counts = {}
+            for doc in self.analyses_cache:
+                strategy_type = doc.get("email_strategy", {}).get("email_type", "Unknown")
+                strategy_counts[strategy_type] = strategy_counts.get(strategy_type, 0) + 1
+            strategy_stats = [{"_id": k, "count": v} for k, v in sorted(strategy_counts.items(), key=lambda x: x[1], reverse=True)]
+            # Get priority distribution
+            priority_counts = {}
+            for doc in self.analyses_cache:
+                priority = doc.get("email_strategy", {}).get("priority", "Unknown")
+                priority_counts[priority] = priority_counts.get(priority, 0) + 1
+            priority_stats = [{"_id": k, "count": v} for k, v in sorted(priority_counts.items(), key=lambda x: x[1], reverse=True)]
+            # Count recent analyses (just show total for now since we don't have created_at timestamps)
+            recent_count = total_count
+            # Format statistics with enhanced visual design and clickable filters
+            stats_html = f"""
+            <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 12px; margin: 10px 0; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
+                <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 20px;">
+                    <!-- Total Counts Card -->
+                    <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px; backdrop-filter: blur(10px);">
+                        <h4 style="margin: 0 0 10px 0; color: #fff; font-size: 1.1em;">📊 Total Accounts</h4>
+                        <div style="font-size: 2em; font-weight: bold; color: #fff;">{total_count}</div>
+                    </div>
+                    <!-- Email Strategies Card -->
+                    <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px; backdrop-filter: blur(10px);">
+                        <h4 style="margin: 0 0 10px 0; color: #fff; font-size: 1.1em;">📧 Email Strategies</h4>
+                        <div style="font-size: 0.9em;">
+            """
+            for stat in strategy_stats:
+                strategy_type = stat["_id"] or "Unknown"
+                count = stat["count"]
+                percentage = (count / total_count * 100) if total_count > 0 else 0
+                stats_html += f"""
+                            <div style="display: flex; justify-content: space-between; margin: 5px 0; padding: 3px 0; border-bottom: 1px solid rgba(255,255,255,0.2);">
+                                <span style="cursor: pointer; padding: 2px 6px; border-radius: 3px; transition: background-color 0.2s;"
+                                      onmouseover="this.style.backgroundColor='rgba(255,255,255,0.2)'"
+                                      onmouseout="this.style.backgroundColor='transparent'"
+                                      onclick="filterByStrategy('{strategy_type}')">{strategy_type.title()}</span>
+                                <span style="font-weight: bold;">{count} ({percentage:.1f}%)</span>
+                            </div>
+                """
+            stats_html += """
+                        </div>
+                    </div>
+                    <!-- Priority Distribution Card -->
+                    <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px; backdrop-filter: blur(10px);">
+                        <h4 style="margin: 0 0 10px 0; color: #fff; font-size: 1.1em;">⚡ Priority Levels</h4>
+                        <div style="font-size: 0.9em;">
+            """
+            for stat in priority_stats:
+                priority = stat["_id"] or "Unknown"
+                count = stat["count"]
+                percentage = (count / total_count * 100) if total_count > 0 else 0
+                # Color code based on priority
+                color = "#ff6b6b" if priority.lower() == "high" else "#feca57" if priority.lower() == "medium" else "#48dbfb"
+                stats_html += f"""
+                            <div style="display: flex; justify-content: space-between; margin: 5px 0; padding: 3px 0; border-bottom: 1px solid rgba(255,255,255,0.2);">
+                                <span style="color: {color}; font-weight: bold; cursor: pointer; padding: 2px 6px; border-radius: 3px; transition: background-color 0.2s;"
+                                      onmouseover="this.style.backgroundColor='rgba(255,255,255,0.2)'"
+                                      onmouseout="this.style.backgroundColor='transparent'"
+                                      onclick="filterByPriority('{priority}')">{priority.title()}</span>
+                                <span style="font-weight: bold;">{count} ({percentage:.1f}%)</span>
+                            </div>
+                """
+            stats_html += """
+                        </div>
+                    </div>
+                </div>
+            </div>
+            <script>
+            function filterByStrategy(strategy) {
+                // This will be handled by Gradio's JavaScript interface
+                console.log('Filtering by strategy:', strategy);
+                // We'll implement this with Gradio's interface
+            }
+            function filterByPriority(priority) {
+                // This will be handled by Gradio's JavaScript interface
+                console.log('Filtering by priority:', priority);
+                // We'll implement this with Gradio's interface
+            }
+            </script>
+            """
+            return stats_html
+        except Exception as e:
+            return f"Error generating statistics: {str(e)}"
+    def launch(self, **kwargs):
+        """Launch the Gradio interface."""
+        return self.interface.launch(**kwargs)

tools/customer_profile_app.py ADDED Viewed

	@@ -0,0 +1,82 @@

+#!/usr/bin/env python3
+"""
+Customer Profile Analysis Dashboard App.
+This app provides a Gradio UI for viewing and searching customer profile analyses
+from the customer_profile_analyses MongoDB collection.
+"""
+import click
+from second_brain_online.application.ui.customer_profile_ui import CustomerProfileUI
+@click.command()
+@click.option(
+    "--host",
+    type=str,
+    default="127.0.0.1",
+    help="Host to run the server on",
+)
+@click.option(
+    "--port",
+    type=int,
+    default=7860,
+    help="Port to run the server on",
+)
+@click.option(
+    "--share",
+    is_flag=True,
+    default=False,
+    help="Create a public link for the interface",
+)
+@click.option(
+    "--debug",
+    is_flag=True,
+    default=False,
+    help="Enable debug mode",
+)
+def main(host: str, port: int, share: bool, debug: bool) -> None:
+    """Launch the Customer Profile Analysis Dashboard.
+    This dashboard allows you to:
+    - View all customer profile analyses in a searchable table
+    - Search by company name, customer ID, key changes, recommendations, or email content
+    - View detailed analysis for each customer
+    - See statistics about the analyses
+    Args:
+        host: Host to run the server on
+        port: Port to run the server on
+        share: Create a public link for the interface
+        debug: Enable debug mode
+    """
+    print("🚀 Starting Customer Profile Analysis Dashboard...")
+    print(f"📍 Server will be available at: http://{host}:{port}")
+    if share:
+        print("🌐 Creating public link...")
+    if debug:
+        print("🐛 Debug mode enabled")
+    try:
+        # Initialize the UI
+        ui = CustomerProfileUI()
+        # Launch the interface
+        ui.launch(
+            server_name=host,
+            server_port=port,
+            share=share,
+            debug=debug,
+            show_error=True,
+            quiet=False
+        )
+    except Exception as e:
+        print(f"❌ Failed to start the dashboard: {e}")
+        raise
+if __name__ == "__main__":
+    main()

tools/user_interaction_ui.py ADDED Viewed

	@@ -0,0 +1,516 @@

+#!/usr/bin/env python3
+"""
+User Interaction Analysis Dashboard
+A comprehensive UI for viewing and analyzing user interactions across
+Intercom chats and JustCall meetings with priority-based filtering.
+"""
+import os
+from typing import List, Dict, Any, Tuple, Optional
+import gradio as gr
+import pandas as pd
+from loguru import logger
+from pymongo import MongoClient
+# MongoDB Configuration
+MONGODB_URI = "mongodb+srv://contextdb:HOqIgSH01CoEiMb1@cluster0.d9cmff.mongodb.net/"
+DATABASE_NAME = "second_brain_course"
+COLLECTION_NAME = "user_interaction_analyses"
+class UserInteractionDashboard:
+    """Dashboard for user interaction analyses."""
+    def __init__(self):
+        """Initialize dashboard with MongoDB connection."""
+        self.client = MongoClient(MONGODB_URI)
+        self.db = self.client[DATABASE_NAME]
+        self.collection = self.db[COLLECTION_NAME]
+        logger.info(f"Connected to MongoDB: {DATABASE_NAME}.{COLLECTION_NAME}")
+    def get_summary_stats(self) -> Tuple[int, int, int, int, int, int]:
+        """Get summary statistics for the dashboard."""
+        total_users = self.collection.count_documents({})
+        # Count by priority
+        high_priority = self.collection.count_documents({"priority_level": "high"})
+        medium_priority = self.collection.count_documents({"priority_level": "medium"})
+        low_priority = self.collection.count_documents({"priority_level": "low"})
+        # Aggregate total conversations and meetings
+        pipeline = [
+            {
+                "$group": {
+                    "_id": None,
+                    "total_conversations": {"$sum": "$total_conversations"},
+                    "total_meetings": {"$sum": "$total_meetings"}
+                }
+            }
+        ]
+        agg_result = list(self.collection.aggregate(pipeline))
+        total_conversations = agg_result[0]["total_conversations"] if agg_result else 0
+        total_meetings = agg_result[0]["total_meetings"] if agg_result else 0
+        return (
+            total_users,
+            total_conversations,
+            total_meetings,
+            high_priority,
+            medium_priority,
+            low_priority
+        )
+    def get_users_data(self, priority_filter: Optional[str] = None) -> pd.DataFrame:
+        """Get user data for table display with optional priority filter."""
+        # Build query
+        query = {}
+        if priority_filter and priority_filter != "All":
+            query["priority_level"] = priority_filter.lower()
+        # Fetch documents
+        users = list(self.collection.find(query))
+        if not users:
+            return pd.DataFrame(columns=[
+                "User ID", "Conversations", "Meetings",
+                "Conv Key Findings", "Meeting Key Findings", "Priority"
+            ])
+        # Transform to table format
+        table_data = []
+        for user in users:
+            user_id = user.get("user_id", "")
+            # Get conversation IDs
+            conv_ids = user.get("conversation_ids", [])
+            conv_ids_str = ", ".join(conv_ids[:3])  # Show first 3
+            if len(conv_ids) > 3:
+                conv_ids_str += f" (+{len(conv_ids) - 3} more)"
+            # Get meeting IDs
+            meeting_ids = user.get("meeting_ids", [])
+            meeting_ids_str = ", ".join(meeting_ids[:3])  # Show first 3
+            if len(meeting_ids) > 3:
+                meeting_ids_str += f" (+{len(meeting_ids) - 3} more)"
+            # Get key findings from conversation level
+            conv_insights = user.get("conversation_level_insights", {})
+            conv_findings = conv_insights.get("aggregated_marketing_insights", {}).get("key_findings", [])
+            conv_findings_str = f"{len(conv_findings)} findings"
+            # Get key findings from meeting level
+            meeting_insights = user.get("meeting_level_insights", {})
+            meeting_findings = meeting_insights.get("aggregated_marketing_insights", {}).get("key_findings", [])
+            meeting_findings_str = f"{len(meeting_findings)} findings"
+            priority = user.get("priority_level", "unknown").upper()
+            table_data.append({
+                "User ID": user_id,
+                "Conversations": conv_ids_str,
+                "Meetings": meeting_ids_str,
+                "Conv Key Findings": conv_findings_str,
+                "Meeting Key Findings": meeting_findings_str,
+                "Priority": priority,
+                "_raw": user  # Store raw data for detail view
+            })
+        df = pd.DataFrame(table_data)
+        return df
+    def get_user_detail(self, df: pd.DataFrame, evt: gr.SelectData) -> str:
+        """Get detailed view of selected user."""
+        if df is None or len(df) == 0:
+            return "No user selected"
+        try:
+            selected_row = evt.index[0] if isinstance(evt.index, list) else evt.index
+            user_data = df.iloc[selected_row]["_raw"]
+            # Build detailed HTML view
+            html = f"""
+            <div style="font-family: Arial, sans-serif; padding: 20px;">
+                <h2 style="color: #2563eb;">User Profile: {user_data.get('user_id', 'N/A')}</h2>
+                <p><strong>Priority Level:</strong> <span style="color: {'#dc2626' if user_data.get('priority_level') == 'high' else '#f59e0b' if user_data.get('priority_level') == 'medium' else '#16a34a'}; font-weight: bold;">{user_data.get('priority_level', 'unknown').upper()}</span></p>
+                <p><strong>Analysis Date:</strong> {user_data.get('analysis_timestamp', 'N/A')}</p>
+                <hr style="margin: 20px 0;">
+                <h3 style="color: #7c3aed;">📊 Overview</h3>
+                <ul>
+                    <li><strong>Total Conversations:</strong> {user_data.get('total_conversations', 0)}</li>
+                    <li><strong>Total Meetings:</strong> {user_data.get('total_meetings', 0)}</li>
+                    <li><strong>Conversation Chunks:</strong> {user_data.get('total_conversation_chunks', 0)}</li>
+                    <li><strong>Meeting Chunks:</strong> {user_data.get('total_meeting_chunks', 0)}</li>
+                </ul>
+                <hr style="margin: 20px 0;">
+                <h3 style="color: #0891b2;">💬 Conversation Level Insights (Intercom)</h3>
+            """
+            # Conversation insights
+            conv_insights = user_data.get("conversation_level_insights", {})
+            conv_summary = conv_insights.get("conversation_summary", "No summary available")
+            html += f"<p><strong>Summary:</strong> {conv_summary}</p>"
+            # Conversation quotes
+            conv_marketing = conv_insights.get("aggregated_marketing_insights", {})
+            conv_quotes = conv_marketing.get("quotes", [])
+            if conv_quotes:
+                html += "<h4>Key Quotes:</h4><ul>"
+                for quote in conv_quotes[:5]:  # Show first 5
+                    html += f"""
+                    <li>
+                        <strong>"{quote.get('quote', '')}"</strong>
+                        <br><em>Context:</em> {quote.get('context', '')}
+                        <br><em>Sentiment:</em> {quote.get('sentiment', '')}
+                    </li>
+                    """
+                html += "</ul>"
+            # Conversation findings
+            conv_findings = conv_marketing.get("key_findings", [])
+            if conv_findings:
+                html += "<h4>Key Findings:</h4><ul>"
+                for finding in conv_findings[:5]:  # Show first 5
+                    impact_color = "#dc2626" if finding.get("impact") == "high" else "#f59e0b" if finding.get("impact") == "medium" else "#16a34a"
+                    html += f"""
+                    <li>
+                        <strong>{finding.get('finding', '')}</strong>
+                        <br><em>Evidence:</em> {finding.get('evidence', '')}
+                        <br><em>Impact:</em> <span style="color: {impact_color}; font-weight: bold;">{finding.get('impact', '').upper()}</span>
+                    </li>
+                    """
+                html += "</ul>"
+            html += "<hr style='margin: 20px 0;'>"
+            # Meeting insights
+            html += "<h3 style='color: #ea580c;'>📞 Meeting Level Insights (JustCall)</h3>"
+            meeting_insights = user_data.get("meeting_level_insights", {})
+            meeting_summary = meeting_insights.get("meeting_summary", "No summary available")
+            html += f"<p><strong>Summary:</strong> {meeting_summary}</p>"
+            # Meeting quotes
+            meeting_marketing = meeting_insights.get("aggregated_marketing_insights", {})
+            meeting_quotes = meeting_marketing.get("quotes", [])
+            if meeting_quotes:
+                html += "<h4>Key Quotes:</h4><ul>"
+                for quote in meeting_quotes[:5]:  # Show first 5
+                    html += f"""
+                    <li>
+                        <strong>"{quote.get('quote', '')}"</strong>
+                        <br><em>Context:</em> {quote.get('context', '')}
+                        <br><em>Sentiment:</em> {quote.get('sentiment', '')}
+                    </li>
+                    """
+                html += "</ul>"
+            # Meeting findings
+            meeting_findings = meeting_marketing.get("key_findings", [])
+            if meeting_findings:
+                html += "<h4>Key Findings:</h4><ul>"
+                for finding in meeting_findings[:5]:  # Show first 5
+                    impact_color = "#dc2626" if finding.get("impact") == "high" else "#f59e0b" if finding.get("impact") == "medium" else "#16a34a"
+                    html += f"""
+                    <li>
+                        <strong>{finding.get('finding', '')}</strong>
+                        <br><em>Evidence:</em> {finding.get('evidence', '')}
+                        <br><em>Impact:</em> <span style="color: {impact_color}; font-weight: bold;">{finding.get('impact', '').upper()}</span>
+                    </li>
+                    """
+                html += "</ul>"
+            html += "<hr style='margin: 20px 0;'>"
+            # Unified insights
+            html += "<h3 style='color: #059669;'>🎯 Unified Insights</h3>"
+            unified_summary = user_data.get("unified_insights", {}).get("unified_summary", "No unified summary available")
+            html += f"<p><strong>Summary:</strong> {unified_summary}</p>"
+            # User journey
+            user_journey = user_data.get("user_journey_summary", "No journey summary available")
+            html += f"<h4>User Journey:</h4><p>{user_journey}</p>"
+            # Cross-interaction patterns
+            patterns = user_data.get("cross_interaction_patterns", [])
+            if patterns:
+                html += "<h4>Cross-Interaction Patterns:</h4><ul>"
+                for pattern in patterns:
+                    html += f"<li>{pattern}</li>"
+                html += "</ul>"
+            # Follow-up recommendations
+            recommendations = user_data.get("unified_follow_up_recommendations", "No recommendations available")
+            html += f"<h4>Follow-up Recommendations:</h4><p style='background: #f3f4f6; padding: 15px; border-radius: 5px;'>{recommendations}</p>"
+            html += "</div>"
+            return html
+        except Exception as e:
+            logger.error(f"Error getting user detail: {e}")
+            return f"Error loading user details: {str(e)}"
+    def filter_by_priority(self, priority: str) -> Tuple[pd.DataFrame, str]:
+        """Filter users by priority level."""
+        df = self.get_users_data(priority_filter=priority)
+        # Remove the _raw column for display
+        display_df = df.drop(columns=["_raw"]) if "_raw" in df.columns else df
+        return display_df, f"Showing {len(df)} users with {priority} priority"
+    def search_table(self, df: pd.DataFrame, search_term: str) -> pd.DataFrame:
+        """Search across all columns in the table."""
+        if not search_term or df is None or len(df) == 0:
+            return df
+        # Search across all string columns
+        mask = df.astype(str).apply(
+            lambda row: row.str.contains(search_term, case=False, na=False).any(),
+            axis=1
+        )
+        return df[mask]
+def create_dashboard():
+    """Create the Gradio dashboard."""
+    dashboard = UserInteractionDashboard()
+    # Get initial stats
+    total_users, total_convs, total_meetings, high_count, medium_count, low_count = dashboard.get_summary_stats()
+    # Custom CSS for better styling
+    custom_css = """
+    .priority-btn {
+        font-size: 18px !important;
+        font-weight: bold !important;
+        padding: 15px 30px !important;
+        border-radius: 8px !important;
+    }
+    .stats-box {
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+        padding: 20px;
+        border-radius: 10px;
+        color: white;
+        text-align: center;
+    }
+    """
+    with gr.Blocks(css=custom_css, title="User Interaction Analysis Dashboard") as demo:
+        # Header
+        gr.Markdown("# 🎯 User Interaction Analysis Dashboard")
+        gr.Markdown("*Analyzing user interactions across Intercom chats and JustCall meetings*")
+        # ============================================================
+        # SECTION 1: Summary Statistics and Priority Filters
+        # ============================================================
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown(f"""
+                <div class="stats-box">
+                    <h2>{total_users}</h2>
+                    <p>Total Users Analyzed</p>
+                </div>
+                """)
+            with gr.Column(scale=1):
+                gr.Markdown(f"""
+                <div class="stats-box">
+                    <h2>{total_convs}</h2>
+                    <p>Intercom Conversations</p>
+                </div>
+                """)
+            with gr.Column(scale=1):
+                gr.Markdown(f"""
+                <div class="stats-box">
+                    <h2>{total_meetings}</h2>
+                    <p>JustCall Meetings</p>
+                </div>
+                """)
+        gr.Markdown("---")
+        # Priority Filter Buttons
+        gr.Markdown("### 🎚️ Filter by Priority Level")
+        with gr.Row():
+            high_btn = gr.Button(
+                f"🔴 High Priority ({high_count})",
+                elem_classes=["priority-btn"],
+                variant="primary",
+                scale=1
+            )
+            medium_btn = gr.Button(
+                f"🟡 Medium Priority ({medium_count})",
+                elem_classes=["priority-btn"],
+                variant="secondary",
+                scale=1
+            )
+            low_btn = gr.Button(
+                f"🟢 Low Priority ({low_count})",
+                elem_classes=["priority-btn"],
+                variant="secondary",
+                scale=1
+            )
+            all_btn = gr.Button(
+                f"⚪ All Users ({total_users})",
+                elem_classes=["priority-btn"],
+                variant="secondary",
+                scale=1
+            )
+        filter_status = gr.Textbox(
+            label="Filter Status",
+            value=f"Showing all {total_users} users",
+            interactive=False
+        )
+        gr.Markdown("---")
+        # ============================================================
+        # SECTION 2: User Data Table with Search
+        # ============================================================
+        gr.Markdown("### 📊 User Interaction Data")
+        search_box = gr.Textbox(
+            label="🔍 Search across all columns",
+            placeholder="Search by User ID, Conversation ID, Meeting ID, findings...",
+            scale=1
+        )
+        # Get initial data
+        initial_df = dashboard.get_users_data()
+        display_df = initial_df.drop(columns=["_raw"]) if "_raw" in initial_df.columns else initial_df
+        user_table = gr.Dataframe(
+            value=display_df,
+            label="User Interactions",
+            interactive=False,
+            wrap=True
+        )
+        # Hidden state to store full dataframe with _raw data
+        full_data_state = gr.State(value=initial_df)
+        filtered_data_state = gr.State(value=initial_df)
+        gr.Markdown("---")
+        # ============================================================
+        # SECTION 3: Detailed User View
+        # ============================================================
+        gr.Markdown("### 👤 User Details")
+        gr.Markdown("*Click on any row in the table above to see detailed analysis*")
+        user_detail = gr.HTML(
+            value="<p style='text-align: center; color: #6b7280; padding: 40px;'>Select a user from the table above to view detailed insights</p>"
+        )
+        # ============================================================
+        # Event Handlers
+        # ============================================================
+        def filter_high():
+            df = dashboard.get_users_data(priority_filter="High")
+            display = df.drop(columns=["_raw"]) if "_raw" in df.columns else df
+            return display, df, df, f"Showing {len(df)} HIGH priority users"
+        def filter_medium():
+            df = dashboard.get_users_data(priority_filter="Medium")
+            display = df.drop(columns=["_raw"]) if "_raw" in df.columns else df
+            return display, df, df, f"Showing {len(df)} MEDIUM priority users"
+        def filter_low():
+            df = dashboard.get_users_data(priority_filter="Low")
+            display = df.drop(columns=["_raw"]) if "_raw" in df.columns else df
+            return display, df, df, f"Showing {len(df)} LOW priority users"
+        def filter_all():
+            df = dashboard.get_users_data(priority_filter=None)
+            display = df.drop(columns=["_raw"]) if "_raw" in df.columns else df
+            return display, df, df, f"Showing all {len(df)} users"
+        def search_users(search_term: str, current_filtered_df: pd.DataFrame):
+            """Search within currently filtered data."""
+            if not search_term:
+                # Return the current filtered data
+                display = current_filtered_df.drop(columns=["_raw"]) if "_raw" in current_filtered_df.columns else current_filtered_df
+                return display
+            # Search in the filtered data
+            if current_filtered_df is None or len(current_filtered_df) == 0:
+                return pd.DataFrame()
+            # Create a copy for searching
+            search_df = current_filtered_df.copy()
+            # Search across all visible columns (excluding _raw)
+            visible_cols = [col for col in search_df.columns if col != "_raw"]
+            mask = search_df[visible_cols].astype(str).apply(
+                lambda row: row.str.contains(search_term, case=False, na=False).any(),
+                axis=1
+            )
+            result_df = search_df[mask]
+            display = result_df.drop(columns=["_raw"]) if "_raw" in result_df.columns else result_df
+            return display
+        def show_detail(evt: gr.SelectData, full_data: pd.DataFrame):
+            """Show detailed view when row is selected."""
+            return dashboard.get_user_detail(full_data, evt)
+        # Wire up event handlers
+        high_btn.click(
+            fn=filter_high,
+            outputs=[user_table, filtered_data_state, full_data_state, filter_status]
+        )
+        medium_btn.click(
+            fn=filter_medium,
+            outputs=[user_table, filtered_data_state, full_data_state, filter_status]
+        )
+        low_btn.click(
+            fn=filter_low,
+            outputs=[user_table, filtered_data_state, full_data_state, filter_status]
+        )
+        all_btn.click(
+            fn=filter_all,
+            outputs=[user_table, filtered_data_state, full_data_state, filter_status]
+        )
+        search_box.change(
+            fn=search_users,
+            inputs=[search_box, filtered_data_state],
+            outputs=[user_table]
+        )
+        user_table.select(
+            fn=show_detail,
+            inputs=[full_data_state],
+            outputs=[user_detail]
+        )
+    return demo
+if __name__ == "__main__":
+    logger.info("Starting User Interaction Analysis Dashboard...")
+    demo = create_dashboard()
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7861,
+        share=False
+    )