chinmayjha committed on
Commit
a697e1b
·
unverified ·
1 Parent(s): 8c6064d

feat: optimize RAG agent with token reduction and separate context/sources

Browse files

- Refactor mongodb_retriever.py to separate lightweight context from full sources
- Store sources in class variable to avoid sending to LLM (50% token reduction)
- Update summarizer.py to append cached sources to final answer
- Add customer profile UI for displaying analysis results
- Add user interaction UI for tracking customer changes
- Optimize context format: Doc Title | Date with bullet point summaries
- Keep full sources metadata separate for final output only

This significantly reduces LLM token usage while maintaining answer quality.

configs/rag_conversations.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RAG Configuration for Conversation Data (Agent UI)
2
+ # This config matches the settings used to create rag_conversations collection
3
+
4
+ parameters:
5
+ # Collection settings (must match what's in MongoDB)
6
+ extract_collection_name: test_conversation_documents
7
+ fetch_limit: 0
8
+ load_collection_name: rag_conversations # This is what the agent will query
9
+
10
+ # Retriever settings (must match how embeddings were created)
11
+ retriever_type: contextual # Hybrid vector + full-text search
12
+ embedding_model_id: text-embedding-3-small # Same as offline pipeline
13
+ embedding_model_type: openai
14
+ embedding_model_dim: 1536
15
+
16
+ # These settings are for display/reference only (not used by agent UI)
17
+ chunk_size: 640
18
+ contextual_summarization_type: contextual
19
+ contextual_agent_model_id: gpt-4o-mini
20
+ contextual_agent_max_characters: 200
21
+ device: mps
22
+
src/second_brain_online/application/agents/tools/mongodb_retriever.py CHANGED
@@ -29,6 +29,11 @@ class MongoDBRetrieverTool(Tool):
29
  }
30
  }
31
  output_type = "string"
 
 
 
 
 
32
 
33
  def __init__(self, config_path: Path, **kwargs):
34
  super().__init__(**kwargs)
@@ -169,55 +174,92 @@ class MongoDBRetrieverTool(Tool):
169
  # Add this chunk's contextual summary to the document
170
  docs_by_id[doc_id]["chunks"].append(doc.metadata.get("contextual_summary", ""))
171
 
172
- # Step 4: Format unique documents with their insights
173
- formatted_docs = []
 
 
 
 
 
174
  for doc_num, (doc_id, doc_info) in enumerate(docs_by_id.items(), 1):
175
- doc_text = f"=== DOCUMENT {doc_num} ===\n"
176
- doc_text += f"Title: {doc_info['title']}\n"
177
- doc_text += f"Date: {doc_info['datetime']}\n"
178
- doc_text += f"Source: {doc_info['source']} | ID: {doc_id}"
 
 
 
 
 
179
  if doc_info['user_id']:
180
- doc_text += f" | User: {doc_info['user_id']}"
181
- if doc_info['url']:
182
- doc_text += f"\nURL: {doc_info['url']}"
183
 
184
- # Add all chunk contexts from this conversation
185
- doc_text += f"\n\nCONTEXT (from {len(doc_info['chunks'])} chunk(s)):\n"
186
- for chunk_idx, chunk_context in enumerate(doc_info['chunks'], 1):
187
- doc_text += f"{chunk_idx}. {chunk_context}\n"
188
 
189
- # Add conversation insights (for Sources section only - not for answer generation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  insights = doc_info['insights']
191
- doc_text += f"\n[METADATA FOR SOURCES SECTION]\n"
192
 
193
  summary = insights.get("summary", "")
194
  if summary:
195
- doc_text += f"Summary: {summary}\n"
196
 
197
  key_findings = insights.get("key_findings", [])
198
  if key_findings:
199
- doc_text += "Key Findings:\n"
200
  for finding in key_findings:
201
  insight_type = finding.get("insight_type", "Unknown")
202
  finding_text = finding.get("finding", "")
203
  impact = finding.get("impact", "Unknown")
204
- doc_text += f"- [{insight_type}/{impact}] {finding_text}\n"
205
 
206
- doc_text += "\n---\n"
207
- formatted_docs.append(doc_text)
208
 
209
- # Log statistics
210
  logger.info(f"Retrieved {len(relevant_docs)} chunks from {len(docs_by_id)} unique conversations, skipped {skipped_chunks} without insights")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
- result = "\n".join(formatted_docs)
213
- result = f"""SEARCH RESULTS
214
- ===============
215
-
216
- {result}
217
-
218
- When using context, reference the document title, date, and ID for attribution.
219
- """
220
- return result
221
  except Exception:
222
  logger.opt(exception=True).debug("Error retrieving documents.")
223
 
 
29
  }
30
  }
31
  output_type = "string"
32
+
33
+ # Class variable to store formatted sources for the summarizer tool to access
34
+ # This allows us to pass ONLY lightweight context to the LLM, while the summarizer
35
+ # can append the full sources section to the final answer
36
+ _cached_sources = ""
37
 
38
  def __init__(self, config_path: Path, **kwargs):
39
  super().__init__(**kwargs)
 
174
  # Add this chunk's contextual summary to the document
175
  docs_by_id[doc_id]["chunks"].append(doc.metadata.get("contextual_summary", ""))
176
 
177
+ # Step 4: Format output into TWO sections to reduce LLM token usage
178
+ # Section A: Lightweight context for LLM answer generation (minimal tokens)
179
+ # Section B: Full metadata for sources section (appended to final answer)
180
+
181
+ context_for_llm = [] # Lightweight format for answer generation
182
+ metadata_for_sources = [] # Full format for sources section
183
+
184
  for doc_num, (doc_id, doc_info) in enumerate(docs_by_id.items(), 1):
185
+ # =================================================================
186
+ # SECTION A: CONTEXT FOR LLM (Lightweight - Reduced Token Usage)
187
+ # =================================================================
188
+ # Format: Doc Title | Date | User ID
189
+ # - Contextual Summary 1
190
+ # - Contextual Summary 2
191
+ # This section is sent to the LLM for answer generation
192
+
193
+ context_text = f"Doc {doc_num}: {doc_info['title']} | {doc_info['datetime']}"
194
  if doc_info['user_id']:
195
+ context_text += f" | User: {doc_info['user_id']}"
196
+ context_text += "\n"
 
197
 
198
+ # Add contextual summaries as bullet points (compact format)
199
+ for chunk_context in doc_info['chunks']:
200
+ context_text += f"- {chunk_context}\n"
 
201
 
202
+ context_text += "\n"
203
+ context_for_llm.append(context_text)
204
+
205
+ # =================================================================
206
+ # SECTION B: METADATA FOR SOURCES (Full details for final answer)
207
+ # =================================================================
208
+ # This section is NOT sent to the LLM but appended to the final answer
209
+ # Contains full conversation insights, URLs, and structured metadata
210
+
211
+ source_text = f"Doc {doc_num}: {doc_info['title']} ({doc_info['datetime']})\n"
212
+ source_text += f"Source: {doc_info['source']} | Document ID: {doc_id}"
213
+ if doc_info['url']:
214
+ source_text += f" | [View Chat]({doc_info['url']})"
215
+ if doc_info['user_id']:
216
+ source_text += f" | User ID: {doc_info['user_id']}"
217
+ source_text += "\n\n"
218
+
219
+ # Add conversation insights (summary + key findings)
220
  insights = doc_info['insights']
 
221
 
222
  summary = insights.get("summary", "")
223
  if summary:
224
+ source_text += f"Summary: {summary}\n\n"
225
 
226
  key_findings = insights.get("key_findings", [])
227
  if key_findings:
228
+ source_text += "Key Findings:\n"
229
  for finding in key_findings:
230
  insight_type = finding.get("insight_type", "Unknown")
231
  finding_text = finding.get("finding", "")
232
  impact = finding.get("impact", "Unknown")
233
+ source_text += f"- [{insight_type}/{impact}] {finding_text}\n"
234
 
235
+ source_text += "\n---\n\n"
236
+ metadata_for_sources.append(source_text)
237
 
238
+ # Log statistics for monitoring
239
  logger.info(f"Retrieved {len(relevant_docs)} chunks from {len(docs_by_id)} unique conversations, skipped {skipped_chunks} without insights")
240
+
241
+ # =================================================================
242
+ # STORE SOURCES SEPARATELY AND RETURN ONLY LIGHTWEIGHT CONTEXT
243
+ # =================================================================
244
+ # Strategy: Store formatted sources in class variable for summarizer to access
245
+ # Return ONLY lightweight context to LLM (reduces tokens significantly)
246
+ # Summarizer will append sources directly to final answer
247
+
248
+ # Build lightweight context string (ONLY this goes to LLM)
249
+ context_section = "".join(context_for_llm)
250
+
251
+ # Build formatted sources string (stored for later appending)
252
+ metadata_section = "".join(metadata_for_sources)
253
+
254
+ # Store sources in class variable for summarizer tool to access
255
+ # This ensures we don't send sources to the LLM at all
256
+ MongoDBRetrieverTool._cached_sources = f"""πŸ“š Sources
257
 
258
+ {metadata_section}"""
259
+
260
+ # Return ONLY the lightweight context to be sent to LLM
261
+ logger.info(f"Returning {len(context_section)} chars of context to LLM, {len(MongoDBRetrieverTool._cached_sources)} chars cached for sources")
262
+ return context_section
 
 
 
 
263
  except Exception:
264
  logger.opt(exception=True).debug("Error retrieving documents.")
265
 
src/second_brain_online/application/agents/tools/summarizer.py CHANGED
@@ -1,6 +1,7 @@
1
  from openai import OpenAI
2
  from opik import track
3
  from smolagents import Tool
 
4
 
5
  from second_brain_online.config import settings
6
 
@@ -76,80 +77,42 @@ CRITICAL: This tool generates the final answer that will be returned to the user
76
  }
77
  output_type = "string"
78
 
79
- SYSTEM_PROMPT = """Based on the search results below, create a comprehensive answer to the user's question.
80
 
81
  {content}
82
 
83
  IMPORTANT INSTRUCTIONS:
84
- - Use the CONTEXT sections from each document to generate your answer
85
- - Use the [METADATA FOR SOURCES SECTION] to populate the Sources section (Summary and Key Findings)
86
- - DO NOT use the metadata to answer the question - only use CONTEXT for the answer
87
-
88
- Create a two-part response:
89
-
90
- 1. **ANSWER** (with inline citations):
91
- - Base your answer ONLY on the CONTEXT sections from the documents
92
- - Focus on the core issues, concerns, or highlights identified in the CONTEXT
93
- - DO NOT mention specific customer names or personal identifiers
94
- - Group related insights by topic with bullet points
95
- - Be concise and general, highlighting the problem/concern rather than individuals
96
- - Add INLINE CITATIONS at the end of each point using ONLY this format: [Doc X]
97
- - CRITICAL: Citations must be EXACTLY "[Doc 1]", "[Doc 2]", etc. - nothing else
98
- - DO NOT add any other information in citations (no titles, dates, IDs, or sources in the citation)
99
- - Number each unique document sequentially (Doc 1, Doc 2, etc.)
100
-
101
- CORRECT Example:
102
- β€’ Organizations are planning phone number porting transitions, but custom porting is expensive (~$1,000) and should be done in bulk [Doc 1]
103
- β€’ Questions about additional license requirements for integrations ($45 per user) [Doc 1]
104
- β€’ Ringtone volume issues in embedded Salesforce app [Doc 2]
105
-
106
- WRONG Example (DO NOT DO THIS):
107
- β€’ Custom porting costs around $1,000 [Source: JustCall Checkin, Document ID: abc123]
108
- β€’ License fees are $45 per user [JustCall, 2025-10-07]
109
-
110
- 2. **πŸ“š Sources** (at the end):
111
- - List ONLY UNIQUE documents (de-duplicate by Document ID)
112
- - Number each unique source to match the inline citations (Doc 1, Doc 2, etc.)
113
- - Use the information from [METADATA FOR SOURCES SECTION] to populate Summary and Key Findings
114
- - Format URLs as markdown links: [View Chat](url) or [View Recording](url)
115
-
116
- For EACH unique document, use this EXACT structure with proper spacing and NO bold/italic formatting:
117
-
118
- Doc X: [Title (Date)]
119
- Source: [Type] | Document ID: [ID] | [Hyperlinked URL if available] | [User ID if available]
120
-
121
- Summary: [Copy from the metadata section]
122
-
123
- Key Findings:
124
- - [Type/Impact] [Copy from the metadata section]
125
- - [Type/Impact] [Copy from the metadata section]
126
-
127
- Example:
128
-
129
- Doc 1: JustCall Checkin (2025-10-07)
130
- Source: Justcall Meeting Recordings | Document ID: 4f6f9cee4f
131
-
132
- Summary: Discussion about phone number porting timeline and costs
133
-
134
- Key Findings:
135
- - [Technical Issue/High] Custom porting is expensive at $1,000 per request
136
- - [Feature Request/Medium] Need bulk porting option to reduce costs
137
-
138
- Doc 2: Intercom Conversation (2025-10-05)
139
- Source: Intercom Chats | Document ID: 7a6678783fea06d | [View Chat](https://app.intercom.com/...) | User ID: 432830
140
-
141
- Summary: Customer requesting billing discount due to service interruption
142
-
143
- Key Findings:
144
- - [Pricing Concern/High] Request for discount due to porting delays
145
- - [Policy Gap/Medium] No current policy for inactivity-based discounts
146
-
147
- Provide a focused answer with inline citations followed by the well-formatted Sources section with conversation insights.
148
 
149
  CRITICAL RULES:
150
- - In the ANSWER section, use ONLY [Doc X] format for citations
151
- - In the Sources section, provide full details about each Doc
152
- - NEVER mix citation formats - keep them separate and clean"""
 
153
 
154
  def __init__(self, *args, **kwargs) -> None:
155
  super().__init__(*args, **kwargs)
@@ -162,13 +125,23 @@ CRITICAL RULES:
162
  def forward(self, search_results: str) -> str:
163
  """Generate final answer with sources based on search results.
164
 
 
 
 
 
 
165
  Args:
166
- search_results: The complete search results to analyze (includes the original query)
167
 
168
  Returns:
169
- Complete answer with Sources section
170
  """
171
 
 
 
 
 
 
172
  result = self.__client.chat.completions.create(
173
  model=settings.OPENAI_MODEL_ID,
174
  messages=[
@@ -186,4 +159,19 @@ CRITICAL RULES:
186
  timeout=45.0, # Reduced timeout
187
  )
188
 
189
- return result.choices[0].message.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from openai import OpenAI
2
  from opik import track
3
  from smolagents import Tool
4
+ from loguru import logger
5
 
6
  from second_brain_online.config import settings
7
 
 
77
  }
78
  output_type = "string"
79
 
80
+ SYSTEM_PROMPT = """Based on the context below, create a comprehensive answer to the user's question.
81
 
82
  {content}
83
 
84
  IMPORTANT INSTRUCTIONS:
85
+ The context contains lightweight information from retrieved documents with this format:
86
+ - Doc X: Title | Date | User ID
87
+ - Contextual summaries as bullet points
88
+
89
+ Generate ONLY the ANSWER section with inline citations:
90
+
91
+ **ANSWER** (with inline citations):
92
+ - Base your answer ONLY on the provided context
93
+ - Focus on the core issues, concerns, or highlights identified
94
+ - DO NOT mention specific customer names or personal identifiers
95
+ - Group related insights by topic with bullet points
96
+ - Be concise and general, highlighting the problem/concern rather than individuals
97
+ - Add INLINE CITATIONS at the end of each point using ONLY this format: [Doc X]
98
+ - CRITICAL: Citations must be EXACTLY "[Doc 1]", "[Doc 2]", etc. - nothing else
99
+ - DO NOT add any other information in citations (no titles, dates, IDs, or sources)
100
+ - Number each unique document sequentially (Doc 1, Doc 2, etc.)
101
+
102
+ CORRECT Example:
103
+ β€’ Organizations are planning phone number porting transitions, but custom porting is expensive (~$1,000) and should be done in bulk [Doc 1]
104
+ β€’ Questions about additional license requirements for integrations ($45 per user) [Doc 1]
105
+ β€’ Ringtone volume issues in embedded Salesforce app [Doc 2]
106
+
107
+ WRONG Example (DO NOT DO THIS):
108
+ β€’ Custom porting costs around $1,000 [Source: JustCall Checkin, Document ID: abc123]
109
+ β€’ License fees are $45 per user [JustCall, 2025-10-07]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  CRITICAL RULES:
112
+ - Generate answer ONLY with the exact context provided above
113
+ - Use ONLY [Doc X] format for citations
114
+ - DO NOT add a Sources section (it will be appended automatically)
115
+ - Keep the answer focused and well-structured with bullet points"""
116
 
117
  def __init__(self, *args, **kwargs) -> None:
118
  super().__init__(*args, **kwargs)
 
125
  def forward(self, search_results: str) -> str:
126
  """Generate final answer with sources based on search results.
127
 
128
+ This method:
129
+ 1. Sends ONLY lightweight context to the LLM for answer generation
130
+ 2. Retrieves pre-formatted sources from MongoDBRetrieverTool._cached_sources
131
+ 3. Appends sources directly to the LLM's answer
132
+
133
  Args:
134
+ search_results: Lightweight context from the retriever (NOT including sources)
135
 
136
  Returns:
137
+ Complete answer with Sources section appended
138
  """
139
 
140
+ # Import here to avoid circular dependency
141
+ from second_brain_online.application.agents.tools.mongodb_retriever import MongoDBRetrieverTool
142
+
143
+ # Step 1: Generate answer from LLM using ONLY lightweight context
144
+ # This significantly reduces token usage compared to sending full sources
145
  result = self.__client.chat.completions.create(
146
  model=settings.OPENAI_MODEL_ID,
147
  messages=[
 
159
  timeout=45.0, # Reduced timeout
160
  )
161
 
162
+ llm_answer = result.choices[0].message.content
163
+
164
+ # Step 2: Retrieve pre-formatted sources from the retriever's class variable
165
+ # These sources were cached during the retrieval step and are NOT sent to LLM
166
+ cached_sources = MongoDBRetrieverTool._cached_sources
167
+
168
+ # Step 3: Append sources directly to the answer
169
+ # This ensures sources are included in the final output without being sent to LLM
170
+ if cached_sources:
171
+ final_answer = f"{llm_answer}\n\n{cached_sources}"
172
+ logger.info(f"Appended {len(cached_sources)} chars of sources to {len(llm_answer)} chars of LLM answer")
173
+ else:
174
+ final_answer = llm_answer
175
+ logger.warning("No cached sources found - returning LLM answer only")
176
+
177
+ return final_answer
src/second_brain_online/application/ui/customer_profile_ui.py ADDED
@@ -0,0 +1,722 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from typing import Any, Dict, List, Tuple, Optional
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+
7
+ import gradio as gr
8
+ import pandas as pd
9
+
10
+ from second_brain_online.config import settings
11
+
12
+
13
+ class CustomerProfileUI:
14
+ """Gradio UI for displaying customer profile analyses with search functionality."""
15
+
16
+ def __init__(self, data_dir: str = None):
17
+ # Default to the offline data directory
18
+ if data_dir is None:
19
+ # Go up from second-brain-online to second-brain-offline/data/customer_analyses
20
+ data_dir = Path(__file__).parent.parent.parent.parent.parent.parent / "second-brain-offline" / "data" / "customer_analyses"
21
+
22
+ self.data_dir = Path(data_dir)
23
+ self.analyses_cache = None
24
+ self.load_analyses_from_disk()
25
+ self.setup_ui()
26
+
27
+ def load_analyses_from_disk(self):
28
+ """Load all customer analyses from JSON files on disk."""
29
+ try:
30
+ # Check if the all-in-one file exists
31
+ all_file = self.data_dir / "customer_analyses_all.json"
32
+ if all_file.exists():
33
+ print(f"πŸ“‚ Loading analyses from: {all_file}")
34
+ with open(all_file, 'r') as f:
35
+ self.analyses_cache = json.load(f)
36
+ print(f"βœ… Loaded {len(self.analyses_cache)} analyses from disk")
37
+ else:
38
+ # Load individual files
39
+ print(f"πŸ“‚ Loading analyses from directory: {self.data_dir}")
40
+ self.analyses_cache = []
41
+ for json_file in self.data_dir.glob("customer_analysis_*.json"):
42
+ try:
43
+ with open(json_file, 'r') as f:
44
+ analysis = json.load(f)
45
+ self.analyses_cache.append(analysis)
46
+ except Exception as e:
47
+ print(f"⚠️ Failed to load {json_file}: {e}")
48
+ print(f"βœ… Loaded {len(self.analyses_cache)} analyses from {len(list(self.data_dir.glob('customer_analysis_*.json')))} files")
49
+ except Exception as e:
50
+ print(f"❌ Failed to load analyses from disk: {e}")
51
+ self.analyses_cache = []
52
+
53
+ def format_text_for_table(self, text_list, max_items=3):
54
+ """Format a list of text items for better table display with proper bullet point spacing."""
55
+ if not text_list:
56
+ return "No items"
57
+
58
+ if isinstance(text_list, str):
59
+ return text_list
60
+
61
+ if len(text_list) == 0:
62
+ return "No items"
63
+ elif len(text_list) == 1:
64
+ return f"β€’ {text_list[0]}"
65
+ else:
66
+ # Format bullet points with proper spacing
67
+ formatted_items = []
68
+ items_to_show = min(len(text_list), max_items)
69
+
70
+ for i in range(items_to_show):
71
+ formatted_items.append(f"β€’ {text_list[i]}")
72
+
73
+ # Add indicator for remaining items if any
74
+ if len(text_list) > max_items:
75
+ remaining = len(text_list) - max_items
76
+ formatted_items.append(f"β€’ ... and {remaining} more")
77
+
78
+ return "\n".join(formatted_items)
79
+
80
+ def setup_ui(self):
81
+ """Setup the Gradio interface for customer profile analyses."""
82
+ with gr.Blocks(
83
+ title="Customer Profile Analysis Dashboard",
84
+ theme=gr.themes.Soft(),
85
+ css="""
86
+ .customer-card {
87
+ border: 1px solid #e0e0e0;
88
+ border-radius: 8px;
89
+ padding: 16px;
90
+ margin: 8px 0;
91
+ background-color: #f8f9fa;
92
+ }
93
+ .customer-title {
94
+ font-weight: bold;
95
+ color: #2c3e50;
96
+ margin-bottom: 8px;
97
+ font-size: 1.1em;
98
+ }
99
+ .customer-meta {
100
+ font-size: 0.9em;
101
+ color: #6c757d;
102
+ margin-bottom: 12px;
103
+ }
104
+ .key-changes {
105
+ background-color: #fff3cd;
106
+ border-left: 4px solid #ffc107;
107
+ padding: 8px 12px;
108
+ margin: 8px 0;
109
+ border-radius: 4px;
110
+ font-size: 0.9em;
111
+ }
112
+ .recommendations {
113
+ background-color: #d1ecf1;
114
+ border-left: 4px solid #17a2b8;
115
+ padding: 8px 12px;
116
+ margin: 8px 0;
117
+ border-radius: 4px;
118
+ font-size: 0.9em;
119
+ }
120
+ .email-strategy {
121
+ background-color: #d4edda;
122
+ border-left: 4px solid #28a745;
123
+ padding: 8px 12px;
124
+ margin: 8px 0;
125
+ border-radius: 4px;
126
+ font-size: 0.9em;
127
+ }
128
+ .follow-up-email {
129
+ background-color: #e2e3e5;
130
+ border-left: 4px solid #6c757d;
131
+ padding: 8px 12px;
132
+ margin: 8px 0;
133
+ border-radius: 4px;
134
+ font-size: 0.9em;
135
+ }
136
+ .search-highlight {
137
+ background-color: #fff3cd;
138
+ padding: 2px 4px;
139
+ border-radius: 3px;
140
+ }
141
+ .dataframe {
142
+ font-size: 0.9em;
143
+ line-height: 1.4;
144
+ }
145
+ .dataframe td {
146
+ padding: 8px 6px;
147
+ vertical-align: top;
148
+ word-wrap: break-word;
149
+ white-space: pre-wrap;
150
+ }
151
+ .dataframe th {
152
+ padding: 8px 6px;
153
+ font-weight: bold;
154
+ background-color: #f8f9fa;
155
+ }
156
+ """
157
+ ) as self.interface:
158
+
159
+ gr.Markdown("# πŸ“Š Customer Profile Analysis Dashboard")
160
+ gr.Markdown("View and search through customer profile analyses with AI-generated insights and follow-up emails.")
161
+
162
+ # Statistics section at the top
163
+ with gr.Row():
164
+ with gr.Column(scale=1):
165
+ gr.Markdown("### πŸ“ˆ Dashboard Statistics")
166
+ self.stats_view = gr.HTML(
167
+ value=self.generate_statistics(),
168
+ label="Analysis Statistics",
169
+ show_label=False
170
+ )
171
+
172
+ # Filter buttons
173
+ with gr.Row():
174
+ gr.Markdown("### πŸ” Quick Filters")
175
+ with gr.Row():
176
+ self.strategy_dropdown = gr.Dropdown(
177
+ choices=self.get_strategy_choices(),
178
+ label="Email Strategy",
179
+ value=None,
180
+ multiselect=False,
181
+ scale=2
182
+ )
183
+ self.priority_dropdown = gr.Dropdown(
184
+ choices=self.get_priority_choices(),
185
+ label="Priority Level",
186
+ value=None,
187
+ multiselect=False,
188
+ scale=2
189
+ )
190
+ self.clear_filters_btn = gr.Button("πŸ”„ Clear All Filters", scale=1, variant="secondary")
191
+
192
+ gr.Markdown("---") # Separator line
193
+
194
+ # Search functionality
195
+ with gr.Row():
196
+ self.search_input = gr.Textbox(
197
+ label="Search Customer Analyses",
198
+ placeholder="Search by company name, customer ID, key changes, recommendations, or email content...",
199
+ scale=4
200
+ )
201
+ self.clear_search_btn = gr.Button("Clear Search", scale=1)
202
+ self.refresh_btn = gr.Button("Refresh Data", scale=1, variant="secondary")
203
+
204
+ # Main data table
205
+ self.customer_table = gr.Dataframe(
206
+ headers=["Customer ID", "Company Name", "Analysis Date", "Key Changes", "Recommendations", "Email Strategy"],
207
+ datatype=["str", "str", "str", "str", "str", "str"],
208
+ interactive=False,
209
+ label="Customer Profile Analyses",
210
+ wrap=True,
211
+ max_height=600,
212
+ column_widths=["10%", "20%", "12%", "25%", "25%", "8%"],
213
+ value=self.load_customer_analyses()
214
+ )
215
+
216
+ # Detailed view accordion
217
+ with gr.Accordion("πŸ“‹ Detailed Analysis View", open=False):
218
+ self.detailed_view = gr.HTML(
219
+ value="Select a row from the table above to view detailed analysis",
220
+ label="Detailed Analysis"
221
+ )
222
+
223
+ # Event handlers
224
+ self.search_input.change(
225
+ fn=self.filter_customer_analyses,
226
+ inputs=[self.search_input],
227
+ outputs=[self.customer_table]
228
+ )
229
+
230
+ self.clear_search_btn.click(
231
+ fn=self.clear_search,
232
+ inputs=[],
233
+ outputs=[self.search_input, self.customer_table]
234
+ )
235
+
236
+ self.refresh_btn.click(
237
+ fn=self.refresh_data,
238
+ inputs=[],
239
+ outputs=[self.customer_table, self.stats_view]
240
+ )
241
+
242
+ self.strategy_dropdown.change(
243
+ fn=self.filter_by_strategy,
244
+ inputs=[self.strategy_dropdown],
245
+ outputs=[self.customer_table]
246
+ )
247
+
248
+ self.priority_dropdown.change(
249
+ fn=self.filter_by_priority,
250
+ inputs=[self.priority_dropdown],
251
+ outputs=[self.customer_table]
252
+ )
253
+
254
+ self.clear_filters_btn.click(
255
+ fn=self.clear_filters,
256
+ inputs=[],
257
+ outputs=[self.strategy_dropdown, self.priority_dropdown, self.customer_table]
258
+ )
259
+
260
+ self.customer_table.select(
261
+ fn=self.show_detailed_analysis,
262
+ inputs=[self.customer_table],
263
+ outputs=[self.detailed_view]
264
+ )
265
+
266
+ def get_strategy_choices(self):
267
+ """Get unique email strategy choices for dropdown."""
268
+ if not self.analyses_cache:
269
+ return []
270
+
271
+ strategies = set()
272
+ for doc in self.analyses_cache:
273
+ strategy = doc.get("email_strategy", {}).get("email_type", "Unknown")
274
+ strategies.add(strategy)
275
+
276
+ return sorted(list(strategies))
277
+
278
+ def get_priority_choices(self):
279
+ """Get unique priority choices for dropdown."""
280
+ if not self.analyses_cache:
281
+ return []
282
+
283
+ priorities = set()
284
+ for doc in self.analyses_cache:
285
+ priority = doc.get("email_strategy", {}).get("priority", "Unknown")
286
+ priorities.add(priority)
287
+
288
+ return sorted(list(priorities))
289
+
290
+ def filter_by_strategy(self, strategy):
291
+ """Filter analyses by email strategy."""
292
+ if not strategy:
293
+ return self.load_customer_analyses()
294
+
295
+ if not self.analyses_cache:
296
+ return pd.DataFrame(columns=["Customer ID", "Company Name", "Analysis Date", "Key Changes", "Recommendations", "Email Strategy"])
297
+
298
+ try:
299
+ filtered_docs = []
300
+ for doc in self.analyses_cache:
301
+ doc_strategy = doc.get("email_strategy", {}).get("email_type", "Unknown")
302
+ if doc_strategy == strategy:
303
+ filtered_docs.append(doc)
304
+
305
+ return self.format_analyses_for_table(filtered_docs)
306
+
307
+ except Exception as e:
308
+ print(f"❌ Error filtering by strategy: {e}")
309
+ return self.load_customer_analyses()
310
+
311
+ def filter_by_priority(self, priority):
312
+ """Filter analyses by priority level."""
313
+ if not priority:
314
+ return self.load_customer_analyses()
315
+
316
+ if not self.analyses_cache:
317
+ return pd.DataFrame(columns=["Customer ID", "Company Name", "Analysis Date", "Key Changes", "Recommendations", "Email Strategy"])
318
+
319
+ try:
320
+ filtered_docs = []
321
+ for doc in self.analyses_cache:
322
+ doc_priority = doc.get("email_strategy", {}).get("priority", "Unknown")
323
+ if doc_priority == priority:
324
+ filtered_docs.append(doc)
325
+
326
+ return self.format_analyses_for_table(filtered_docs)
327
+
328
+ except Exception as e:
329
+ print(f"❌ Error filtering by priority: {e}")
330
+ return self.load_customer_analyses()
331
+
332
+ def clear_filters(self):
333
+ """Clear all filters and reload data."""
334
+ return None, None, self.load_customer_analyses()
335
+
336
+ def format_analyses_for_table(self, docs):
337
+ """Format a list of documents for table display."""
338
+ data = []
339
+ for doc in docs:
340
+ customer_id = str(doc.get("customer_id", "Unknown"))
341
+ company_name = doc.get("company_name", "Unknown Company")
342
+ analysis_date = doc.get("analysis_date", "Unknown Date")
343
+
344
+ # Format key changes with proper bullet points
345
+ key_changes = doc.get("key_changes", [])
346
+ key_changes_text = self.format_text_for_table(key_changes, max_items=3)
347
+
348
+ # Format recommendations with proper bullet points
349
+ recommendations = doc.get("recommendations", [])
350
+ recommendations_text = self.format_text_for_table(recommendations, max_items=3)
351
+
352
+ # Format email strategy
353
+ email_strategy = doc.get("email_strategy", {})
354
+ if isinstance(email_strategy, dict):
355
+ strategy_type = email_strategy.get("email_type", "Unknown")
356
+ priority = email_strategy.get("priority", "Unknown")
357
+ email_strategy_text = f"{strategy_type.replace('_', ' ').title()}\n({priority.title()})"
358
+ else:
359
+ email_strategy_text = str(email_strategy)[:60] + "..." if len(str(email_strategy)) > 60 else str(email_strategy)
360
+
361
+ data.append([
362
+ customer_id,
363
+ company_name,
364
+ analysis_date,
365
+ key_changes_text,
366
+ recommendations_text,
367
+ email_strategy_text
368
+ ])
369
+
370
+ return pd.DataFrame(data, columns=["Customer ID", "Company Name", "Analysis Date", "Key Changes", "Recommendations", "Email Strategy"])
371
+
372
def load_customer_analyses(self, limit: int = 100) -> pd.DataFrame:
    """Return the first ``limit`` cached analyses as a display table.

    Args:
        limit: Maximum number of cached documents to include.

    Returns:
        The DataFrame built by ``format_analyses_for_table``; an empty
        DataFrame with the table's six columns when the cache is empty or
        formatting fails (the error is printed).
    """
    def _empty_table() -> pd.DataFrame:
        return pd.DataFrame(columns=["Customer ID", "Company Name", "Analysis Date", "Key Changes", "Recommendations", "Email Strategy"])

    cached_docs = self.analyses_cache
    if not cached_docs:
        return _empty_table()

    try:
        table = self.format_analyses_for_table(cached_docs[:limit])
    except Exception as e:
        print(f"❌ Error loading customer analyses: {e}")
        return _empty_table()
    return table
383
+
384
def filter_customer_analyses(self, search_term: str) -> pd.DataFrame:
    """Return the cached analyses that contain ``search_term`` (case-insensitive).

    The term is matched as a substring against the customer id, company
    name, every key change, every recommendation, the email strategy's
    ``key_messaging`` and the follow-up email's ``subject`` and ``body``.

    Fields are coerced to text individually, so a document with a ``None``
    or unexpectedly-typed field simply fails to match on that field. It no
    longer raises and causes the whole unfiltered table to be returned.

    Args:
        search_term: Text typed by the user. ``None``, empty or
            whitespace-only input returns the unfiltered table.

    Returns:
        A table of at most 100 matching analyses. If formatting fails the
        error is printed and the unfiltered table is returned instead.
    """
    if not search_term or not search_term.strip():
        return self.load_customer_analyses()

    if not self.analyses_cache:
        return pd.DataFrame(columns=["Customer ID", "Company Name", "Analysis Date", "Key Changes", "Recommendations", "Email Strategy"])

    needle = search_term.lower()

    def _as_items(value):
        """Normalise a list-or-scalar field to a list of entries."""
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    def _nested(doc, outer_key, inner_key):
        """Read ``doc[outer_key][inner_key]`` when the outer value is a dict."""
        container = doc.get(outer_key)
        return container.get(inner_key) if isinstance(container, dict) else None

    def _matches(doc) -> bool:
        candidates = [doc.get("customer_id"), doc.get("company_name")]
        candidates.extend(_as_items(doc.get("key_changes")))
        candidates.extend(_as_items(doc.get("recommendations")))
        candidates.append(_nested(doc, "email_strategy", "key_messaging"))
        candidates.append(_nested(doc, "follow_up_email", "subject"))
        candidates.append(_nested(doc, "follow_up_email", "body"))
        return any(
            needle in str(text).lower()
            for text in candidates
            if text is not None
        )

    try:
        filtered_docs = [doc for doc in self.analyses_cache if _matches(doc)]
        return self.format_analyses_for_table(filtered_docs[:100])  # Limit to 100 results
    except Exception as e:
        # Best-effort UI callback: report and fall back to the full table.
        print(f"❌ Error filtering customer analyses: {e}")
        return self.load_customer_analyses()
413
+
414
def clear_search(self):
    """Empty the search box and show the unfiltered table again.

    Returns:
        A 2-tuple ``("", table)``: the empty string for the search input and
        the result of ``load_customer_analyses()``.
    """
    unfiltered_table = self.load_customer_analyses()
    return "", unfiltered_table
417
+
418
def refresh_data(self):
    """Reload the analyses and rebuild both the table and the statistics.

    Calls ``load_analyses_from_disk()`` first so the table and statistics
    are computed from the reloaded data.

    Returns:
        A 2-tuple ``(table, statistics)`` from ``load_customer_analyses()``
        and ``generate_statistics()``.
    """
    self.load_analyses_from_disk()
    table = self.load_customer_analyses()
    statistics = self.generate_statistics()
    return table, statistics
422
+
423
def show_detailed_analysis(self, table_data, evt: gr.SelectData):
    """Build the detail view for the table row the user selected.

    Args:
        table_data: The DataFrame currently shown in the table; its first
            column holds the customer id.
        evt: Selection event; ``evt.index[0]`` is used as the row position.

    Returns:
        The HTML from ``format_detailed_analysis`` for the first cached
        analysis whose ``customer_id`` matches the selected row, or a short
        plain-text message when the row is out of range, the cache is empty,
        no analysis matches, or an error occurs.
    """
    try:
        row_position = evt.index[0]
        if row_position >= len(table_data):
            return "Please select a valid row from the table."

        # Positional access on both axes: selected row, then its first cell.
        customer_id = str(table_data.iloc[row_position].iloc[0])

        cached_analyses = self.analyses_cache
        if not cached_analyses:
            return "No analyses loaded from disk."

        # First cached analysis with the same id, compared as strings.
        doc = next(
            (
                analysis
                for analysis in cached_analyses
                if str(analysis.get("customer_id")) == customer_id
            ),
            None,
        )
        if not doc:
            return f"No detailed data found for customer {customer_id}"

        return self.format_detailed_analysis(doc)

    except Exception as e:
        return f"Error loading detailed analysis: {str(e)}"
452
+
453
def format_detailed_analysis(self, doc: dict) -> str:
    """Render one customer analysis document as an HTML detail card.

    Every value taken from ``doc`` is HTML-escaped before it is placed in
    the markup, and missing, ``None`` or unexpectedly-typed fields fall back
    to placeholders instead of raising.

    Args:
        doc: Analysis document. Keys read: ``customer_id``,
            ``company_name``, ``analysis_date``, ``key_changes``,
            ``recommendations``, ``email_strategy``,
            ``conversation_insights`` and ``follow_up_email``.

    Returns:
        HTML markup for the card: key changes, recommendations, up to five
        conversation insights (three findings each), the email strategy and
        the follow-up email.
    """
    # Function-scope import: the module's import block is not part of this method.
    from html import escape

    def _text(value, default=""):
        """Return ``value`` (or ``default`` when it is None) as escaped text."""
        return escape(str(default if value is None else value))

    def _label(value, default="Unknown"):
        """Return a non-empty string for a level/type field."""
        return str(value) if value else default

    def _level_color(level):
        """Badge colour: red for high, amber for medium, green otherwise."""
        normalized = str(level or "").lower()
        if normalized == "high":
            return "#dc2626"
        if normalized == "medium":
            return "#f59e0b"
        return "#10b981"

    def _list_items(values):
        """Render a list (or a single scalar) as ``<li>`` elements."""
        if values is None:
            return ""
        if isinstance(values, list):
            return "".join(f"<li>{_text(item)}</li>" for item in values)
        return f"<li>{_text(values)}</li>"

    customer_id = _text(doc.get("customer_id", "Unknown"), "Unknown")
    company_name = _text(doc.get("company_name", "Unknown Company"), "Unknown Company")
    analysis_date = _text(doc.get("analysis_date", "Unknown Date"), "Unknown Date")

    key_changes_html = _list_items(doc.get("key_changes", []))
    recommendations_html = _list_items(doc.get("recommendations", []))

    # --- Email strategy -------------------------------------------------
    email_strategy = doc.get("email_strategy", {})
    if isinstance(email_strategy, dict):
        strategy_type = _label(email_strategy.get("email_type"))
        strategy_priority = _label(email_strategy.get("priority"))
        key_messaging = _text(email_strategy.get("key_messaging"), "No messaging provided")
        strategy_cta = _text(email_strategy.get("call_to_action"), "No call to action")
        strategy_html = f"""
        <div class="email-strategy">
            <strong>Type:</strong> {escape(strategy_type.title())}<br>
            <strong>Priority:</strong> {escape(strategy_priority.title())}<br>
            <strong>Key Messaging:</strong> {key_messaging}<br>
            <strong>Call to Action:</strong> {strategy_cta}
        </div>
        """
    else:
        strategy_html = f"<div class='email-strategy'>{_text(email_strategy)}</div>"

    # --- Conversation insights -------------------------------------------
    conversation_insights = doc.get("conversation_insights", [])
    if isinstance(conversation_insights, list) and conversation_insights:
        pieces = ["""
        <div style="background-color: #f0f4f8; border-left: 4px solid #3b82f6; padding: 12px 16px; margin: 8px 0; border-radius: 4px;">
            <h4 style="margin: 0 0 12px 0; color: #1e40af;">πŸ’¬ Recent Conversation Insights</h4>
        """]

        for position, conv in enumerate(conversation_insights[:5], 1):  # Show up to 5 conversations
            if not isinstance(conv, dict):
                # A bare value is shown as the summary rather than crashing.
                conv = {"summary": conv}

            pieces.append(f"""
            <div style="background-color: white; padding: 12px; margin: 8px 0; border-radius: 4px; border: 1px solid #e0e7ff;">
                <div style="font-weight: bold; color: #1e40af; margin-bottom: 4px;">
                    {position}. {_text(conv.get('title'), 'Unknown')}
                </div>
                <div style="font-size: 0.85em; color: #6b7280; margin-bottom: 8px;">
                    {_text(conv.get('source'), 'Unknown')} | {_text(conv.get('datetime'), 'Unknown')}
                </div>
                <div style="margin-bottom: 8px;">
                    <strong>Summary:</strong> {_text(conv.get('summary'), 'No summary available')}
                </div>
            """)

            key_findings = conv.get("key_findings", [])
            if isinstance(key_findings, list) and key_findings:
                pieces.append('<div style="margin-top: 8px;"><strong>Key Findings:</strong><ul style="margin: 4px 0; padding-left: 20px;">')
                for finding in key_findings[:3]:  # Show top 3 findings
                    if not isinstance(finding, dict):
                        finding = {"finding": finding}
                    impact = finding.get("impact")
                    pieces.append(f"""
                    <li style="margin: 4px 0;">
                        <span style="background-color: {_level_color(impact)}; color: white; padding: 2px 6px; border-radius: 3px; font-size: 0.75em; font-weight: bold;">
                            {_text(finding.get('insight_type'))}/{_text(impact)}
                        </span>
                        {_text(finding.get('finding'))}
                    </li>
                    """)
                pieces.append('</ul></div>')

            pieces.append('</div>')

        pieces.append('</div>')
        insights_html = "".join(pieces)
    else:
        insights_html = """
        <div style="background-color: #fef3c7; border-left: 4px solid #f59e0b; padding: 12px 16px; margin: 8px 0; border-radius: 4px;">
            <strong>ℹ️ No conversation insights available for this customer</strong>
        </div>
        """

    # --- Follow-up email --------------------------------------------------
    follow_up_email = doc.get("follow_up_email", {})
    if isinstance(follow_up_email, dict):
        email_priority = _label(follow_up_email.get("priority"))
        email_html = f"""
        <div class="follow-up-email">
            <strong>Subject:</strong> {_text(follow_up_email.get('subject'), 'No Subject')}<br>
            <strong>Priority:</strong> <span style="background-color: {_level_color(email_priority)}; color: white; padding: 2px 8px; border-radius: 3px; font-weight: bold;">{escape(email_priority.upper())}</span><br>
            <strong>Body:</strong><br>
            <div style="margin-left: 20px; margin-top: 8px; white-space: pre-wrap;">{_text(follow_up_email.get('body'), 'No body content')}</div>
            <br><strong>Call to Action:</strong> {_text(follow_up_email.get('call_to_action'), 'No call to action')}
        </div>
        """
    else:
        email_html = f"<div class='follow-up-email'>{_text(follow_up_email)}</div>"

    return f"""
    <div class="customer-card">
        <div class="customer-title">{company_name} (ID: {customer_id})</div>
        <div class="customer-meta">
            <strong>Analysis Date:</strong> {analysis_date}
        </div>

        <h4>πŸ” Key Changes</h4>
        <div class="key-changes">
            <ul>{key_changes_html}</ul>
        </div>

        <h4>πŸ’‘ Recommendations</h4>
        <div class="recommendations">
            <ul>{recommendations_html}</ul>
        </div>

        <h4>πŸ’¬ Conversation Insights</h4>
        {insights_html}

        <h4>πŸ“§ Email Strategy</h4>
        {strategy_html}

        <h4>πŸ“¨ Follow-up Email</h4>
        {email_html}
    </div>
    """
609
+
610
def generate_statistics(self) -> str:
    """Build the HTML statistics panel for the cached analyses.

    The panel shows the total number of analyses plus the distribution of
    ``email_strategy.email_type`` and ``email_strategy.priority`` values,
    each ordered by descending count. Missing, empty or ``None`` values, and
    documents whose ``email_strategy`` is not a dict, are counted together
    under "Unknown". Labels are escaped both for HTML and for the inline
    ``onclick`` JavaScript string.

    Returns:
        The panel's HTML; ``"No analyses loaded from disk."`` when the cache
        is empty; or an ``"Error generating statistics: ..."`` message if
        building the panel fails.
    """
    # Function-scope imports: the module's import block is not part of this method.
    import json
    from collections import Counter
    from html import escape

    if not self.analyses_cache:
        return "No analyses loaded from disk."

    def _strategy_value(doc, key):
        """Read ``doc['email_strategy'][key]`` as a non-empty string label."""
        strategy = doc.get("email_strategy", {})
        value = strategy.get(key) if isinstance(strategy, dict) else None
        return str(value) if value else "Unknown"

    def _row(label, count, total, js_function, extra_style=""):
        """Render one clickable "label / count (percent)" line."""
        percentage = count / total * 100
        # json.dumps yields a safe JS string literal; escape() then makes it
        # safe inside the double-quoted HTML attribute.
        js_argument = escape(json.dumps(label))
        return f"""
                <div style="display: flex; justify-content: space-between; margin: 5px 0; padding: 3px 0; border-bottom: 1px solid rgba(255,255,255,0.2);">
                    <span style="{extra_style}cursor: pointer; padding: 2px 6px; border-radius: 3px; transition: background-color 0.2s;"
                          onmouseover="this.style.backgroundColor='rgba(255,255,255,0.2)'"
                          onmouseout="this.style.backgroundColor='transparent'"
                          onclick="{js_function}({js_argument})">{escape(label.title())}</span>
                    <span style="font-weight: bold;">{count} ({percentage:.1f}%)</span>
                </div>
        """

    def _priority_color(priority):
        """Text colour for a priority label."""
        normalized = priority.lower()
        if normalized == "high":
            return "#ff6b6b"
        if normalized == "medium":
            return "#feca57"
        return "#48dbfb"

    try:
        total_count = len(self.analyses_cache)

        # most_common() orders by descending count; ties keep first-seen order.
        strategy_stats = Counter(
            _strategy_value(doc, "email_type") for doc in self.analyses_cache
        ).most_common()
        priority_stats = Counter(
            _strategy_value(doc, "priority") for doc in self.analyses_cache
        ).most_common()

        pieces = [f"""
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 12px; margin: 10px 0; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 20px;">

                <!-- Total Counts Card -->
                <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px; backdrop-filter: blur(10px);">
                    <h4 style="margin: 0 0 10px 0; color: #fff; font-size: 1.1em;">πŸ“Š Total Accounts</h4>
                    <div style="font-size: 2em; font-weight: bold; color: #fff;">{total_count}</div>
                </div>

                <!-- Email Strategies Card -->
                <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px; backdrop-filter: blur(10px);">
                    <h4 style="margin: 0 0 10px 0; color: #fff; font-size: 1.1em;">πŸ“§ Email Strategies</h4>
                    <div style="font-size: 0.9em;">
        """]

        pieces.extend(
            _row(strategy_type, count, total_count, "filterByStrategy")
            for strategy_type, count in strategy_stats
        )

        pieces.append("""
                    </div>
                </div>

                <!-- Priority Distribution Card -->
                <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 8px; backdrop-filter: blur(10px);">
                    <h4 style="margin: 0 0 10px 0; color: #fff; font-size: 1.1em;">⚑ Priority Levels</h4>
                    <div style="font-size: 0.9em;">
        """)

        pieces.extend(
            _row(
                priority,
                count,
                total_count,
                "filterByPriority",
                extra_style=f"color: {_priority_color(priority)}; font-weight: bold; ",
            )
            for priority, count in priority_stats
        )

        pieces.append("""
                    </div>
                </div>

            </div>
        </div>

        <script>
        function filterByStrategy(strategy) {
            // This will be handled by Gradio's JavaScript interface
            console.log('Filtering by strategy:', strategy);
            // We'll implement this with Gradio's interface
        }

        function filterByPriority(priority) {
            // This will be handled by Gradio's JavaScript interface
            console.log('Filtering by priority:', priority);
            // We'll implement this with Gradio's interface
        }
        </script>
        """)

        return "".join(pieces)

    except Exception as e:
        return f"Error generating statistics: {str(e)}"
719
+
720
def launch(self, **kwargs):
    """Start the UI by delegating to ``self.interface.launch``.

    Args:
        **kwargs: Forwarded unchanged to ``self.interface.launch``.

    Returns:
        Whatever ``self.interface.launch`` returns.
    """
    app = self.interface
    return app.launch(**kwargs)
tools/customer_profile_app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Customer Profile Analysis Dashboard App.
4
+
5
+ This app provides a Gradio UI for viewing and searching customer profile analyses
6
+ from the customer_profile_analyses MongoDB collection.
7
+ """
8
+
9
+ import click
10
+ from second_brain_online.application.ui.customer_profile_ui import CustomerProfileUI
11
+
12
+
13
@click.command()
@click.option("--host", type=str, default="127.0.0.1", help="Host to run the server on")
@click.option("--port", type=int, default=7860, help="Port to run the server on")
@click.option("--share", is_flag=True, default=False, help="Create a public link for the interface")
@click.option("--debug", is_flag=True, default=False, help="Enable debug mode")
def main(host: str, port: int, share: bool, debug: bool) -> None:
    """Launch the Customer Profile Analysis Dashboard.

    This dashboard allows you to:
    - View all customer profile analyses in a searchable table
    - Search by company name, customer ID, key changes, recommendations, or email content
    - View detailed analysis for each customer
    - See statistics about the analyses

    Args:
        host: Host to run the server on
        port: Port to run the server on
        share: Create a public link for the interface
        debug: Enable debug mode
    """
    # NOTE: the docstring above doubles as the command's --help text, so it is
    # kept verbatim.
    print("πŸš€ Starting Customer Profile Analysis Dashboard...")
    print(f"πŸ“ Server will be available at: http://{host}:{port}")

    if share:
        print("🌐 Creating public link...")

    if debug:
        print("πŸ› Debug mode enabled")

    # Options handed to the UI's launch() call.
    launch_options = {
        "server_name": host,
        "server_port": port,
        "share": share,
        "debug": debug,
        "show_error": True,
        "quiet": False,
    }

    try:
        # Build the UI, then start serving it.
        dashboard_ui = CustomerProfileUI()
        dashboard_ui.launch(**launch_options)
    except Exception as e:
        # Report the failure, then re-raise so the original traceback is kept.
        print(f"❌ Failed to start the dashboard: {e}")
        raise


if __name__ == "__main__":
    main()
tools/user_interaction_ui.py ADDED
@@ -0,0 +1,516 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ User Interaction Analysis Dashboard
4
+
5
+ A comprehensive UI for viewing and analyzing user interactions across
6
+ Intercom chats and JustCall meetings with priority-based filtering.
7
+ """
8
+
9
import html
import os
from typing import List, Dict, Any, Tuple, Optional

import gradio as gr
import pandas as pd
from loguru import logger
from pymongo import MongoClient
14
+
15
# MongoDB configuration.
# The connection string is read from the MONGODB_URI environment variable so
# that credentials never live in source control; without it the dashboard
# falls back to an unauthenticated local MongoDB instance.
# SECURITY: an earlier revision embedded a username and password directly in
# this constant. Treat those credentials as leaked and rotate them.
MONGODB_URI = os.environ.get("MONGODB_URI", "mongodb://localhost:27017/")
DATABASE_NAME = "second_brain_course"
COLLECTION_NAME = "user_interaction_analyses"
19
+
20
+
21
class UserInteractionDashboard:
    """Dashboard for user interaction analyses.

    Reads per-user analysis documents from one MongoDB collection and turns
    them into summary counts, a table, and an HTML detail view. All document
    text is HTML-escaped before it is rendered.
    """

    # Columns of the display table. Non-empty results from get_users_data()
    # carry one extra "_raw" column holding the source document.
    _TABLE_COLUMNS = [
        "User ID", "Conversations", "Meetings",
        "Conv Key Findings", "Meeting Key Findings", "Priority"
    ]

    # Colours for high / medium levels; anything else uses the default.
    _LEVEL_COLORS = {"high": "#dc2626", "medium": "#f59e0b"}
    _DEFAULT_LEVEL_COLOR = "#16a34a"

    def __init__(self):
        """Initialize dashboard with MongoDB connection."""
        self.client = MongoClient(MONGODB_URI)
        self.db = self.client[DATABASE_NAME]
        self.collection = self.db[COLLECTION_NAME]
        logger.info(f"Connected to MongoDB: {DATABASE_NAME}.{COLLECTION_NAME}")

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _esc(value, default: str = "") -> str:
        """Return ``value`` (or ``default`` when it is None) as HTML-escaped text."""
        return html.escape(str(default if value is None else value))

    @staticmethod
    def _as_dict(value) -> dict:
        """Return ``value`` if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _as_list(value) -> list:
        """Return ``value`` if it is a list, otherwise an empty list."""
        return value if isinstance(value, list) else []

    @classmethod
    def _level_color(cls, level) -> str:
        """Map a priority/impact level to its display colour (case-insensitive)."""
        return cls._LEVEL_COLORS.get(str(level or "").lower(), cls._DEFAULT_LEVEL_COLOR)

    @classmethod
    def _preview_ids(cls, ids, shown: int = 3) -> str:
        """Join the first ``shown`` ids and note how many more there are."""
        ids = cls._as_list(ids)
        # Ids are coerced to text so numeric ids do not break str.join.
        preview = ", ".join(str(item) for item in ids[:shown])
        if len(ids) > shown:
            preview += f" (+{len(ids) - shown} more)"
        return preview

    @classmethod
    def _marketing_insights(cls, user: dict, level_key: str) -> dict:
        """Return ``user[level_key]['aggregated_marketing_insights']`` or ``{}``."""
        level = cls._as_dict(user.get(level_key))
        return cls._as_dict(level.get("aggregated_marketing_insights"))

    def _render_insight_section(self, heading_html: str, insights, summary_key: str) -> str:
        """Render one insight level: heading, summary, quotes and key findings.

        Args:
            heading_html: Trusted ``<h3>`` markup for the section title.
            insights: The level's insight dict (anything else is treated as empty).
            summary_key: Key under which the level stores its summary text.
        """
        esc = self._esc
        insights = self._as_dict(insights)
        marketing = self._as_dict(insights.get("aggregated_marketing_insights"))

        parts = [
            heading_html,
            f"<p><strong>Summary:</strong> {esc(insights.get(summary_key), 'No summary available')}</p>",
        ]

        quotes = [q for q in self._as_list(marketing.get("quotes")) if isinstance(q, dict)]
        if quotes:
            parts.append("<h4>Key Quotes:</h4><ul>")
            for quote in quotes[:5]:  # Show first 5
                parts.append(f"""
                <li>
                    <strong>"{esc(quote.get('quote'))}"</strong>
                    <br><em>Context:</em> {esc(quote.get('context'))}
                    <br><em>Sentiment:</em> {esc(quote.get('sentiment'))}
                </li>
                """)
            parts.append("</ul>")

        findings = [f for f in self._as_list(marketing.get("key_findings")) if isinstance(f, dict)]
        if findings:
            parts.append("<h4>Key Findings:</h4><ul>")
            for finding in findings[:5]:  # Show first 5
                impact = finding.get("impact")
                parts.append(f"""
                <li>
                    <strong>{esc(finding.get('finding'))}</strong>
                    <br><em>Evidence:</em> {esc(finding.get('evidence'))}
                    <br><em>Impact:</em> <span style="color: {self._level_color(impact)}; font-weight: bold;">{esc(str(impact or '').upper())}</span>
                </li>
                """)
            parts.append("</ul>")

        parts.append("<hr style='margin: 20px 0;'>")
        return "".join(parts)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get_summary_stats(self) -> Tuple[int, int, int, int, int, int]:
        """Get summary statistics for the dashboard.

        Returns:
            ``(total_users, total_conversations, total_meetings,
            high_priority, medium_priority, low_priority)``. The conversation
            and meeting totals are sums of the per-document
            ``total_conversations`` / ``total_meetings`` fields.
        """
        total_users = self.collection.count_documents({})

        # Count by priority
        high_priority = self.collection.count_documents({"priority_level": "high"})
        medium_priority = self.collection.count_documents({"priority_level": "medium"})
        low_priority = self.collection.count_documents({"priority_level": "low"})

        # Aggregate total conversations and meetings
        pipeline = [
            {
                "$group": {
                    "_id": None,
                    "total_conversations": {"$sum": "$total_conversations"},
                    "total_meetings": {"$sum": "$total_meetings"}
                }
            }
        ]

        # The pipeline yields no rows for an empty collection.
        agg_result = list(self.collection.aggregate(pipeline))
        total_conversations = agg_result[0]["total_conversations"] if agg_result else 0
        total_meetings = agg_result[0]["total_meetings"] if agg_result else 0

        return (
            total_users,
            total_conversations,
            total_meetings,
            high_priority,
            medium_priority,
            low_priority
        )

    def get_users_data(self, priority_filter: Optional[str] = None) -> pd.DataFrame:
        """Get user data for table display with optional priority filter.

        Args:
            priority_filter: Priority name such as "High"; it is lower-cased
                before querying. ``None``, empty or "All" disables the filter.

        Returns:
            One row per user with the display columns plus a ``_raw`` column
            holding the source document. An empty result has only the
            display columns.
        """
        query = {}
        if priority_filter and priority_filter != "All":
            query["priority_level"] = priority_filter.lower()

        users = list(self.collection.find(query))

        if not users:
            return pd.DataFrame(columns=self._TABLE_COLUMNS)

        table_data = []
        for user in users:
            conv_findings = self._as_list(
                self._marketing_insights(user, "conversation_level_insights").get("key_findings")
            )
            meeting_findings = self._as_list(
                self._marketing_insights(user, "meeting_level_insights").get("key_findings")
            )

            table_data.append({
                "User ID": user.get("user_id", ""),
                "Conversations": self._preview_ids(user.get("conversation_ids")),
                "Meetings": self._preview_ids(user.get("meeting_ids")),
                "Conv Key Findings": f"{len(conv_findings)} findings",
                "Meeting Key Findings": f"{len(meeting_findings)} findings",
                # A stored None priority is shown as UNKNOWN instead of crashing.
                "Priority": str(user.get("priority_level") or "unknown").upper(),
                "_raw": user  # Store raw data for detail view
            })

        return pd.DataFrame(table_data)

    def get_user_detail(self, df: pd.DataFrame, evt: gr.SelectData) -> str:
        """Get detailed view of selected user.

        Args:
            df: Table that still contains the ``_raw`` column.
            evt: Selection event; ``evt.index`` is either a row position or a
                ``[row, column]`` sequence.

        Returns:
            HTML for the selected user's profile; "No user selected" when the
            table is empty; or an error message if rendering fails.
        """
        if df is None or len(df) == 0:
            return "No user selected"

        try:
            selected_row = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
            user_data = df.iloc[selected_row]["_raw"]
            esc = self._esc

            priority_level = user_data.get("priority_level")
            parts = [f"""
            <div style="font-family: Arial, sans-serif; padding: 20px;">
                <h2 style="color: #2563eb;">User Profile: {esc(user_data.get('user_id'), 'N/A')}</h2>
                <p><strong>Priority Level:</strong> <span style="color: {self._level_color(priority_level)}; font-weight: bold;">{esc(str(priority_level or 'unknown').upper())}</span></p>
                <p><strong>Analysis Date:</strong> {esc(user_data.get('analysis_timestamp'), 'N/A')}</p>
                <hr style="margin: 20px 0;">

                <h3 style="color: #7c3aed;">πŸ“Š Overview</h3>
                <ul>
                    <li><strong>Total Conversations:</strong> {esc(user_data.get('total_conversations'), 0)}</li>
                    <li><strong>Total Meetings:</strong> {esc(user_data.get('total_meetings'), 0)}</li>
                    <li><strong>Conversation Chunks:</strong> {esc(user_data.get('total_conversation_chunks'), 0)}</li>
                    <li><strong>Meeting Chunks:</strong> {esc(user_data.get('total_meeting_chunks'), 0)}</li>
                </ul>

                <hr style="margin: 20px 0;">
            """]

            # Conversation and meeting levels share one renderer.
            parts.append(self._render_insight_section(
                """<h3 style="color: #0891b2;">πŸ’¬ Conversation Level Insights (Intercom)</h3>""",
                user_data.get("conversation_level_insights"),
                "conversation_summary",
            ))
            parts.append(self._render_insight_section(
                "<h3 style='color: #ea580c;'>πŸ“ž Meeting Level Insights (JustCall)</h3>",
                user_data.get("meeting_level_insights"),
                "meeting_summary",
            ))

            # Unified insights
            parts.append("<h3 style='color: #059669;'>🎯 Unified Insights</h3>")
            unified = self._as_dict(user_data.get("unified_insights"))
            parts.append(
                f"<p><strong>Summary:</strong> {esc(unified.get('unified_summary'), 'No unified summary available')}</p>"
            )

            # User journey
            parts.append(
                f"<h4>User Journey:</h4><p>{esc(user_data.get('user_journey_summary'), 'No journey summary available')}</p>"
            )

            # Cross-interaction patterns
            patterns = self._as_list(user_data.get("cross_interaction_patterns"))
            if patterns:
                parts.append("<h4>Cross-Interaction Patterns:</h4><ul>")
                parts.extend(f"<li>{esc(pattern)}</li>" for pattern in patterns)
                parts.append("</ul>")

            # Follow-up recommendations
            recommendations = esc(
                user_data.get("unified_follow_up_recommendations"),
                "No recommendations available",
            )
            parts.append(
                f"<h4>Follow-up Recommendations:</h4><p style='background: #f3f4f6; padding: 15px; border-radius: 5px;'>{recommendations}</p>"
            )

            parts.append("</div>")
            return "".join(parts)

        except Exception as e:
            logger.error(f"Error getting user detail: {e}")
            return f"Error loading user details: {str(e)}"

    def filter_by_priority(self, priority: str) -> Tuple[pd.DataFrame, str]:
        """Filter users by priority level.

        Returns:
            ``(table_without_raw_column, status_message)``.
        """
        df = self.get_users_data(priority_filter=priority)

        # Remove the _raw column for display
        display_df = df.drop(columns=["_raw"]) if "_raw" in df.columns else df

        return display_df, f"Showing {len(df)} users with {priority} priority"

    def search_table(self, df: pd.DataFrame, search_term: str) -> pd.DataFrame:
        """Search across all columns in the table.

        Each cell is converted to text and tested for a case-insensitive
        *literal* occurrence of ``search_term``; characters such as "(" or
        "+" are matched as typed, not interpreted as a regular expression.

        Returns:
            The rows with at least one matching cell, or ``df`` unchanged
            when the term or the table is empty.
        """
        if not search_term or df is None or len(df) == 0:
            return df

        mask = df.astype(str).apply(
            lambda row: row.str.contains(search_term, case=False, na=False, regex=False).any(),
            axis=1
        )

        return df[mask]
282
+
283
+
284
+ def create_dashboard():
285
+ """Create the Gradio dashboard."""
286
+
287
+ dashboard = UserInteractionDashboard()
288
+
289
+ # Get initial stats
290
+ total_users, total_convs, total_meetings, high_count, medium_count, low_count = dashboard.get_summary_stats()
291
+
292
+ # Custom CSS for better styling
293
+ custom_css = """
294
+ .priority-btn {
295
+ font-size: 18px !important;
296
+ font-weight: bold !important;
297
+ padding: 15px 30px !important;
298
+ border-radius: 8px !important;
299
+ }
300
+ .stats-box {
301
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
302
+ padding: 20px;
303
+ border-radius: 10px;
304
+ color: white;
305
+ text-align: center;
306
+ }
307
+ """
308
+
309
+ with gr.Blocks(css=custom_css, title="User Interaction Analysis Dashboard") as demo:
310
+
311
+ # Header
312
+ gr.Markdown("# 🎯 User Interaction Analysis Dashboard")
313
+ gr.Markdown("*Analyzing user interactions across Intercom chats and JustCall meetings*")
314
+
315
+ # ============================================================
316
+ # SECTION 1: Summary Statistics and Priority Filters
317
+ # ============================================================
318
+ with gr.Row():
319
+ with gr.Column(scale=1):
320
+ gr.Markdown(f"""
321
+ <div class="stats-box">
322
+ <h2>{total_users}</h2>
323
+ <p>Total Users Analyzed</p>
324
+ </div>
325
+ """)
326
+
327
+ with gr.Column(scale=1):
328
+ gr.Markdown(f"""
329
+ <div class="stats-box">
330
+ <h2>{total_convs}</h2>
331
+ <p>Intercom Conversations</p>
332
+ </div>
333
+ """)
334
+
335
+ with gr.Column(scale=1):
336
+ gr.Markdown(f"""
337
+ <div class="stats-box">
338
+ <h2>{total_meetings}</h2>
339
+ <p>JustCall Meetings</p>
340
+ </div>
341
+ """)
342
+
343
+ gr.Markdown("---")
344
+
345
+ # Priority Filter Buttons
346
+ gr.Markdown("### 🎚️ Filter by Priority Level")
347
+ with gr.Row():
348
+ high_btn = gr.Button(
349
+ f"πŸ”΄ High Priority ({high_count})",
350
+ elem_classes=["priority-btn"],
351
+ variant="primary",
352
+ scale=1
353
+ )
354
+ medium_btn = gr.Button(
355
+ f"🟑 Medium Priority ({medium_count})",
356
+ elem_classes=["priority-btn"],
357
+ variant="secondary",
358
+ scale=1
359
+ )
360
+ low_btn = gr.Button(
361
+ f"🟒 Low Priority ({low_count})",
362
+ elem_classes=["priority-btn"],
363
+ variant="secondary",
364
+ scale=1
365
+ )
366
+ all_btn = gr.Button(
367
+ f"βšͺ All Users ({total_users})",
368
+ elem_classes=["priority-btn"],
369
+ variant="secondary",
370
+ scale=1
371
+ )
372
+
373
+ filter_status = gr.Textbox(
374
+ label="Filter Status",
375
+ value=f"Showing all {total_users} users",
376
+ interactive=False
377
+ )
378
+
379
+ gr.Markdown("---")
380
+
381
+ # ============================================================
382
+ # SECTION 2: User Data Table with Search
383
+ # ============================================================
384
+ gr.Markdown("### πŸ“Š User Interaction Data")
385
+
386
+ search_box = gr.Textbox(
387
+ label="πŸ” Search across all columns",
388
+ placeholder="Search by User ID, Conversation ID, Meeting ID, findings...",
389
+ scale=1
390
+ )
391
+
392
+ # Get initial data
393
+ initial_df = dashboard.get_users_data()
394
+ display_df = initial_df.drop(columns=["_raw"]) if "_raw" in initial_df.columns else initial_df
395
+
396
+ user_table = gr.Dataframe(
397
+ value=display_df,
398
+ label="User Interactions",
399
+ interactive=False,
400
+ wrap=True
401
+ )
402
+
403
+ # Hidden state to store full dataframe with _raw data
404
+ full_data_state = gr.State(value=initial_df)
405
+ filtered_data_state = gr.State(value=initial_df)
406
+
407
+ gr.Markdown("---")
408
+
409
+ # ============================================================
410
+ # SECTION 3: Detailed User View
411
+ # ============================================================
412
+ gr.Markdown("### πŸ‘€ User Details")
413
+ gr.Markdown("*Click on any row in the table above to see detailed analysis*")
414
+
415
+ user_detail = gr.HTML(
416
+ value="<p style='text-align: center; color: #6b7280; padding: 40px;'>Select a user from the table above to view detailed insights</p>"
417
+ )
418
+
419
+ # ============================================================
420
+ # Event Handlers
421
+ # ============================================================
422
+
423
def _apply_priority_filter(priority_filter, status_template):
    """Fetch users for one priority level and build the handler outputs.

    Shared implementation behind the four priority buttons, which
    differ only in the filter value and the status text.

    Args:
        priority_filter: Value forwarded to
            ``dashboard.get_users_data(priority_filter=...)``; ``None``
            requests every user.
        status_template: ``str.format`` template with a ``{count}``
            placeholder for the number of rows returned.

    Returns:
        A 4-tuple ``(display, df, df, status)`` matching the click
        handlers' outputs ``[user_table, filtered_data_state,
        full_data_state, filter_status]``. ``display`` is ``df`` without
        the ``_raw`` column when that column is present.
    """
    df = dashboard.get_users_data(priority_filter=priority_filter)
    # ``_raw`` stays in the state copies but is hidden from the table.
    display = df.drop(columns=["_raw"]) if "_raw" in df.columns else df
    return display, df, df, status_template.format(count=len(df))

def filter_high():
    """Show only users whose priority is "High"."""
    return _apply_priority_filter("High", "Showing {count} HIGH priority users")

def filter_medium():
    """Show only users whose priority is "Medium"."""
    return _apply_priority_filter("Medium", "Showing {count} MEDIUM priority users")

def filter_low():
    """Show only users whose priority is "Low"."""
    return _apply_priority_filter("Low", "Showing {count} LOW priority users")

def filter_all():
    """Clear the priority filter and show every user."""
    return _apply_priority_filter(None, "Showing all {count} users")
442
+
443
def search_users(search_term: str, current_filtered_df: pd.DataFrame):
    """Return the rows of the currently filtered data that match a search term.

    The term is matched as a literal, case-insensitive substring against
    the string form of every visible column (everything except ``_raw``).

    Args:
        search_term: Text typed into the search box. An empty value means
            "no search" and returns every row.
        current_filtered_df: Frame held in the filter state. It may carry
            a ``_raw`` column, which is never searched or displayed.
            ``None`` is tolerated and treated as "no data".

    Returns:
        A DataFrame without the ``_raw`` column containing the matching
        rows. An empty input frame keeps its visible column headers; a
        ``None`` input yields a column-less empty DataFrame.
    """
    # Check for None before touching ``.columns`` so an unset state
    # cannot raise AttributeError, whatever the search term is.
    if current_filtered_df is None:
        return pd.DataFrame()

    has_raw = "_raw" in current_filtered_df.columns
    display = current_filtered_df.drop(columns=["_raw"]) if has_raw else current_filtered_df

    # Nothing to search for, or nothing to search in: return the visible
    # frame unchanged so the table keeps its headers.
    if not search_term or display.empty:
        return display

    # regex=False: the term is user input, so characters such as "(",
    # "+" or "." must match literally instead of being parsed as a
    # regular expression (which can raise re.error or over-match).
    matches = (
        display.astype(str)
        .apply(lambda col: col.str.contains(search_term, case=False, na=False, regex=False))
        .any(axis=1)
    )
    return display[matches]
467
+
468
def show_detail(evt: gr.SelectData, full_data: pd.DataFrame):
    """Build the detail view for the table row the user selected.

    Delegates to ``dashboard.get_user_detail`` with the frame held in
    state and the selection event.

    NOTE(review): the search handler updates only the visible table, not
    the state frame passed here. If ``get_user_detail`` resolves the row
    by position, a selection made after a search may map to a different
    user - verify against that method.
    """
    detail = dashboard.get_user_detail(full_data, evt)
    return detail
471
+
472
+ # Wire up event handlers
473
+ high_btn.click(
474
+ fn=filter_high,
475
+ outputs=[user_table, filtered_data_state, full_data_state, filter_status]
476
+ )
477
+
478
+ medium_btn.click(
479
+ fn=filter_medium,
480
+ outputs=[user_table, filtered_data_state, full_data_state, filter_status]
481
+ )
482
+
483
+ low_btn.click(
484
+ fn=filter_low,
485
+ outputs=[user_table, filtered_data_state, full_data_state, filter_status]
486
+ )
487
+
488
+ all_btn.click(
489
+ fn=filter_all,
490
+ outputs=[user_table, filtered_data_state, full_data_state, filter_status]
491
+ )
492
+
493
+ search_box.change(
494
+ fn=search_users,
495
+ inputs=[search_box, filtered_data_state],
496
+ outputs=[user_table]
497
+ )
498
+
499
+ user_table.select(
500
+ fn=show_detail,
501
+ inputs=[full_data_state],
502
+ outputs=[user_detail]
503
+ )
504
+
505
+ return demo
506
+
507
+
508
if __name__ == "__main__":
    # Script entry point: build the dashboard and serve it.
    logger.info("Starting User Interaction Analysis Dashboard...")
    # Listen on all interfaces, port 7861, without a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7861,
        "share": False,
    }
    demo = create_dashboard()
    demo.launch(**launch_options)
516
+