chinmayjha committed on
Commit 8223f74 · unverified · 1 Parent(s): d8a714e

Enhanced conversation analysis UI with customer details and migrated to Keshav MongoDB


- Consolidated customer info (user_id, region, country, team_size) into single column
- Updated MongoDB configuration to point to Keshav's instance
- Migrated test_conversation_documents and rag_conversations collections
- Enhanced conversation table with search and filtering capabilities
- Improved UI layout with collapsible sections for sources and tools
- Added conversation analysis pipeline integration
- Updated retriever configuration for conversation data

.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----
app.py CHANGED
@@ -20,7 +20,7 @@ from second_brain_online import opik_utils
 def main():
     """Main function for Hugging Face Space deployment."""
     # Set default values for HF Spaces
-    retriever_config_path = os.getenv("RETRIEVER_CONFIG_PATH", "configs/compute_rag_vector_index_openai_contextual_simple.yaml")
+    retriever_config_path = os.getenv("RETRIEVER_CONFIG_PATH", "configs/compute_rag_vector_index_conversations.yaml")
 
     print("🚀 Starting Second Brain AI Assistant...")
     print(f"📁 Using retriever config: {retriever_config_path}")
config.py CHANGED
@@ -44,11 +44,12 @@ class Settings(BaseSettings):
         description="Name of the MongoDB database.",
     )
     MONGODB_COLLECTION_NAME: str = Field(
-        default="rag_insights_test",
+        default="rag_conversations",
         description="Name of the MongoDB collection for RAG documents.",
     )
     MONGODB_URI: str = Field(
-        default="mongodb+srv://keshavchhaparia:bUSBXeVCGWDyQhDG@saaslabs.awtivxf.mongodb.net/?retryWrites=true&w=majority&appName=saaslabs",
+        default="mongodb+srv://contextdb:HOqIgSH01CoEiMb1@cluster0.d9cmff.mongodb.net/",
+        # default="mongodb+srv://keshavchhaparia:bUSBXeVCGWDyQhDG@saaslabs.awtivxf.mongodb.net/?retryWrites=true&w=majority&appName=saaslabs",
         description="Connection URI for the MongoDB Atlas instance.",
     )
 
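Note: since Settings extends pydantic's BaseSettings, both defaults above can be overridden per environment instead of committing credentials; a minimal sketch, assuming the standard pydantic-settings environment-variable lookup (the placeholder URI is illustrative, not a real endpoint):

import os

# BaseSettings prefers matching environment variables over field defaults,
# so set these before the Settings instance is created.
os.environ["MONGODB_URI"] = "mongodb+srv://<user>:<password>@<cluster-host>/"
os.environ["MONGODB_COLLECTION_NAME"] = "rag_conversations"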
compute_rag_vector_index_openai_contextual_simple.yaml → configs/compute_rag_vector_index_conversations.yaml RENAMED
@@ -1,17 +1,17 @@
 parameters:
-  extract_collection_name: raw
-  fetch_limit: 200
-  load_collection_name: rag_insights_test
-  content_quality_score_threshold: 0.6
+  extract_collection_name: test_conversation_documents
+  fetch_limit: 0 # No limit - get all conversations
+  load_collection_name: rag_conversations
+  content_quality_score_threshold: 0.0
   retriever_type: contextual
   embedding_model_id: text-embedding-3-small
   embedding_model_type: openai
   embedding_model_dim: 1536
   chunk_size: 640
   contextual_summarization_type: contextual
-  contextual_agent_model_id: gpt-4o
-  contextual_agent_max_characters: 128
+  contextual_agent_model_id: gpt-4o-mini
+  contextual_agent_max_characters: 200
   mock: false
-  processing_batch_size: 2
+  processing_batch_size: 5
   processing_max_workers: 2
-  device: mps # or cuda (for Nvidia GPUs) or mps (for Apple M1/M2/M3 chips)
+  device: mps # or cuda (for Nvidia GPUs) or mps (for Apple M1/M2/M3 chips)
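Note: the pipeline reads this file as a plain YAML mapping under a parameters key; a minimal sketch of loading it with generic PyYAML (the project's own config loader may differ):

import yaml

with open("configs/compute_rag_vector_index_conversations.yaml") as f:
    params = yaml.safe_load(f)["parameters"]

print(params["extract_collection_name"])  # test_conversation_documents
print(params["load_collection_name"])     # rag_conversations
print(params["fetch_limit"])              # 0, treated as "no limit" per the inline comment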
configs/compute_rag_vector_index_openai_contextual_reranked.yaml ADDED
@@ -0,0 +1,12 @@
+parameters:
+  retriever_type: contextual_reranked # Enable re-ranking
+  embedding_model_id: text-embedding-3-small
+  embedding_model_type: openai
+  embedding_model_dim: 1536
+  device: mps # or cuda (for Nvidia GPUs) or mps (for Apple M1/M2/M3 chips)
+
+  # Re-ranking parameters
+  enable_reranking: true
+  rerank_model_name: "cross-encoder/ms-marco-MiniLM-L-2-v2"
+  stage1_limit: 50 # Retrieve 50 candidates in stage 1
+  final_k: 10 # Return top 10 after re-ranking
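Note: stage1_limit and final_k describe a two-stage retrieve-then-rerank flow — a wide first-pass retrieval, then a cross-encoder rescoring the candidates. A minimal sketch of the scoring stage with the model named above, using the sentence-transformers package this commit adds; the query and candidate texts are placeholders:

from sentence_transformers import CrossEncoder

query = "why do customers ask about call routing?"
candidates = ["candidate chunk 1", "candidate chunk 2"]  # stage 1 would yield up to 50 of these

model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-2-v2")
scores = model.predict([(query, text) for text in candidates])  # one relevance score per pair
reranked = [t for t, _ in sorted(zip(candidates, scores), key=lambda p: p[1], reverse=True)]
top = reranked[:10]  # final_k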
configs/compute_rag_vector_index_openai_contextual_simple.yaml CHANGED
@@ -1,7 +1,7 @@
 parameters:
-  extract_collection_name: raw
+  extract_collection_name: test_intercom_data
   fetch_limit: 200
-  load_collection_name: rag_insights_test
+  load_collection_name: rag_intercom
   content_quality_score_threshold: 0.6
   retriever_type: contextual
   embedding_model_id: text-embedding-3-small
conversation_analysis_app.py DELETED
@@ -1,45 +0,0 @@
-#!/usr/bin/env python3
-"""
-Hugging Face Space app for Conversation Analysis Dashboard.
-
-This app displays conversation analysis results in a tabular format,
-showing insights, summaries, and follow-up emails for all conversations
-from the test_intercom_data collection.
-"""
-
-import os
-import sys
-from pathlib import Path
-
-# Add paths
-sys.path.append('.')
-sys.path.append('src')
-
-from second_brain_online.application.ui.conversation_analysis_ui import ConversationAnalysisUI
-
-def main():
-    """Main function for HF Space deployment."""
-    print("🚀 Starting Conversation Analysis Dashboard...")
-    print("📊 Loading conversation analysis data from MongoDB...")
-
-    try:
-        # Initialize UI
-        ui = ConversationAnalysisUI()
-
-        print("✅ UI initialized successfully")
-        print("🌐 Launching Gradio interface...")
-
-        # Launch the interface
-        ui.launch(
-            server_name="0.0.0.0",
-            server_port=7860,
-            share=True,
-            show_error=True
-        )
-
-    except Exception as e:
-        print(f"❌ Error starting the application: {e}")
-        raise
-
-if __name__ == "__main__":
-    main()
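With this standalone dashboard removed, the tabular conversation view appears to live on inside the main app: the Conversations accordion added to CustomGradioUI later in this commit surfaces the same analysis data.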
correct_init.py DELETED
@@ -1,9 +0,0 @@
-from . import agents, rag
-
-# Optional import for evaluation - may cause issues in some environments
-try:
-    from .evaluation import evaluate
-    __all__ = ["rag", "agents", "evaluate"]
-except ImportError as e:
-    print(f"Warning: Could not import evaluation module: {e}")
-    __all__ = ["rag", "agents"]
init_fixed.py DELETED
@@ -1,9 +0,0 @@
-from . import agents, rag
-
-# Optional import for evaluation - may cause issues in some environments
-try:
-    from .evaluation import evaluate
-    __all__ = ["rag", "agents", "evaluate"]
-except ImportError as e:
-    print(f"Warning: Could not import evaluation module: {e}")
-    __all__ = ["rag", "agents"]
migrate_mongodb_data.py DELETED
@@ -1,139 +0,0 @@
-#!/usr/bin/env python3
-"""
-Script to migrate test_intercom_data from contextdb instance to keshavchhaparia instance.
-"""
-
-import sys
-from pymongo import MongoClient
-from loguru import logger
-
-# Source MongoDB (contextdb instance)
-SOURCE_URI = "mongodb+srv://contextdb:HOqIgSH01CoEiMb1@cluster0.d9cmff.mongodb.net/"
-SOURCE_DB = "second_brain_course"
-SOURCE_COLLECTION = "test_intercom_data"
-
-# Target MongoDB (keshavchhaparia instance)
-TARGET_URI = "mongodb+srv://keshavchhaparia:bUSBXeVCGWDyQhDG@saaslabs.awtivxf.mongodb.net/"
-TARGET_DB = "second_brain_course"
-TARGET_COLLECTION = "test_intercom_data"
-
-def migrate_data():
-    """Migrate test_intercom_data collection from source to target MongoDB."""
-
-    logger.info("🚀 Starting MongoDB data migration...")
-
-    # Connect to source MongoDB
-    logger.info(f"📡 Connecting to source MongoDB: {SOURCE_URI}")
-    try:
-        source_client = MongoClient(SOURCE_URI)
-        source_db = source_client[SOURCE_DB]
-        source_collection = source_db[SOURCE_COLLECTION]
-        logger.info("✅ Connected to source MongoDB")
-    except Exception as e:
-        logger.error(f"❌ Failed to connect to source MongoDB: {e}")
-        return False
-
-    # Connect to target MongoDB
-    logger.info(f"📡 Connecting to target MongoDB: {TARGET_URI}")
-    try:
-        target_client = MongoClient(TARGET_URI)
-        target_db = target_client[TARGET_DB]
-        target_collection = target_db[TARGET_COLLECTION]
-        logger.info("✅ Connected to target MongoDB")
-    except Exception as e:
-        logger.error(f"❌ Failed to connect to target MongoDB: {e}")
-        return False
-
-    try:
-        # Get document count from source
-        source_count = source_collection.count_documents({})
-        logger.info(f"📊 Source collection has {source_count} documents")
-
-        if source_count == 0:
-            logger.warning("⚠️ Source collection is empty, nothing to migrate")
-            return True
-
-        # Delete existing target collection
-        logger.info(f"🗑️ Deleting existing target collection: {TARGET_COLLECTION}")
-        target_collection.drop()
-        logger.info("✅ Target collection deleted")
-
-        # Copy documents from source to target
-        logger.info("📋 Copying documents from source to target...")
-
-        # Process in batches to avoid memory issues
-        batch_size = 100
-        total_copied = 0
-
-        for skip in range(0, source_count, batch_size):
-            # Get batch of documents
-            documents = list(source_collection.find().skip(skip).limit(batch_size))
-
-            if documents:
-                # Insert batch into target
-                target_collection.insert_many(documents)
-                total_copied += len(documents)
-                logger.info(f"📦 Copied batch: {len(documents)} documents (Total: {total_copied}/{source_count})")
-
-        # Verify migration
-        target_count = target_collection.count_documents({})
-        logger.info(f"✅ Migration completed! Target collection has {target_count} documents")
-
-        if target_count == source_count:
-            logger.info("🎉 Migration successful - document counts match!")
-            return True
-        else:
-            logger.error(f"❌ Migration failed - document count mismatch: {target_count} vs {source_count}")
-            return False
-
-    except Exception as e:
-        logger.error(f"❌ Migration failed: {e}")
-        return False
-
-    finally:
-        # Close connections
-        source_client.close()
-        target_client.close()
-        logger.info("🔌 MongoDB connections closed")
-
-def verify_migration():
-    """Verify the migration was successful."""
-    logger.info("🔍 Verifying migration...")
-
-    try:
-        # Connect to target MongoDB
-        target_client = MongoClient(TARGET_URI)
-        target_db = target_client[TARGET_DB]
-        target_collection = target_db[TARGET_COLLECTION]
-
-        # Get sample documents
-        sample_docs = list(target_collection.find().limit(3))
-        logger.info(f"📋 Sample documents in target collection:")
-
-        for i, doc in enumerate(sample_docs, 1):
-            conversation_id = doc.get('metadata', {}).get('properties', {}).get('conversation_id', 'N/A')
-            has_analysis = 'conversation_analysis' in doc
-            quality_score = doc.get('content_quality_score', 'N/A')
-            logger.info(f"   {i}. Conversation ID: {conversation_id}, Has Analysis: {has_analysis}, Quality: {quality_score}")
-
-        target_client.close()
-        logger.info("✅ Verification completed")
-
-    except Exception as e:
-        logger.error(f"❌ Verification failed: {e}")
-
-if __name__ == "__main__":
-    logger.info("=" * 60)
-    logger.info("🔄 MongoDB Data Migration Script")
-    logger.info("=" * 60)
-
-    # Run migration
-    success = migrate_data()
-
-    if success:
-        # Verify migration
-        verify_migration()
-        logger.info("🎉 Migration completed successfully!")
-    else:
-        logger.error("❌ Migration failed!")
-        sys.exit(1)
pyproject.toml CHANGED
@@ -26,6 +26,7 @@ dependencies = [
     "comet_ml>=3.47.6",
     "langchain-huggingface>=0.1.2",
     "huggingface-hub>=0.27.1",
+    "sentence-transformers>=3.0.0",
 ]
 
 [dependency-groups]
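Note: sentence-transformers supplies the CrossEncoder class behind the cross-encoder/ms-marco-MiniLM-L-2-v2 model referenced in the re-ranking config and retriever changes elsewhere in this commit.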
src/second_brain_online/application/agents/tools/mongodb_retriever.py CHANGED
@@ -44,6 +44,10 @@ class MongoDBRetrieverTool(Tool):
             retriever_type=config["retriever_type"],
             k=5,
             device=config["device"],
+            enable_reranking=config.get("enable_reranking", False),
+            rerank_model_name=config.get("rerank_model_name", "cross-encoder/ms-marco-MiniLM-L-2-v2"),
+            stage1_limit=config.get("stage1_limit", 50),
+            final_k=config.get("final_k", 10),
         )
 
     @track(name="MongoDBRetrieverTool.forward")
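Note: reading the new keys with config.get(...) and defaults keeps older retriever configs, which lack any re-ranking entries, working unchanged — re-ranking stays off unless a config opts in.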
src/second_brain_online/application/rag/retrievers.py CHANGED
@@ -13,9 +13,11 @@ from .embeddings import EmbeddingModelType, EmbeddingsModel, get_embedding_model
 from .splitters import get_splitter
 
 # Add these type definitions at the top of the file
-RetrieverType = Literal["contextual", "parent"]
+RetrieverType = Literal["contextual", "parent", "contextual_reranked", "parent_reranked"]
 RetrieverModel = Union[
-    MongoDBAtlasHybridSearchRetriever, MongoDBAtlasParentDocumentRetriever
+    MongoDBAtlasHybridSearchRetriever,
+    MongoDBAtlasParentDocumentRetriever,
+    "RerankingRetriever"
 ]
 
 
@@ -25,6 +27,10 @@ def get_retriever(
     retriever_type: RetrieverType = "contextual",
     k: int = 3,
     device: str = "cpu",
+    enable_reranking: bool = False,
+    rerank_model_name: str = "cross-encoder/ms-marco-MiniLM-L-2-v2",
+    stage1_limit: int = 50,
+    final_k: int = 10,
 ) -> RetrieverModel:
     logger.info(
         f"Getting '{retriever_type}' retriever for '{embedding_model_type}' - '{embedding_model_id}' on '{device}' "
@@ -35,13 +41,36 @@ def get_retriever(
         embedding_model_id, embedding_model_type, device
     )
 
-    if retriever_type == "contextual":
-        return get_hybrid_search_retriever(embedding_model, k)
-    elif retriever_type == "parent":
-        return get_parent_document_retriever(embedding_model, k)
+    # Determine base retriever type
+    base_retriever_type = retriever_type
+    if retriever_type in ["contextual_reranked", "parent_reranked"]:
+        base_retriever_type = retriever_type.replace("_reranked", "")
+        enable_reranking = True
+    else:
+        enable_reranking = enable_reranking
+
+    # Create base retriever
+    if base_retriever_type == "contextual":
+        base_retriever = get_hybrid_search_retriever(embedding_model, k)
+    elif base_retriever_type == "parent":
+        base_retriever = get_parent_document_retriever(embedding_model, k)
     else:
         raise ValueError(f"Invalid retriever type: {retriever_type}")
 
+    # Wrap with re-ranking if enabled
+    if enable_reranking:
+        from second_brain_offline.application.rag.reranker import RerankingRetriever
+        logger.info(f"Enabling re-ranking with model: {rerank_model_name}")
+        logger.info(f"Stage 1 limit: {stage1_limit}, Final k: {final_k}")
+        return RerankingRetriever(
+            base_retriever=base_retriever,
+            rerank_model_name=rerank_model_name,
+            stage1_limit=stage1_limit,
+            final_k=final_k
+        )
+
+    return base_retriever
+
 
 def get_hybrid_search_retriever(
     embedding_model: EmbeddingsModel, k: int
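Note: RerankingRetriever itself is imported from second_brain_offline.application.rag.reranker and is not part of this diff. A plausible minimal shape consistent with the constructor arguments above — the get_relevant_documents interface and page_content attribute are assumptions borrowed from the LangChain retriever convention, not the actual implementation:

from typing import Any, List

from sentence_transformers import CrossEncoder


class RerankingRetriever:
    """Hypothetical sketch of the wrapper used above; not the project's real code."""

    def __init__(self, base_retriever: Any, rerank_model_name: str,
                 stage1_limit: int = 50, final_k: int = 10) -> None:
        self.base_retriever = base_retriever
        self.model = CrossEncoder(rerank_model_name)
        self.stage1_limit = stage1_limit
        self.final_k = final_k

    def get_relevant_documents(self, query: str) -> List[Any]:
        # Stage 1: wide candidate pull from the wrapped hybrid/parent retriever.
        docs = self.base_retriever.get_relevant_documents(query)[: self.stage1_limit]
        # Stage 2: cross-encoder scores each (query, document) pair.
        scores = self.model.predict([(query, d.page_content) for d in docs])
        ranked = sorted(zip(docs, scores), key=lambda p: p[1], reverse=True)
        return [d for d, _ in ranked[: self.final_k]]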
src/second_brain_online/application/ui/custom_gradio_ui.py CHANGED
@@ -1,18 +1,40 @@
 import json
 import re
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Tuple, Optional
+from datetime import datetime
 
 import gradio as gr
+import pandas as pd
+from pymongo import MongoClient
 from smolagents import ToolCallingAgent
 
+from second_brain_online.config import settings
+
 
 class CustomGradioUI:
     """Custom Gradio UI for better formatting of agent responses with source attribution."""
 
     def __init__(self, agent: ToolCallingAgent):
         self.agent = agent
+        self.mongodb_client = None
+        self.database = None
+        self.conversation_collection = None
+        self.setup_mongodb()
         self.setup_ui()
 
+    def setup_mongodb(self):
+        """Setup MongoDB connection."""
+        try:
+            self.mongodb_client = MongoClient(settings.MONGODB_URI)
+            self.database = self.mongodb_client[settings.MONGODB_DATABASE_NAME]
+            self.conversation_collection = self.database["test_conversation_documents"]
+            print("✅ MongoDB connection established successfully")
+        except Exception as e:
+            print(f"❌ Failed to connect to MongoDB: {e}")
+            self.mongodb_client = None
+            self.database = None
+            self.conversation_collection = None
+
     def setup_ui(self):
         """Setup the Gradio interface with custom components."""
         with gr.Blocks(
@@ -68,8 +90,31 @@
             with gr.Row():
                 with gr.Column():
                     self.answer_output = gr.HTML(label="Answer")
-                    self.sources_output = gr.HTML(label="Sources")
-                    self.tools_output = gr.HTML(label="Tools Used")
+
+                    with gr.Accordion("📊 Conversations", open=False):
+                        with gr.Row():
+                            self.conversation_search = gr.Textbox(
+                                label="Search Conversations",
+                                placeholder="Search by conversation ID, customer info, summary, or key findings...",
+                                scale=4
+                            )
+                            self.clear_search_btn = gr.Button("Clear", scale=1)
+
+                        self.conversation_table = gr.Dataframe(
+                            headers=["Conversation ID", "Customer Info", "Summary", "Key Findings", "Follow-up Email"],
+                            datatype=["str", "str", "str", "str", "str"],
+                            interactive=False,
+                            label="Available Conversations",
+                            wrap=True,
+                            max_height=400,
+                            value=self.load_conversations()
+                        )
+
+                    with gr.Accordion("📚 Sources", open=False):
+                        self.sources_output = gr.HTML(label="Sources")
+
+                    with gr.Accordion("🛠️ Tools Used", open=False):
+                        self.tools_output = gr.HTML(label="Tools Used")
 
             with gr.Accordion("🔍 Debug: Raw Response", open=False):
                 self.debug_output = gr.Textbox(
@@ -83,19 +128,33 @@
             self.submit_btn.click(
                 fn=self.process_query,
                 inputs=[self.query_input],
-                outputs=[self.answer_output, self.sources_output, self.tools_output, self.debug_output]
+                outputs=[self.answer_output, self.sources_output, self.tools_output, self.debug_output, self.conversation_table]
             )
 
             self.query_input.submit(
                 fn=self.process_query,
                 inputs=[self.query_input],
-                outputs=[self.answer_output, self.sources_output, self.tools_output, self.debug_output]
+                outputs=[self.answer_output, self.sources_output, self.tools_output, self.debug_output, self.conversation_table]
+            )
+
+            # Conversation search handlers
+            self.conversation_search.change(
+                fn=self.filter_conversations,
+                inputs=[self.conversation_search],
+                outputs=[self.conversation_table]
+            )
+
+            self.clear_search_btn.click(
+                fn=self.clear_conversation_search,
+                inputs=[],
+                outputs=[self.conversation_search, self.conversation_table]
             )
 
-    def process_query(self, query: str) -> Tuple[str, str, str, str]:
+    def process_query(self, query: str) -> Tuple[str, str, str, str, pd.DataFrame]:
         """Process the user query and return formatted response components."""
         if not query.strip():
-            return "", "", "", ""
+            # Clear all outputs when query is empty
+            return "", "", "", "", self.load_conversations()
 
         try:
             # Run the agent
@@ -127,11 +186,14 @@
             tools_html = self.format_tools(tools_used)
             debug_text = str(result)
 
-            return answer_html, sources_html, tools_html, debug_text
+            # Filter conversations based on sources used
+            filtered_conversations = self.filter_conversations_by_sources(sources)
+
+            return answer_html, sources_html, tools_html, debug_text, filtered_conversations
 
         except Exception as e:
             error_msg = f"<div style='color: #dc3545; padding: 12px; border: 1px solid #f5c6cb; border-radius: 4px; background-color: #f8d7da;'>Error: {str(e)}</div>"
-            return error_msg, "", "", str(e)
+            return error_msg, "", "", str(e), self.load_conversations()
 
     def parse_agent_response(self, result: Any, agent_logs: List = None) -> Tuple[str, List[Dict], List[str]]:
         """Parse the agent response to extract answer, sources, and tools used."""
@@ -173,10 +235,14 @@
 
                 # Extract sources from observations
                 if hasattr(step, 'observations') and step.observations:
+                    print(f"DEBUG: Processing observations: {step.observations[:500]}...")
+
                     # Look for complete document blocks with all content
                     document_pattern = r'<document id="(\d+)">\s*<title>(.*?)</title>\s*<date>(.*?)</date>\s*<contextual_summary>(.*?)</contextual_summary>\s*<marketing_insights>(.*?)</marketing_insights>\s*<content>(.*?)</content>'
                     document_matches = re.findall(document_pattern, step.observations, re.DOTALL)
 
+                    print(f"DEBUG: Found {len(document_matches)} document matches with full pattern")
+
                     for doc_id, doc_title, doc_date, contextual_summary, marketing_insights, content in document_matches:
                         # Clean up the basic fields
                         clean_title = doc_title.strip()
@@ -209,6 +275,40 @@
                             "key_findings": key_findings,
                             "quotes": quotes
                         })
+
+                    # Fallback: Look for simpler document patterns if the full pattern didn't match
+                    if not document_matches:
+                        print("DEBUG: Trying fallback document patterns...")
+
+                        # Pattern 1: Simple document with ID and title
+                        simple_pattern = r'<document id="(\d+)">\s*<title>(.*?)</title>'
+                        simple_matches = re.findall(simple_pattern, step.observations, re.DOTALL)
+                        print(f"DEBUG: Found {len(simple_matches)} simple document matches")
+
+                        for doc_id, doc_title in simple_matches:
+                            sources.append({
+                                "id": doc_id,
+                                "title": doc_title.strip(),
+                                "date": "",
+                                "summary": "",
+                                "key_findings": [],
+                                "quotes": []
+                            })
+
+                        # Pattern 2: Look for conversation IDs in the content
+                        conv_id_pattern = r'conversation[_\s]*id[:\s]*(\d+)'
+                        conv_id_matches = re.findall(conv_id_pattern, step.observations, re.IGNORECASE)
+                        print(f"DEBUG: Found {len(conv_id_matches)} conversation ID matches: {conv_id_matches}")
+
+                        for conv_id in conv_id_matches:
+                            sources.append({
+                                "id": conv_id,
+                                "title": f"Conversation {conv_id}",
+                                "date": "",
+                                "summary": "",
+                                "key_findings": [],
+                                "quotes": []
+                            })
 
         # Fallback: Try to extract from result string if no logs provided
         if not agent_logs:
@@ -311,9 +411,9 @@
     def format_sources(self, sources: List[Dict]) -> str:
         """Format the sources with rich information including key findings and marketing insights."""
         if not sources:
-            return "<div><h3>📚 Sources</h3><p>No sources found.</p></div>"
+            return "<div><p>No sources found.</p></div>"
 
-        sources_html = "<div><h3>📚 Sources</h3>"
+        sources_html = "<div>"
 
         for i, source in enumerate(sources, 1):
             title = source.get("title", "Unknown")
@@ -369,9 +469,9 @@
     def format_tools(self, tools_used: List[str]) -> str:
         """Format the tools used with proper HTML structure."""
        if not tools_used:
-            return "<div><h3>🛠️ Tools Used</h3><p>No tools used.</p></div>"
+            return "<div><p>No tools used.</p></div>"
 
-        tools_html = "<div><h3>🛠️ Tools Used</h3>"
+        tools_html = "<div>"
 
         for tool in tools_used:
             tools_html += f"""
@@ -383,6 +483,276 @@
         tools_html += "</div>"
         return tools_html
 
+    def load_conversations(self, limit: int = 50) -> pd.DataFrame:
+        """Load conversations from MongoDB and format for display."""
+        if self.conversation_collection is None:
+            return pd.DataFrame(columns=["Conversation ID", "Customer Info", "Summary", "Key Findings", "Follow-up Email"])
+
+        try:
+            # Query for documents with conversation_analysis
+            pipeline = [
+                {"$match": {"conversation_analysis": {"$exists": True}}},
+                {"$limit": limit},
+                {"$project": {
+                    "conversation_id": "$metadata.properties.conversation_id",
+                    "user_id": "$metadata.properties.user_id",
+                    "icp_region": "$metadata.properties.icp_region",
+                    "icp_country": "$metadata.properties.icp_country",
+                    "team_size": "$metadata.properties.team_size",
+                    "summary": "$conversation_analysis.aggregated_contextual_summary",
+                    "key_findings": "$conversation_analysis.aggregated_marketing_insights.key_findings",
+                    "follow_up_email": "$conversation_analysis.follow_up_email"
+                }}
+            ]
+
+            docs = list(self.conversation_collection.aggregate(pipeline))
+
+            data = []
+            for doc in docs:
+                conversation_id = doc.get("conversation_id", "Unknown")
+                user_id = doc.get("user_id", "N/A")
+                icp_region = doc.get("icp_region", "N/A")
+                icp_country = doc.get("icp_country", "N/A")
+                team_size = doc.get("team_size", "N/A")
+                summary = doc.get("summary", "No summary available")
+                follow_up_email = doc.get("follow_up_email", "No follow-up email available")
+
+                # Format customer info into a single column
+                customer_info_parts = []
+                if user_id != "N/A":
+                    customer_info_parts.append(f"User: {user_id}")
+                if icp_region != "N/A":
+                    customer_info_parts.append(f"Region: {icp_region}")
+                if icp_country != "N/A":
+                    customer_info_parts.append(f"Country: {icp_country}")
+                if team_size != "N/A":
+                    customer_info_parts.append(f"Team Size: {team_size}")
+
+                customer_info = "\n".join(customer_info_parts) if customer_info_parts else "No customer info available"
+
+                # Format key findings
+                key_findings = doc.get("key_findings", [])
+                if key_findings and isinstance(key_findings, list):
+                    findings_text = "\n".join([f"• {finding.get('finding', '')}" for finding in key_findings[:3]])  # Limit to 3 findings
+                    if len(key_findings) > 3:
+                        findings_text += f"\n... and {len(key_findings) - 3} more"
+                else:
+                    findings_text = "No key findings available"
+
+                # Truncate summary for table display
+                if len(summary) > 200:
+                    summary = summary[:200] + "..."
+
+                # Truncate follow-up email for table display
+                if len(follow_up_email) > 150:
+                    follow_up_email = follow_up_email[:150] + "..."
+
+                data.append({
+                    "Conversation ID": conversation_id,
+                    "Customer Info": customer_info,
+                    "Summary": summary,
+                    "Key Findings": findings_text,
+                    "Follow-up Email": follow_up_email
+                })
+
+            return pd.DataFrame(data)
+
+        except Exception as e:
+            print(f"Error loading conversations: {e}")
+            return pd.DataFrame(columns=["Conversation ID", "Customer Info", "Summary", "Key Findings", "Follow-up Email"])
+
+    def filter_conversations_by_sources(self, sources: List[Dict]) -> pd.DataFrame:
+        """Filter conversations to show only those used in the current query."""
+        if not sources or self.conversation_collection is None:
+            return self.load_conversations()
+
+        try:
+            # Extract conversation IDs from sources
+            source_conversation_ids = set()
+
+            print(f"DEBUG: Filtering conversations based on {len(sources)} sources")
+
+            for source in sources:
+                print(f"DEBUG: Processing source: {source}")
+
+                # Try to extract conversation ID from various possible fields
+                doc_id = source.get("id", "")
+                title = source.get("title", "")
+
+                # Method 1: Try to extract conversation ID from title (if it contains conversation ID)
+                if title and "conversation" in title.lower():
+                    # Look for conversation ID pattern in title
+                    import re
+                    conv_id_match = re.search(r'conversation[_\s]*(\d+)', title, re.IGNORECASE)
+                    if conv_id_match:
+                        conv_id = conv_id_match.group(1)
+                        source_conversation_ids.add(conv_id)
+                        print(f"DEBUG: Found conversation ID from title: {conv_id}")
+                        continue
+
+                # Method 2: Query the RAG collection to find the conversation ID for this document
+                if doc_id:
+                    try:
+                        # Use the correct collection name for RAG data
+                        rag_collection = self.database["rag_conversations"]
+
+                        # Try different query patterns
+                        doc = None
+
+                        # Try by _id if it's a valid ObjectId
+                        if doc_id.isdigit():
+                            doc = rag_collection.find_one({"_id": int(doc_id)})
+
+                        if not doc:
+                            # Try by properties.conversation_id
+                            doc = rag_collection.find_one({"properties.conversation_id": doc_id})
+
+                        if not doc:
+                            # Try by conversation_id in properties
+                            doc = rag_collection.find_one({"properties.conversation_id": str(doc_id)})
+
+                        if doc and "properties" in doc and "conversation_id" in doc["properties"]:
+                            conv_id = doc["properties"]["conversation_id"]
+                            if conv_id:
+                                source_conversation_ids.add(str(conv_id))
+                                print(f"DEBUG: Found conversation ID from RAG query: {conv_id}")
+                        else:
+                            print(f"DEBUG: No conversation ID found for doc_id: {doc_id}")
+
+                    except Exception as e:
+                        print(f"DEBUG: Error querying RAG collection for doc_id {doc_id}: {e}")
+
+            print(f"DEBUG: Found {len(source_conversation_ids)} unique conversation IDs: {source_conversation_ids}")
+
+            if not source_conversation_ids:
+                print("DEBUG: No conversation IDs found, returning all conversations")
+                return self.load_conversations()
+
+            # Query for conversations that match the source conversation IDs
+            pipeline = [
+                {"$match": {
+                    "conversation_analysis": {"$exists": True},
+                    "metadata.properties.conversation_id": {"$in": list(source_conversation_ids)}
+                }},
+                {"$project": {
+                    "conversation_id": "$metadata.properties.conversation_id",
+                    "user_id": "$metadata.properties.user_id",
+                    "icp_region": "$metadata.properties.icp_region",
+                    "icp_country": "$metadata.properties.icp_country",
+                    "team_size": "$metadata.properties.team_size",
+                    "summary": "$conversation_analysis.aggregated_contextual_summary",
+                    "key_findings": "$conversation_analysis.aggregated_marketing_insights.key_findings",
+                    "follow_up_email": "$conversation_analysis.follow_up_email"
+                }}
+            ]
+
+            docs = list(self.conversation_collection.aggregate(pipeline))
+            print(f"DEBUG: Found {len(docs)} matching conversation documents")
+
+            data = []
+            for doc in docs:
+                conversation_id = doc.get("conversation_id", "Unknown")
+                user_id = doc.get("user_id", "N/A")
+                icp_region = doc.get("icp_region", "N/A")
+                icp_country = doc.get("icp_country", "N/A")
+                team_size = doc.get("team_size", "N/A")
+                summary = doc.get("summary", "No summary available")
+                follow_up_email = doc.get("follow_up_email", "No follow-up email available")
+
+                # Format customer info into a single column
+                customer_info_parts = []
+                if user_id != "N/A":
+                    customer_info_parts.append(f"User: {user_id}")
+                if icp_region != "N/A":
+                    customer_info_parts.append(f"Region: {icp_region}")
+                if icp_country != "N/A":
+                    customer_info_parts.append(f"Country: {icp_country}")
+                if team_size != "N/A":
+                    customer_info_parts.append(f"Team Size: {team_size}")
+
+                customer_info = "\n".join(customer_info_parts) if customer_info_parts else "No customer info available"
+
+                # Format key findings
+                key_findings = doc.get("key_findings", [])
+                if key_findings and isinstance(key_findings, list):
+                    findings_text = "\n".join([f"• {finding.get('finding', '')}" for finding in key_findings[:3]])
+                    if len(key_findings) > 3:
+                        findings_text += f"\n... and {len(key_findings) - 3} more"
+                else:
+                    findings_text = "No key findings available"
+
+                # Truncate summary for table display
+                if len(summary) > 200:
+                    summary = summary[:200] + "..."
+
+                # Truncate follow-up email for table display
+                if len(follow_up_email) > 150:
+                    follow_up_email = follow_up_email[:150] + "..."
+
+                data.append({
+                    "Conversation ID": conversation_id,
+                    "Customer Info": customer_info,
+                    "Summary": summary,
+                    "Key Findings": findings_text,
+                    "Follow-up Email": follow_up_email
+                })
+
+            print(f"DEBUG: Returning {len(data)} filtered conversations")
+            return pd.DataFrame(data)
+
+        except Exception as e:
+            print(f"Error filtering conversations: {e}")
+            import traceback
+            traceback.print_exc()
+            return self.load_conversations()
+
+    def filter_conversations(self, search_term: str) -> pd.DataFrame:
+        """Filter conversations based on search term."""
+        if not search_term or not search_term.strip():
+            return self.load_conversations()
+
+        try:
+            # Load all conversations first
+            all_conversations = self.load_conversations(limit=1000)  # Load more for filtering
+
+            if all_conversations.empty:
+                return all_conversations
+
+            # Convert search term to lowercase for case-insensitive search
+            search_lower = search_term.lower().strip()
+
+            # Filter conversations based on search term
+            filtered_data = []
+            for _, row in all_conversations.iterrows():
+                # Search in conversation ID, customer info, summary, key findings, and follow-up email
+                conversation_id = str(row.get("Conversation ID", "")).lower()
+                customer_info = str(row.get("Customer Info", "")).lower()
+                summary = str(row.get("Summary", "")).lower()
+                key_findings = str(row.get("Key Findings", "")).lower()
+                follow_up_email = str(row.get("Follow-up Email", "")).lower()
+
+                # Check if search term matches any field
+                if (search_lower in conversation_id or
+                    search_lower in customer_info or
+                    search_lower in summary or
+                    search_lower in key_findings or
+                    search_lower in follow_up_email):
+                    filtered_data.append(row.to_dict())
+
+            return pd.DataFrame(filtered_data)
+
+        except Exception as e:
+            print(f"Error filtering conversations: {e}")
+            return self.load_conversations()
+
+    def clear_conversation_search(self) -> Tuple[str, pd.DataFrame]:
+        """Clear the search and show all conversations."""
+        return "", self.load_conversations()
+
+    def reset_ui_state(self) -> Tuple[str, str, str, str, pd.DataFrame]:
+        """Reset the UI state to show all conversations and clear outputs."""
+        return "", "", "", "", self.load_conversations()
+
     def launch(self, **kwargs):
         """Launch the Gradio interface."""
         return self.interface.launch(**kwargs)
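Note: the $project stages above assume documents shaped roughly as follows; this is a hypothetical example illustrating the field paths, not a record from the dataset:

example_doc = {
    "metadata": {
        "properties": {
            "conversation_id": "12345",
            "user_id": "user_001",
            "icp_region": "EMEA",
            "icp_country": "Germany",
            "team_size": "11-50",
        }
    },
    "conversation_analysis": {
        "aggregated_contextual_summary": "Customer asked about routing rules...",
        "aggregated_marketing_insights": {
            "key_findings": [{"finding": "Pricing was the main objection"}]
        },
        "follow_up_email": "Hi, thanks for reaching out...",
    },
}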
src/second_brain_online/config.py CHANGED
@@ -44,11 +44,12 @@ class Settings(BaseSettings):
         description="Name of the MongoDB database.",
     )
     MONGODB_COLLECTION_NAME: str = Field(
-        default="rag_intercom",
+        default="rag_conversations",
         description="Name of the MongoDB collection for RAG documents.",
     )
     MONGODB_URI: str = Field(
-        default="mongodb+srv://keshavchhaparia:bUSBXeVCGWDyQhDG@saaslabs.awtivxf.mongodb.net/?retryWrites=true&w=majority&appName=saaslabs",
+        default="mongodb+srv://keshavchhaparia:bUSBXeVCGWDyQhDG@saaslabs.awtivxf.mongodb.net/",
+        # default="mongodb+srv://contextdb:HOqIgSH01CoEiMb1@cluster0.d9cmff.mongodb.net/",
         description="Connection URI for the MongoDB Atlas instance.",
     )
 
temp_init.py DELETED
@@ -1,9 +0,0 @@
-from . import agents, rag
-
-# Optional import for evaluation - may cause issues in some environments
-try:
-    from .evaluation import evaluate
-    __all__ = ["rag", "agents", "evaluate"]
-except ImportError as e:
-    print(f"Warning: Could not import evaluation module: {e}")
-    __all__ = ["rag", "agents"]
uv.lock CHANGED
@@ -2344,6 +2344,7 @@ dependencies = [
     { name = "pydantic" },
     { name = "pydantic-settings" },
     { name = "pymongo" },
+    { name = "sentence-transformers" },
     { name = "smolagents" },
 ]
 
@@ -2369,6 +2370,7 @@ requires-dist = [
     { name = "pydantic", specifier = ">=2.8.2" },
     { name = "pydantic-settings", specifier = ">=2.7.0" },
     { name = "pymongo", specifier = ">=4.10.1" },
+    { name = "sentence-transformers", specifier = ">=3.0.0" },
     { name = "smolagents", specifier = ">=1.4.1" },
 ]
what_can_i_do.py DELETED
@@ -1,60 +0,0 @@
-import opik
-from smolagents import Tool
-
-
-class WhatCanIDoTool(Tool):
-    name = "what_can_i_do"
-    description = """Returns a comprehensive list of available capabilities and topics in the Second Brain system.
-
-    This tool should be used when:
-    - The user explicitly asks what the system can do
-    - The user asks about available features or capabilities
-    - The user seems unsure about what questions they can ask
-    - The user wants to explore the system's knowledge areas
-
-    This tool should NOT be used when:
-    - The user asks a specific technical question
-    - The user already knows what they want to learn about
-    - The question is about a specific topic covered in the knowledge base"""
-
-    inputs = {
-        "question": {
-            "type": "string",
-            "description": "The user's query about system capabilities. While this parameter is required, the function returns a standard capability list regardless of the specific question."
-        }
-    }
-    output_type = "string"
-
-    @opik.track(name="what_can_i_do")
-    def forward(self, question: str) -> str:
-        """Returns a comprehensive list of available capabilities and topics in the Second Brain system."""
-        return """
-You can ask questions about the content in your Second Brain, such as:
-
-Architecture and Systems:
-- What is the feature/training/inference (FTI) architecture?
-- How do agentic systems work?
-- Detail how does agent memory work in agentic applications?
-
-LLM Technology:
-- What are LLMs?
-- What is BERT (Bidirectional Encoder Representations from Transformers)?
-- Detail how does RLHF (Reinforcement Learning from Human Feedback) work?
-- What are the top LLM frameworks for building applications?
-- Write me a paragraph on how can I optimize LLMs during inference?
-
-RAG and Document Processing:
-- What tools are available for processing PDFs for LLMs and RAG?
-- What's the difference between vector databases and vector indices?
-- How does document chunk overlap affect RAG performance?
-- What is chunk reranking and why is it important?
-- What are advanced RAG techniques for optimization?
-- How can RAG pipelines be evaluated?
-
-Learning Resources:
-- Can you recommend courses on LLMs and RAG?
-"""
-
-
-# Create an instance for backward compatibility
-what_can_i_do = WhatCanIDoTool()