chinmayjha committed
Commit 1d82c77 · 1 Parent(s): 23323b2

Update entire src directory with fixed __init__.py

second_brain_online/__init__.py ADDED
File without changes
second_brain_online/application/__init__.py ADDED
@@ -0,0 +1,9 @@
from . import agents, rag

# Optional import for evaluation - may cause issues in some environments
try:
    from .evaluation import evaluate
    __all__ = ["rag", "agents", "evaluate"]
except ImportError as e:
    print(f"Warning: Could not import evaluation module: {e}")
    __all__ = ["rag", "agents"]
second_brain_online/application/agents/__init__.py ADDED
@@ -0,0 +1,3 @@
from .agents import extract_tool_responses, get_agent

__all__ = ["get_agent", "extract_tool_responses"]
second_brain_online/application/agents/agents.py ADDED
@@ -0,0 +1,144 @@
from pathlib import Path
from typing import Any

import opik
from loguru import logger
from opik import opik_context
from smolagents import LiteLLMModel, MessageRole, MultiStepAgent, ToolCallingAgent

from second_brain_online.config import settings

from .tools import (
    HuggingFaceEndpointSummarizerTool,
    MongoDBRetrieverTool,
    OpenAISummarizerTool,
    what_can_i_do,
)


def get_agent(retriever_config_path: Path) -> "AgentWrapper":
    agent = AgentWrapper.build_from_smolagents(
        retriever_config_path=retriever_config_path
    )

    return agent


class AgentWrapper:
    def __init__(self, agent: MultiStepAgent) -> None:
        self.__agent = agent

    @property
    def input_messages(self) -> list[dict]:
        return self.__agent.input_messages

    @property
    def agent_name(self) -> str:
        return self.__agent.agent_name

    @property
    def max_steps(self) -> int:
        return self.__agent.max_steps

    @classmethod
    def build_from_smolagents(cls, retriever_config_path: Path) -> "AgentWrapper":
        retriever_tool = MongoDBRetrieverTool(config_path=retriever_config_path)
        if settings.USE_HUGGINGFACE_DEDICATED_ENDPOINT:
            logger.warning(
                f"Using Hugging Face dedicated endpoint as the summarizer with URL: {settings.HUGGINGFACE_DEDICATED_ENDPOINT}"
            )
            summarizer_tool = HuggingFaceEndpointSummarizerTool()
        else:
            logger.warning(
                f"Using OpenAI as the summarizer with model: {settings.OPENAI_MODEL_ID}"
            )
            summarizer_tool = OpenAISummarizerTool(stream=False)

        model = LiteLLMModel(
            model_id=settings.OPENAI_MODEL_ID,
            api_base="https://api.openai.com/v1",
            api_key=settings.OPENAI_API_KEY,
        )

        agent = ToolCallingAgent(
            tools=[what_can_i_do, retriever_tool],  # Remove summarizer - it's redundant
            model=model,
            max_steps=2,  # Reduce steps since we removed summarizer
            verbosity_level=2,
        )

        return cls(agent)

    @opik.track(name="Agent.run")
    def run(self, task: str, **kwargs) -> Any:
        result = self.__agent.run(task, **kwargs)

        model = self.__agent.model
        metadata = {
            "system_prompt": self.__agent.system_prompt,
            "system_prompt_template": self.__agent.system_prompt_template,
            "tool_description_template": self.__agent.tool_description_template,
            "tools": self.__agent.tools,
            "model_id": self.__agent.model.model_id,
            "api_base": self.__agent.model.api_base,
            "input_token_count": model.last_input_token_count,
            "output_token_count": model.last_output_token_count,
        }
        if hasattr(self.__agent, "step_number"):
            metadata["step_number"] = self.__agent.step_number
        opik_context.update_current_trace(
            tags=["agent"],
            metadata=metadata,
        )

        return result


def extract_tool_responses(agent: ToolCallingAgent) -> str:
    """
    Extracts and concatenates all tool response contents with numbered observation delimiters.

    Args:
        agent (ToolCallingAgent): Agent whose input messages contain the tool responses.

    Returns:
        str: Tool response contents separated by numbered observation delimiters

    Example:
        >>> agent.input_messages = [
        ...     {"role": MessageRole.TOOL_RESPONSE, "content": "First response"},
        ...     {"role": MessageRole.USER, "content": "Question"},
        ...     {"role": MessageRole.TOOL_RESPONSE, "content": "Second response"},
        ... ]
        >>> extract_tool_responses(agent)
        "-------- OBSERVATION 1 --------\nFirst response\n-------- OBSERVATION 2 --------\nSecond response"
    """

    tool_responses = [
        msg["content"]
        for msg in agent.input_messages
        if msg["role"] == MessageRole.TOOL_RESPONSE
    ]

    return "\n".join(
        f"-------- OBSERVATION {i + 1} --------\n{response}"
        for i, response in enumerate(tool_responses)
    )


class OpikAgentMonitorCallback:
    def __init__(self) -> None:
        self.output_state: dict = {}

    def __call__(self, step_log) -> None:
        input_state = {
            "agent_memory": step_log.agent_memory,
            "tool_calls": step_log.tool_calls,
        }
        self.output_state = {"observations": step_log.observations}

        self.trace(input_state)

    @opik.track(name="Callback.agent_step")
    def trace(self, step_log) -> dict:
        return self.output_state
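For reviewers, a minimal usage sketch of the wrapper exported here (the config path and question are placeholders, not part of this commit):

from pathlib import Path

from second_brain_online.application.agents import extract_tool_responses, get_agent

# Hypothetical path and prompt - adjust to your own retriever config and query.
agent = get_agent(retriever_config_path=Path("configs/retriever.yaml"))
answer = agent.run("What are advanced RAG techniques for optimization?")
context = extract_tool_responses(agent)  # numbered OBSERVATION blocks from the tool calls
print(answer, context, sep="\n\n")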
second_brain_online/application/agents/tools/__init__.py ADDED
@@ -0,0 +1,10 @@
from .mongodb_retriever import MongoDBRetrieverTool
from .summarizer import HuggingFaceEndpointSummarizerTool, OpenAISummarizerTool
from .what_can_i_do import what_can_i_do

__all__ = [
    "what_can_i_do",
    "MongoDBRetrieverTool",
    "HuggingFaceEndpointSummarizerTool",
    "OpenAISummarizerTool",
]
second_brain_online/application/agents/tools/mongodb_retriever.py ADDED
@@ -0,0 +1,148 @@
import json
from pathlib import Path

import yaml
from loguru import logger
from opik import opik_context, track
from smolagents import Tool

from second_brain_online.application.rag import get_retriever


class MongoDBRetrieverTool(Tool):
    name = "mongodb_vector_search_retriever"
    description = """Use this tool to search and retrieve relevant documents from a knowledge base using semantic search.
    This tool performs similarity-based search to find the most relevant documents matching the query.
    Best used when you need to:
    - Find specific information from stored documents
    - Get context about a topic
    - Research historical data or documentation
    The tool will return multiple relevant document snippets."""

    inputs = {
        "query": {
            "type": "string",
            "description": """The search query to find relevant documents for using semantic search.
            Should be a clear, specific question or statement about the information you're looking for.""",
        }
    }
    output_type = "string"

    def __init__(self, config_path: Path, **kwargs):
        super().__init__(**kwargs)

        self.config_path = config_path
        self.retriever = self.__load_retriever(config_path)

    def __load_retriever(self, config_path: Path):
        config = yaml.safe_load(config_path.read_text())
        config = config["parameters"]

        return get_retriever(
            embedding_model_id=config["embedding_model_id"],
            embedding_model_type=config["embedding_model_type"],
            retriever_type=config["retriever_type"],
            k=5,
            device=config["device"],
        )

    @track(name="MongoDBRetrieverTool.forward")
    def forward(self, query: str) -> str:
        if hasattr(self.retriever, "search_kwargs"):
            search_kwargs = self.retriever.search_kwargs
        else:
            try:
                search_kwargs = {
                    "fulltext_penalty": self.retriever.fulltext_penalty,
                    "vector_score_penalty": self.retriever.vector_penalty,
                    "top_k": self.retriever.top_k,
                }
            except AttributeError:
                logger.warning("Could not extract search kwargs from retriever.")

                search_kwargs = {}

        opik_context.update_current_trace(
            tags=["agent"],
            metadata={
                "search": search_kwargs,
                "embedding_model_id": self.retriever.vectorstore.embeddings.model,
            },
        )

        try:
            query = self.__parse_query(query)
            relevant_docs = self.retriever.invoke(query)

            formatted_docs = []
            for i, doc in enumerate(relevant_docs, 1):
                # Extract metadata
                title = doc.metadata.get("title", "Untitled")
                datetime = doc.metadata.get("datetime", "unknown")
                contextual_summary = doc.metadata.get("contextual_summary", "")
                marketing_insights = doc.metadata.get("marketing_insights", {})
                content = doc.page_content.strip()

                # Format marketing insights if available
                marketing_insights_text = ""
                if marketing_insights:
                    marketing_insights_text = "\n<marketing_insights>\n"

                    # Add quotes
                    quotes = marketing_insights.get("quotes", [])
                    if quotes:
                        marketing_insights_text += "<quotes>\n"
                        for quote in quotes:
                            marketing_insights_text += f"- \"{quote.get('quote', '')}\" (Sentiment: {quote.get('sentiment', 'Unknown')})\n"
                        marketing_insights_text += "</quotes>\n"

                    # Add key findings
                    findings = marketing_insights.get("key_findings", [])
                    if findings:
                        marketing_insights_text += "<key_findings>\n"
                        for finding in findings:
                            marketing_insights_text += f"- {finding.get('finding', '')} (Impact: {finding.get('impact', 'Unknown')})\n"
                        marketing_insights_text += "</key_findings>\n"

                    marketing_insights_text += "</marketing_insights>\n"

                # Create optimized document structure - truncate content to avoid token overload
                content_preview = content[:500] + "..." if len(content) > 500 else content
                formatted_docs.append(
                    f"""
                    <document id="{i}">
                    <title>{title}</title>
                    <date>{datetime}</date>
                    <contextual_summary>
                    {contextual_summary}
                    </contextual_summary>
                    {marketing_insights_text}
                    <content>
                    {content_preview}
                    </content>
                    </document>
                    """
                )

            result = "\n".join(formatted_docs)
            result = f"""
            <search_results>
            {result}
            </search_results>
            When using context from any document, reference the document title and date for attribution.
            """
            return result
        except Exception:
            logger.opt(exception=True).debug("Error retrieving documents.")

            return "Error retrieving documents."

    @track(name="MongoDBRetrieverTool.parse_query")
    def __parse_query(self, query: str) -> str:
        try:
            # Try to parse as JSON first
            query_dict = json.loads(query)
            return query_dict["query"]
        except (json.JSONDecodeError, KeyError):
            # If JSON parsing fails, return the query as-is
            return query
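For reference, __load_retriever only reads the keys under a top-level "parameters" block of the YAML config; a minimal sketch of a compatible file (the model id and device values are illustrative assumptions):

from pathlib import Path

# Hypothetical config file - only these keys are read; k is fixed to 5 inside __load_retriever.
config_path = Path("retriever_config.yaml")
config_path.write_text(
    "parameters:\n"
    "  embedding_model_id: sentence-transformers/all-MiniLM-L6-v2\n"
    "  embedding_model_type: huggingface\n"
    "  retriever_type: contextual\n"
    "  device: cpu\n"
)

tool = MongoDBRetrieverTool(config_path=config_path)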
second_brain_online/application/agents/tools/summarizer.py ADDED
@@ -0,0 +1,126 @@
from openai import OpenAI
from opik import track
from smolagents import Tool

from second_brain_online.config import settings


class HuggingFaceEndpointSummarizerTool(Tool):
    name = "huggingface_summarizer"
    description = """Use this tool to summarize a piece of text. Especially useful when you need to summarize a document."""

    inputs = {
        "text": {
            "type": "string",
            "description": """The text to summarize.""",
        }
    }
    output_type = "string"

    SYSTEM_PROMPT = """
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are a helpful assistant specialized in summarizing documents. Generate a concise TL;DR summary in markdown format having a maximum of 512 characters of the key findings from the provided documents, highlighting the most significant insights

### Input:
{content}

### Response:
"""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        assert settings.HUGGINGFACE_ACCESS_TOKEN is not None, (
            "HUGGINGFACE_ACCESS_TOKEN is required to use the dedicated endpoint. Add it to the .env file."
        )
        assert settings.HUGGINGFACE_DEDICATED_ENDPOINT is not None, (
            "HUGGINGFACE_DEDICATED_ENDPOINT is required to use the dedicated endpoint. Add it to the .env file."
        )

        self.__client = OpenAI(
            base_url=settings.HUGGINGFACE_DEDICATED_ENDPOINT,
            api_key=settings.HUGGINGFACE_ACCESS_TOKEN,
        )

    @track
    def forward(self, text: str) -> str:
        result = self.__client.chat.completions.create(
            model="tgi",
            messages=[
                {
                    "role": "user",
                    "content": self.SYSTEM_PROMPT.format(content=text),
                },
            ],
        )

        return result.choices[0].message.content


class OpenAISummarizerTool(Tool):
    name = "openai_summarizer"
    description = """Use this tool to summarize search results in XML format. This tool is especially useful when you need to analyze multiple documents from search results. The tool will parse XML search results, identify topics that are directly relevant to the user's query, and create a focused summary with document references. It filters out irrelevant topics to ensure the summary directly answers the user's question."""

    inputs = {
        "text": {
            "type": "string",
            "description": """The text to summarize.""",
        }
    }
    output_type = "string"

    SYSTEM_PROMPT = """You are an expert document analyst specialized in query-focused summarization.

Your task is to analyze search results and create a focused summary that directly answers the user's question.

When you receive XML search results, you should:
1. Parse ALL documents from the XML structure
2. Identify topics that are directly relevant to the user's query
3. Filter out irrelevant topics that don't relate to the question
4. Group related information by relevant topics
5. Extract key insights that directly answer the user's question
6. Include document references with titles and dates when available

Analysis Guidelines:
- Focus on information that directly answers the user's question
- Only include topics that are relevant to the query
- Use specific document titles and dates from the XML metadata when available
- Ignore irrelevant information like cookie policies, privacy policies, HTTP errors, etc.
- Create a well-structured, readable summary
- Group similar topics together when appropriate

Document content:
{content}

Generate a focused summary that directly answers the user's question, organized by relevant topics with document references. Exclude any topics that don't directly relate to the question."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        self.__client = OpenAI(
            base_url="https://api.openai.com/v1",
            api_key=settings.OPENAI_API_KEY,
        )

    @track
    def forward(self, text: str) -> str:
        result = self.__client.chat.completions.create(
            model=settings.OPENAI_MODEL_ID,
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert document analyst specialized in query-focused topic-based summarization. You excel at parsing XML search results, identifying relevant topics, and creating structured summaries with proper document references."
                },
                {
                    "role": "user",
                    "content": self.SYSTEM_PROMPT.format(content=text),
                },
            ],
            temperature=0.1,  # Lower temperature for more consistent, focused output
            max_tokens=2000,  # Increased token limit for more detailed summaries
        )

        return result.choices[0].message.content
second_brain_online/application/agents/tools/what_can_i_do.py ADDED
@@ -0,0 +1,60 @@
import opik
from smolagents import tool


@opik.track(name="what_can_i_do")
@tool
def what_can_i_do(question: str) -> str:
    """Returns a comprehensive list of available capabilities and topics in the Second Brain system.

    This tool should be used when:
    - The user explicitly asks what the system can do
    - The user asks about available features or capabilities
    - The user seems unsure about what questions they can ask
    - The user wants to explore the system's knowledge areas

    This tool should NOT be used when:
    - The user asks a specific technical question
    - The user already knows what they want to learn about
    - The question is about a specific topic covered in the knowledge base

    Args:
        question: The user's query about system capabilities. While this parameter is required,
            the function returns a standard capability list regardless of the specific question.

    Returns:
        str: A formatted string containing categorized lists of example questions and topics
            that users can explore within the Second Brain system.

    Examples:
        >>> what_can_i_do("What can this system do?")
        >>> what_can_i_do("What kind of questions can I ask?")
        >>> what_can_i_do("Help me understand what I can learn here")
    """

    return """
    You can ask questions about the content in your Second Brain, such as:

    Architecture and Systems:
    - What is the feature/training/inference (FTI) architecture?
    - How do agentic systems work?
    - Detail how does agent memory work in agentic applications?

    LLM Technology:
    - What are LLMs?
    - What is BERT (Bidirectional Encoder Representations from Transformers)?
    - Detail how does RLHF (Reinforcement Learning from Human Feedback) work?
    - What are the top LLM frameworks for building applications?
    - Write me a paragraph on how can I optimize LLMs during inference?

    RAG and Document Processing:
    - What tools are available for processing PDFs for LLMs and RAG?
    - What's the difference between vector databases and vector indices?
    - How does document chunk overlap affect RAG performance?
    - What is chunk reranking and why is it important?
    - What are advanced RAG techniques for optimization?
    - How can RAG pipelines be evaluated?

    Learning Resources:
    - Can you recommend courses on LLMs and RAG?
    """
second_brain_online/application/evaluation/__init__.py ADDED
@@ -0,0 +1,5 @@
from .evaluate import evaluate_agent
from .summary_density_heuristic import SummaryDensityHeuristic
from .summary_density_judge import SummaryDensityJudge

__all__ = ["evaluate_agent", "SummaryDensityHeuristic", "SummaryDensityJudge"]
second_brain_online/application/evaluation/evaluate.py ADDED
@@ -0,0 +1,73 @@
from pathlib import Path

from loguru import logger
from opik.evaluation import evaluate
from opik.evaluation.metrics import AnswerRelevance, Hallucination, Moderation

from second_brain_online import opik_utils
from second_brain_online.application.agents import agents, extract_tool_responses
from second_brain_online.config import settings

from .summary_density_heuristic import SummaryDensityHeuristic
from .summary_density_judge import SummaryDensityJudge

opik_utils.configure()


def evaluate_agent(prompts: list[str], retriever_config_path: Path) -> None:
    assert settings.COMET_API_KEY, (
        "COMET_API_KEY is not set. We need it to track the experiment with Opik."
    )

    logger.info("Starting evaluation...")
    logger.info(f"Evaluating agent with {len(prompts)} prompts.")

    def evaluation_task(x: dict) -> dict:
        """Call agentic app logic to evaluate."""

        agent = agents.get_agent(retriever_config_path=retriever_config_path)
        response = agent.run(x["input"])
        context = extract_tool_responses(agent)

        return {
            "input": x["input"],
            "context": context,
            "output": response,
        }

    # Get or create dataset
    dataset_name = "second_brain_rag_agentic_app_evaluation_dataset"
    dataset = opik_utils.get_or_create_dataset(name=dataset_name, prompts=prompts)

    # Evaluate
    agent = agents.get_agent(retriever_config_path=retriever_config_path)
    experiment_config = {
        "model_id": settings.OPENAI_MODEL_ID,
        "retriever_config_path": retriever_config_path,
        "agent_config": {
            "max_steps": agent.max_steps,
            "agent_name": agent.agent_name,
        },
    }
    scoring_metrics = [
        Hallucination(),
        AnswerRelevance(),
        Moderation(),
        SummaryDensityHeuristic(),
        SummaryDensityJudge(),
    ]

    if dataset:
        logger.info("Evaluation details:")
        logger.info(f"Dataset: {dataset_name}")
        logger.info(f"Metrics: {[m.__class__.__name__ for m in scoring_metrics]}")

        evaluate(
            dataset=dataset,
            task=evaluation_task,
            scoring_metrics=scoring_metrics,
            experiment_config=experiment_config,
            task_threads=2,
        )
    else:
        logger.error("Can't run the evaluation as the dataset items are empty.")
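A sketch of how evaluate_agent might be called (prompts and path are placeholders; COMET_API_KEY and OPENAI_API_KEY must be set for the run to start):

from pathlib import Path

from second_brain_online.application.evaluation import evaluate_agent

# Hypothetical prompts and config path.
evaluate_agent(
    prompts=[
        "What is the feature/training/inference (FTI) architecture?",
        "How can RAG pipelines be evaluated?",
    ],
    retriever_config_path=Path("configs/retriever.yaml"),
)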
second_brain_online/application/evaluation/summary_density_heuristic.py ADDED
@@ -0,0 +1,87 @@
from typing import Any

from opik.evaluation.metrics import base_metric, score_result


class SummaryDensityHeuristic(base_metric.BaseMetric):
    """
    A metric that evaluates whether an LLM's output has appropriate length and density.

    This metric uses a heuristic to determine if the output length is appropriate for the given instruction.
    It returns a normalized score between 0 and 1, where:
    - 0.0 (Poor): Output is either too short and incomplete, or too long with unnecessary information
    - 0.5 (Good): Output has decent length balance but is still slightly too short or too long
    - 1.0 (Excellent): Output length is appropriate, answering the question concisely without being verbose
    """

    def __init__(
        self,
        name: str = "summary_density_heuristic",
        min_length: int = 128,
        max_length: int = 1024,
    ) -> None:
        self.name = name
        self.min_length = min_length
        self.max_length = max_length

    def score(
        self, input: str, output: str, **ignored_kwargs: Any
    ) -> score_result.ScoreResult:
        """
        Score the output of an LLM.

        Args:
            input: The input prompt given to the LLM.
            output: The output of an LLM to score.
            **ignored_kwargs: Any additional keyword arguments.

        Returns:
            ScoreResult: The computed score with explanation.
        """

        length_score = self._compute_length_score(output)

        reason = f"Output length: {len(output)} chars. "
        if length_score == 1.0:
            reason += "Length is within ideal range."
        elif length_score >= 0.5:
            reason += "Length is slightly outside ideal range."
        else:
            reason += "Length is significantly outside ideal range."

        return score_result.ScoreResult(
            name=self.name,
            value=length_score,
            reason=reason,
        )

    def _compute_length_score(self, text: str) -> float:
        """
        Compute a score based on text length relative to min and max boundaries.

        Args:
            text: The text to evaluate.

        Returns:
            float: A score between 0 and 1, where:
                - 0.0: Text length is significantly outside the boundaries
                - 0.5: Text length is slightly outside the boundaries
                - 1.0: Text length is within the ideal range
        """
        length = len(text)

        # If length is within bounds, return perfect score
        if self.min_length <= length <= self.max_length:
            return 1.0

        if length < self.min_length:
            deviation = (self.min_length - length) / self.min_length
        else:
            deviation = (length - self.max_length) / self.max_length

        # Convert deviation to a score between 0 and 1
        # deviation <= 0.5 -> score between 0.5 and 1.0
        # deviation > 0.5 -> score between 0.0 and 0.5
        score = max(0.0, 1.0 - deviation)

        return score
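Worked example of the heuristic with the default bounds (min_length=128, max_length=1024):

metric = SummaryDensityHeuristic()
print(metric.score(input="q", output="x" * 64).value)    # deviation (128 - 64) / 128 = 0.5 -> score 0.5
print(metric.score(input="q", output="x" * 500).value)   # within [128, 1024] -> score 1.0
print(metric.score(input="q", output="x" * 2048).value)  # deviation (2048 - 1024) / 1024 = 1.0 -> score 0.0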
second_brain_online/application/evaluation/summary_density_judge.py ADDED
@@ -0,0 +1,117 @@
import json
from typing import Any

from opik.evaluation.metrics import base_metric, exceptions, score_result
from opik.evaluation.models import LiteLLMChatModel
from pydantic import BaseModel

from second_brain_online.config import settings


class LLMJudgeStyleOutputResult(BaseModel):
    score: int
    reason: str


class SummaryDensityJudge(base_metric.BaseMetric):
    """
    A metric that evaluates whether an LLM's output has appropriate length and density.

    This metric uses another LLM to judge if the output length is appropriate for the given instruction.
    It returns a normalized score between 0 and 1, where:
    - 0.0 (Poor): Output is either too short and incomplete, or too long with unnecessary information
    - 0.5 (Good): Output has decent length balance but is still slightly too short or too long
    - 1.0 (Excellent): Output length is appropriate, answering the question concisely without being verbose
    """

    def __init__(
        self,
        name: str = "summary_density_judge",
        model_name: str = settings.OPENAI_MODEL_ID,
    ) -> None:
        self.name = name
        self.llm_client = LiteLLMChatModel(model_name=model_name)
        self.prompt_template = """
        You are an impartial expert judge. Evaluate the quality of a given answer to an instruction based on how long the answer is.

        How to decide whether the length of the answer is appropriate:
        1 (Poor): Too short, does not answer the question OR too long, it contains too much noise and unrequired information, where the answer could be more concise.
        2 (Good): Good length balance of the answer, but the answer is still too short OR too long.
        3 (Excellent): The length of the answer is appropriate, it answers the question and is not too long or too short.

        Example of bad answer that is too short:
        <answer>
        LangChain, LlamaIndex, Haystack
        </answer>

        Example of bad answer that is too long:
        <answer>
        LangChain is a powerful and versatile framework designed specifically for building sophisticated LLM applications. It provides comprehensive abstractions for essential components like prompting, memory management, agent behaviors, and chain orchestration. The framework boasts an impressive ecosystem with extensive integrations across various tools and services, making it highly flexible for diverse use cases. However, this extensive functionality comes with a steeper learning curve that might require dedicated time to master.

        LlamaIndex (which was formerly known as GPTIndex) has carved out a specialized niche in the LLM tooling landscape, focusing primarily on data ingestion and advanced indexing capabilities for Large Language Models. It offers a rich set of sophisticated mechanisms to structure and query your data, including vector stores for semantic similarity search, keyword indices for traditional text matching, and tree indices for hierarchical data organization. While it particularly shines in Retrieval-Augmented Generation (RAG) applications, its comprehensive feature set might be excessive for more straightforward implementation needs.

        Haystack stands out as a robust end-to-end framework that places particular emphasis on question-answering systems and semantic search capabilities. It provides a comprehensive suite of document processing tools and comes equipped with production-ready pipelines that can be deployed with minimal configuration. The framework includes advanced features like multi-stage retrieval, document ranking, and reader-ranker architectures. While these capabilities make it powerful for complex information retrieval tasks, new users might find the initial configuration and architecture decisions somewhat challenging to navigate.

        Each of these frameworks brings unique strengths to the table while sharing some overlapping functionality. The choice between them often depends on specific use cases, technical requirements, and team expertise. LangChain offers the broadest general-purpose toolkit, LlamaIndex excels in data handling and RAG, while Haystack provides the most streamlined experience for question-answering systems.
        </answer>

        Example of excellent answer that is appropriate:
        <answer>
        1. LangChain is a powerful framework for building LLM applications that provides abstractions for prompting, memory, agents, and chains. It has extensive integrations with various tools and services, making it highly flexible but potentially complex to learn.
        2. LlamaIndex specializes in data ingestion and indexing for LLMs, offering sophisticated ways to structure and query your data through vector stores, keyword indices, and tree indices. It excels at RAG applications but may be overkill for simpler use cases.
        3. Haystack is an end-to-end framework focused on question-answering and semantic search, with strong document processing capabilities and ready-to-use pipelines. While powerful, its learning curve can be steep for beginners.
        </answer>

        Instruction: {input}

        Answer: {output}

        Provide your evaluation in JSON format with the following structure, where "score" is the 1-3 rating defined above:
        {{
            "score": 0,
            "reason": "..."
        }}
        """

    def score(self, input: str, output: str, **ignored_kwargs: Any) -> score_result.ScoreResult:
        """
        Score the output of an LLM.

        Args:
            input: The instruction given to the LLM.
            output: The output of an LLM to score.
            **ignored_kwargs: Any additional keyword arguments. This is important so that the metric can be used in the `evaluate` function.
        """

        prompt = self.prompt_template.format(input=input, output=output)

        model_output = self.llm_client.generate_string(
            input=prompt, response_format=LLMJudgeStyleOutputResult
        )

        return self._parse_model_output(model_output)

    def _parse_model_output(self, content: str) -> score_result.ScoreResult:
        try:
            dict_content = json.loads(content)
        except Exception:
            raise exceptions.MetricComputationError("Failed to parse the model output.")

        score = dict_content["score"]
        try:
            assert 1 <= score <= 3, f"Invalid score value: {score}"
        except AssertionError as e:
            raise exceptions.MetricComputationError(str(e))

        score = (score - 1) / 2.0  # Normalize the score to be between 0 and 1

        return score_result.ScoreResult(
            name=self.name,
            value=score,
            reason=dict_content["reason"],
        )
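The judge's raw 1-3 rating is normalized onto [0, 1] in _parse_model_output:

# (raw - 1) / 2.0: raw 1 -> 0.0, raw 2 -> 0.5, raw 3 -> 1.0
for raw in (1, 2, 3):
    print(raw, (raw - 1) / 2.0)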
second_brain_online/application/rag/__init__.py ADDED
@@ -0,0 +1,11 @@
from .embeddings import EmbeddingModelType, get_embedding_model
from .retrievers import RetrieverType, get_retriever
from .splitters import get_splitter

__all__ = [
    "get_retriever",
    "get_splitter",
    "EmbeddingModelType",
    "get_embedding_model",
    "RetrieverType",
]
second_brain_online/application/rag/embeddings.py ADDED
@@ -0,0 +1,74 @@
from typing import Literal, Union

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings

EmbeddingModelType = Literal["openai", "huggingface"]
EmbeddingsModel = Union[OpenAIEmbeddings, HuggingFaceEmbeddings]


def get_embedding_model(
    model_id: str,
    model_type: EmbeddingModelType = "huggingface",
    device: str = "cpu",
) -> EmbeddingsModel:
    """Gets an instance of the configured embedding model.

    The function returns either an OpenAI or HuggingFace embedding model based on the
    provided model type.

    Args:
        model_id (str): The ID/name of the embedding model to use
        model_type (EmbeddingModelType): The type of embedding model to use.
            Must be either "openai" or "huggingface". Defaults to "huggingface"
        device (str): The device to use for the embedding model. Defaults to "cpu"

    Returns:
        EmbeddingsModel: An embedding model instance based on the configuration settings

    Raises:
        ValueError: If model_type is not "openai" or "huggingface"
    """

    if model_type == "openai":
        return get_openai_embedding_model(model_id)
    elif model_type == "huggingface":
        return get_huggingface_embedding_model(model_id, device)
    else:
        raise ValueError(f"Invalid embedding model type: {model_type}")


def get_openai_embedding_model(model_id: str) -> OpenAIEmbeddings:
    """Gets an OpenAI embedding model instance.

    Args:
        model_id (str): The ID/name of the OpenAI embedding model to use

    Returns:
        OpenAIEmbeddings: A configured OpenAI embeddings model instance with
            special token handling enabled
    """
    return OpenAIEmbeddings(
        model=model_id,
        allowed_special={"<|endoftext|>"},
    )


def get_huggingface_embedding_model(
    model_id: str, device: str
) -> HuggingFaceEmbeddings:
    """Gets a HuggingFace embedding model instance.

    Args:
        model_id (str): The ID/name of the HuggingFace embedding model to use
        device (str): The compute device to run the model on (e.g. "cpu", "cuda")

    Returns:
        HuggingFaceEmbeddings: A configured HuggingFace embeddings model instance
            with remote code trust enabled and embedding normalization disabled
    """
    return HuggingFaceEmbeddings(
        model_name=model_id,
        model_kwargs={"device": device, "trust_remote_code": True},
        encode_kwargs={"normalize_embeddings": False},
    )
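A minimal usage sketch (the model ids are illustrative; any OpenAI or Hugging Face embedding model id can be passed):

# Hypothetical model ids.
hf_embeddings = get_embedding_model(
    model_id="sentence-transformers/all-MiniLM-L6-v2",
    model_type="huggingface",
    device="cpu",
)
openai_embeddings = get_embedding_model(
    model_id="text-embedding-3-small",
    model_type="openai",
)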
second_brain_online/application/rag/retrievers.py ADDED
@@ -0,0 +1,83 @@
from typing import Literal, Union

from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_mongodb.retrievers import (
    MongoDBAtlasHybridSearchRetriever,
    MongoDBAtlasParentDocumentRetriever,
)
from loguru import logger

from second_brain_online.config import settings

from .embeddings import EmbeddingModelType, EmbeddingsModel, get_embedding_model
from .splitters import get_splitter

# Retriever type definitions
RetrieverType = Literal["contextual", "parent"]
RetrieverModel = Union[
    MongoDBAtlasHybridSearchRetriever, MongoDBAtlasParentDocumentRetriever
]


def get_retriever(
    embedding_model_id: str,
    embedding_model_type: EmbeddingModelType = "huggingface",
    retriever_type: RetrieverType = "contextual",
    k: int = 3,
    device: str = "cpu",
) -> RetrieverModel:
    logger.info(
        f"Getting '{retriever_type}' retriever for '{embedding_model_type}' - '{embedding_model_id}' on '{device}' "
        f"with {k} top results"
    )

    embedding_model = get_embedding_model(
        embedding_model_id, embedding_model_type, device
    )

    if retriever_type == "contextual":
        return get_hybrid_search_retriever(embedding_model, k)
    elif retriever_type == "parent":
        return get_parent_document_retriever(embedding_model, k)
    else:
        raise ValueError(f"Invalid retriever type: {retriever_type}")


def get_hybrid_search_retriever(
    embedding_model: EmbeddingsModel, k: int
) -> MongoDBAtlasHybridSearchRetriever:
    vectorstore = MongoDBAtlasVectorSearch.from_connection_string(
        connection_string=settings.MONGODB_URI,
        embedding=embedding_model,
        namespace=f"{settings.MONGODB_DATABASE_NAME}.{settings.MONGODB_COLLECTION_NAME}",
        text_key="chunk",
        embedding_key="embedding",
        relevance_score_fn="dotProduct",
    )

    retriever = MongoDBAtlasHybridSearchRetriever(
        vectorstore=vectorstore,
        search_index_name="chunk_text_search",
        top_k=k,
        vector_penalty=50,
        fulltext_penalty=50,
    )

    return retriever


def get_parent_document_retriever(
    embedding_model: EmbeddingsModel, k: int = 3
) -> MongoDBAtlasParentDocumentRetriever:
    retriever = MongoDBAtlasParentDocumentRetriever.from_connection_string(
        connection_string=settings.MONGODB_URI,
        embedding_model=embedding_model,
        child_splitter=get_splitter(200),
        parent_splitter=get_splitter(800),
        database_name=settings.MONGODB_DATABASE_NAME,
        collection_name=settings.MONGODB_COLLECTION_NAME,
        text_key="chunk",
        search_kwargs={"k": k},
    )

    return retriever
second_brain_online/application/rag/splitters.py ADDED
@@ -0,0 +1,28 @@
from langchain_text_splitters import RecursiveCharacterTextSplitter
from loguru import logger


def get_splitter(chunk_size: int) -> RecursiveCharacterTextSplitter:
    """Returns a token-based text splitter with overlap.

    Args:
        chunk_size: Number of tokens for each text chunk.

    Returns:
        RecursiveCharacterTextSplitter: A configured text splitter instance with
            a 15% token overlap between consecutive chunks.
    """

    chunk_overlap = int(0.15 * chunk_size)

    logger.info(
        f"Getting splitter with chunk size: {chunk_size} and overlap: {chunk_overlap}"
    )

    return RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        encoding_name="cl100k_base",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
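The overlap is always 15% of the chunk size; for the two splitters used by the parent-document retriever above:

child_splitter = get_splitter(200)   # chunk_overlap = int(0.15 * 200) = 30 tokens
parent_splitter = get_splitter(800)  # chunk_overlap = int(0.15 * 800) = 120 tokens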
second_brain_online/application/ui/__init__.py ADDED
@@ -0,0 +1,3 @@
from .custom_gradio_ui import CustomGradioUI

__all__ = ["CustomGradioUI"]
second_brain_online/application/ui/custom_gradio_ui.py ADDED
@@ -0,0 +1,287 @@
import json
import re
from typing import Any, Dict, List, Tuple

import gradio as gr
from smolagents import ToolCallingAgent


class CustomGradioUI:
    """Custom Gradio UI for better formatting of agent responses with source attribution."""

    def __init__(self, agent: ToolCallingAgent):
        self.agent = agent
        self.setup_ui()

    def setup_ui(self):
        """Setup the Gradio interface with custom components."""
        with gr.Blocks(
            title="Second Brain AI Assistant",
            theme=gr.themes.Soft(),
            css="""
            .source-card {
                border: 1px solid #e0e0e0;
                border-radius: 8px;
                padding: 12px;
                margin: 8px 0;
                background-color: #f8f9fa;
            }
            .source-title {
                font-weight: bold;
                color: #2c3e50;
                margin-bottom: 4px;
            }
            .source-date {
                font-size: 0.9em;
                color: #6c757d;
                margin-bottom: 8px;
            }
            .answer-section {
                background-color: #ffffff;
                border: 1px solid #dee2e6;
                border-radius: 8px;
                padding: 16px;
                margin-bottom: 16px;
            }
            .tool-usage {
                background-color: #e3f2fd;
                border-left: 4px solid #2196f3;
                padding: 8px 12px;
                margin: 8px 0;
                border-radius: 4px;
                font-size: 0.9em;
            }
            """,
        ) as self.interface:

            gr.Markdown("# 🧠 Second Brain AI Assistant")
            gr.Markdown("Ask questions about your documents and get AI-powered insights with source attribution.")

            with gr.Row():
                with gr.Column(scale=4):
                    self.query_input = gr.Textbox(
                        label="Ask a question",
                        placeholder="What pricing objections were raised in the meetings?",
                        lines=2,
                    )
                with gr.Column(scale=1):
                    self.submit_btn = gr.Button("Ask", variant="primary", size="lg")

            with gr.Row():
                with gr.Column():
                    self.answer_output = gr.HTML(label="Answer")
                    self.sources_output = gr.HTML(label="Sources")
                    self.tools_output = gr.HTML(label="Tools Used")

            with gr.Accordion("🔍 Debug: Raw Response", open=False):
                self.debug_output = gr.Textbox(
                    label="Raw Agent Response",
                    lines=10,
                    max_lines=20,
                    interactive=False,
                )

            # Event handlers
            self.submit_btn.click(
                fn=self.process_query,
                inputs=[self.query_input],
                outputs=[self.answer_output, self.sources_output, self.tools_output, self.debug_output],
            )

            self.query_input.submit(
                fn=self.process_query,
                inputs=[self.query_input],
                outputs=[self.answer_output, self.sources_output, self.tools_output, self.debug_output],
            )

    def process_query(self, query: str) -> Tuple[str, str, str, str]:
        """Process the user query and return formatted response components."""
        if not query.strip():
            return "", "", "", ""

        try:
            # Run the agent
            result = self.agent.run(query)

            # Parse the result
            answer, sources, tools_used = self.parse_agent_response(result)

            # Debug information
            print(f"DEBUG - Raw result: {str(result)[:200]}...")
            print(f"DEBUG - Parsed answer: {answer[:100]}...")
            print(f"DEBUG - Sources found: {len(sources)}")
            print(f"DEBUG - Tools found: {tools_used}")

            # Format outputs
            answer_html = self.format_answer(answer)
            sources_html = self.format_sources(sources)
            tools_html = self.format_tools(tools_used)
            debug_text = str(result)

            return answer_html, sources_html, tools_html, debug_text

        except Exception as e:
            error_msg = f"<div style='color: #dc3545; padding: 12px; border: 1px solid #f5c6cb; border-radius: 4px; background-color: #f8d7da;'>Error: {str(e)}</div>"
            return error_msg, "", "", str(e)

    def parse_agent_response(self, result: Any) -> Tuple[str, List[Dict], List[str]]:
        """Parse the agent response to extract answer, sources, and tools used."""
        answer = ""
        sources = []
        tools_used = []

        # Convert result to string if it's not already
        result_str = str(result)

        # Extract tool usage from the result first
        # Pattern 1: 🛠️ Used tool toolname
        tool_pattern1 = r'🛠️ Used tool (\w+)'
        tool_matches1 = re.findall(tool_pattern1, result_str)

        # Pattern 2: Calling tool: 'toolname'
        tool_pattern2 = r"Calling tool:\s*'([^']+)'"
        tool_matches2 = re.findall(tool_pattern2, result_str)

        # Combine both patterns
        all_tool_matches = tool_matches1 + tool_matches2
        tools_used = list(set(all_tool_matches))  # Remove duplicates

        # Try multiple patterns to extract the answer
        # Pattern 1: JSON format with "answer" key
        json_match = re.search(r'{"answer":\s*"([^"]+)"}', result_str)
        if json_match:
            answer = json_match.group(1)
            # Unescape the JSON string
            answer = answer.replace('\\n', '\n').replace('\\"', '"')
        else:
            # Pattern 2: Look for "Final answer:" followed by content
            final_answer_match = re.search(r'Final answer:\s*(.+?)(?=\n\n|\Z)', result_str, re.DOTALL)
            if final_answer_match:
                answer = final_answer_match.group(1).strip()
                # Try to extract JSON from final answer
                json_in_final = re.search(r'{"answer":\s*"([^"]+)"}', answer)
                if json_in_final:
                    answer = json_in_final.group(1).replace('\\n', '\n').replace('\\"', '"')
            else:
                # Pattern 3: Use the entire result as answer if no specific pattern matches
                answer = result_str

        # Extract sources from the answer text using multiple patterns
        # Pattern 1: (Document: "Title", Date)
        source_pattern1 = r'\(Document:\s*"([^"]+)",\s*([^)]+)\)'
        source_matches1 = re.findall(source_pattern1, answer)

        # Pattern 2: (Document: Title, Date) - without quotes
        source_pattern2 = r'\(Document:\s*([^,]+),\s*([^)]+)\)'
        source_matches2 = re.findall(source_pattern2, answer)

        # Pattern 3: (Document 1, Date) - numbered format
        source_pattern3 = r'\(Document\s+(\d+),\s*([^)]+)\)'
        source_matches3 = re.findall(source_pattern3, answer)

        # Pattern 4: (from "Title" on Date) - new format seen in output
        source_pattern4 = r'\(from\s+"([^"]+)"\s+on\s+([^)]+)\)'
        source_matches4 = re.findall(source_pattern4, answer)

        # Pattern 5: (from Title on Date) - without quotes
        source_pattern5 = r'\(from\s+([^"]+)\s+on\s+([^)]+)\)'
        source_matches5 = re.findall(source_pattern5, answer)

        # Combine all patterns
        all_source_matches = source_matches1 + source_matches2 + source_matches3 + source_matches4 + source_matches5

        for doc_title, doc_date in all_source_matches:
            # Clean up the title and date
            clean_title = doc_title.strip().strip('"')
            clean_date = doc_date.strip()

            # Handle numbered documents (Document 1, Document 2, etc.)
            if clean_title.isdigit():
                clean_title = f"Document {clean_title}"

            sources.append({
                "title": clean_title,
                "date": clean_date,
            })

        # Remove duplicates based on title and date
        unique_sources = []
        seen = set()
        for source in sources:
            key = (source["title"], source["date"])
            if key not in seen:
                seen.add(key)
                unique_sources.append(source)

        return answer, unique_sources, tools_used

    def format_answer(self, answer: str) -> str:
        """Format the answer with proper HTML structure."""
        if not answer:
            return "<div class='answer-section'><p>No answer provided.</p></div>"

        # Remove source references from the answer text for cleaner display
        answer = re.sub(r'\(Document:[^)]+\)', '', answer)

        # Clean up runs of spaces/tabs but keep newlines so the list formatting below can still match
        answer = re.sub(r'[ \t]+', ' ', answer).strip()

        # Format numbered lists and bullet points
        answer = re.sub(r'\n\s*(\d+)\.\s*', r'<br><br><strong>\1.</strong> ', answer)  # Numbered lists
        answer = re.sub(r'\n\s*•\s*', '<br>• ', answer)  # Bullet points
        answer = re.sub(r'\n\s*-\s*', '<br>• ', answer)  # Dash points

        # Format bold text (markdown style)
        answer = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', answer)

        # Format line breaks
        answer = answer.replace('\n', '<br>')

        # Clean up multiple line breaks
        answer = re.sub(r'(<br>){3,}', '<br><br>', answer)

        return f"""
        <div class='answer-section'>
            <h3>📝 Answer</h3>
            <div style='line-height: 1.6; font-size: 16px;'>{answer}</div>
        </div>
        """

    def format_sources(self, sources: List[Dict]) -> str:
        """Format the sources with proper HTML structure."""
        if not sources:
            return "<div><h3>📚 Sources</h3><p>No sources found.</p></div>"

        sources_html = "<div><h3>📚 Sources</h3>"

        for i, source in enumerate(sources, 1):
            sources_html += f"""
            <div class='source-card'>
                <div class='source-title'>{i}. {source['title']}</div>
                <div class='source-date'>📅 {source['date']}</div>
            </div>
            """

        sources_html += "</div>"
        return sources_html

    def format_tools(self, tools_used: List[str]) -> str:
        """Format the tools used with proper HTML structure."""
        if not tools_used:
            return "<div><h3>🛠️ Tools Used</h3><p>No tools used.</p></div>"

        tools_html = "<div><h3>🛠️ Tools Used</h3>"

        for tool in tools_used:
            tools_html += f"""
            <div class='tool-usage'>
                🔧 {tool.replace('_', ' ').title()}
            </div>
            """

        tools_html += "</div>"
        return tools_html

    def launch(self, **kwargs):
        """Launch the Gradio interface."""
        return self.interface.launch(**kwargs)
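A sketch of wiring the UI to the agent (the entry point and config path are assumptions; since the UI only calls agent.run, the AgentWrapper returned by get_agent also works here despite the ToolCallingAgent type hint):

from pathlib import Path

from second_brain_online.application.agents import get_agent
from second_brain_online.application.ui import CustomGradioUI

# Hypothetical config path and launch options.
agent = get_agent(retriever_config_path=Path("configs/retriever.yaml"))
CustomGradioUI(agent).launch(server_name="0.0.0.0", server_port=7860)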
second_brain_online/config.py ADDED
@@ -0,0 +1,76 @@
from loguru import logger
from pydantic import Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """
    A Pydantic-based settings class for managing application configurations.
    """

    # --- Pydantic Settings ---
    model_config: SettingsConfigDict = SettingsConfigDict(
        env_file=".env", env_file_encoding="utf-8"
    )

    # --- Comet ML & Opik Configuration ---
    COMET_API_KEY: str | None = Field(
        default=None, description="API key for Comet ML and Opik services."
    )
    COMET_PROJECT: str = Field(
        default="second_brain_course",
        description="Project name for Comet ML and Opik tracking.",
    )

    # --- Hugging Face Configuration ---
    HUGGINGFACE_ACCESS_TOKEN: str | None = Field(
        default=None, description="Access token for Hugging Face API authentication."
    )
    USE_HUGGINGFACE_DEDICATED_ENDPOINT: bool = Field(
        default=False,
        description="Whether to use the dedicated endpoint for summarizing responses. If True, we will use the dedicated endpoint instead of OpenAI.",
    )
    HUGGINGFACE_DEDICATED_ENDPOINT: str | None = Field(
        default=None,
        description="Dedicated endpoint URL for real-time inference. "
        "If provided, we will use the dedicated endpoint instead of OpenAI. "
        "For example, https://um18v2aeit3f6g1b.eu-west-1.aws.endpoints.huggingface.cloud/v1/, "
        "with /v1 after the endpoint URL.",
    )

    # --- MongoDB Atlas Configuration ---
    MONGODB_DATABASE_NAME: str = Field(
        default="second_brain_course",
        description="Name of the MongoDB database.",
    )
    MONGODB_COLLECTION_NAME: str = Field(
        default="rag",
        description="Name of the MongoDB collection for RAG documents.",
    )
    MONGODB_URI: str = Field(
        default="mongodb+srv://contextdb:HOqIgSH01CoEiMb1@cluster0.d9cmff.mongodb.net/",
        description="Connection URI for the MongoDB Atlas instance.",
    )

    # --- OpenAI API Configuration ---
    OPENAI_API_KEY: str = Field(
        description="API key for OpenAI service authentication.",
    )
    OPENAI_MODEL_ID: str = Field(
        default="gpt-4o", description="Identifier for the OpenAI model to be used."
    )

    @field_validator("OPENAI_API_KEY")
    @classmethod
    def check_not_empty(cls, value: str, info) -> str:
        if not value or value.strip() == "":
            logger.error(f"{info.field_name} cannot be empty.")
            raise ValueError(f"{info.field_name} cannot be empty.")
        return value


try:
    settings = Settings()
except Exception as e:
    logger.error(f"Failed to load configuration: {e}")
    raise SystemExit(e)
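Settings are read from the environment or a local .env file; a sketch of the minimum needed to boot (values are placeholders):

# Hypothetical .env contents - only OPENAI_API_KEY has no default and is validated as non-empty:
#   OPENAI_API_KEY=sk-...
#   COMET_API_KEY=...      (optional, enables Opik tracking)
#   MONGODB_URI=...        (optional, overrides the default cluster)
from second_brain_online.config import settings

print(settings.OPENAI_MODEL_ID)  # "gpt-4o" unless overridden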
second_brain_online/opik_utils.py ADDED
@@ -0,0 +1,77 @@
import os

import opik
from loguru import logger
from opik.configurator.configure import OpikConfigurator

from second_brain_online.config import settings


def configure() -> None:
    if settings.COMET_API_KEY and settings.COMET_PROJECT:
        try:
            client = OpikConfigurator(api_key=settings.COMET_API_KEY)
            default_workspace = client._get_default_workspace()
        except Exception:
            logger.warning(
                "Default workspace not found. Setting workspace to None and enabling interactive mode."
            )
            default_workspace = None

        os.environ["OPIK_PROJECT_NAME"] = settings.COMET_PROJECT

        opik.configure(
            api_key=settings.COMET_API_KEY,
            workspace=default_workspace,
            use_local=False,
            force=True,
        )
        logger.info(
            f"Opik configured successfully using workspace '{default_workspace}'"
        )
    else:
        logger.warning(
            "COMET_API_KEY and COMET_PROJECT are not set. Set them to enable prompt monitoring with Opik (powered by Comet ML)."
        )


def get_or_create_dataset(name: str, prompts: list[str]) -> opik.Dataset | None:
    client = opik.Opik()
    try:
        dataset = client.get_dataset(name=name)
    except Exception:
        dataset = None

    if dataset:
        logger.warning(f"Dataset '{name}' already exists. Skipping dataset creation.")

        return dataset

    assert prompts, "Prompts are required to create a dataset."

    dataset_items = []
    for prompt in prompts:
        dataset_items.append(
            {
                "input": prompt,
            }
        )

    dataset = create_dataset(
        name=name,
        description="Dataset for evaluating the agentic app.",
        items=dataset_items,
    )

    return dataset


def create_dataset(name: str, description: str, items: list[dict]) -> opik.Dataset:
    client = opik.Opik()

    dataset = client.get_or_create_dataset(name=name, description=description)
    dataset.insert(items)

    dataset = client.get_dataset(name=name)

    return dataset
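A sketch of the dataset helper (prompts are illustrative; an existing dataset with the same name is reused):

from second_brain_online import opik_utils

# Hypothetical prompts; the dataset name matches the one used in evaluate.py.
dataset = opik_utils.get_or_create_dataset(
    name="second_brain_rag_agentic_app_evaluation_dataset",
    prompts=["What is the feature/training/inference (FTI) architecture?"],
)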