Spaces:

chinmayjha
/

context-ai

Sleeping

App Files Community

chinmayjha committed 28 days ago

Commit

8c6064d

unverified ·

1 Parent(s): 4af9b90

Improve agent output formatting with inline citations and full sources

Browse files

- Extract answer_with_sources output directly from agent steps to bypass final_answer reformatting
- Add inline [Doc X] citations in the answer section
- Include full Sources section with document metadata, summaries, and key findings
- Update AgentWrapper to extract Step 2 output when answer_with_sources is found
- Increase max_steps from 2 to 4 to allow for more complex queries
- Pass AgentWrapper directly to the UI (instead of unwrapping it) so that its custom run() method is used
- Update both tools/app.py and app.py (HF entry point) to use AgentWrapper
- Simplify UI to display raw agent output without additional parsing

Files changed (4) hide show

app.py +2 -4
src/second_brain_online/application/agents/agents.py +35 -4
src/second_brain_online/application/ui/custom_gradio_ui.py +17 -2
tools/app.py +5 -27

app.py CHANGED Viewed

@@ -54,11 +54,9 @@ def main():
         # Initialize agent
         agent = get_agent(retriever_config_path=Path(retriever_config_path))
-        # Get the actual agent from the wrapper
-        actual_agent = agent._AgentWrapper__agent
         # Launch custom UI
-        CustomGradioUI(actual_agent).launch(
             server_name="0.0.0.0",
             server_port=7860,
             share=False

         # Initialize agent
         agent = get_agent(retriever_config_path=Path(retriever_config_path))
+        # Pass the AgentWrapper directly so it uses our custom run() method with extraction logic
         # Launch custom UI
+        CustomGradioUI(agent).launch(
             server_name="0.0.0.0",
             server_port=7860,
             share=False

src/second_brain_online/application/agents/agents.py CHANGED Viewed

@@ -73,19 +73,50 @@ class AgentWrapper:
     def run(self, task: str, **kwargs) -> Any:
         result = self.__agent.run(task, return_full_result=True, **kwargs)
         # Extract the raw output from answer_with_sources (Step 2) instead of using final_answer
         if hasattr(result, 'steps') and len(result.steps) >= 2:
             # Find the step where answer_with_sources was called
-            for step in result.steps:
-                if 'tool_calls' in step and step['tool_calls']:
                     for tool_call in step['tool_calls']:
-                        if tool_call.get('function', {}).get('name') == 'answer_with_sources':
                             # Found the answer_with_sources step - return its observations
                             if 'observations' in step and step['observations']:
-                                logger.info("Returning raw answer_with_sources output (bypassing final reformatting)")
                                 return step['observations']
         # Fallback to regular result.output
         if hasattr(result, 'output'):
             return result.output

     def run(self, task: str, **kwargs) -> Any:
         result = self.__agent.run(task, return_full_result=True, **kwargs)
+        # Debug: Print step structure to understand the data
+        logger.info(f"Result type: {type(result)}")
+        if hasattr(result, 'steps'):
+            logger.info(f"Number of steps: {len(result.steps)}")
+            for i, step in enumerate(result.steps):
+                logger.info(f"Step {i}: type={type(step)}, keys={step.keys() if isinstance(step, dict) else 'not a dict'}")
+                if isinstance(step, dict) and 'tool_calls' in step:
+                    logger.info(f"  Tool calls: {step['tool_calls']}")
+                    if step['tool_calls']:
+                        for tc in step['tool_calls']:
+                            tc_type = type(tc)
+                            if isinstance(tc, dict):
+                                logger.info(f"    Tool call dict: {tc}")
+                            else:
+                                logger.info(f"    Tool call object: {tc}, type: {tc_type}")
+                                if hasattr(tc, 'function'):
+                                    logger.info(f"      Function: {tc.function}")
+                                if hasattr(tc, 'name'):
+                                    logger.info(f"      Name: {tc.name}")
         # Extract the raw output from answer_with_sources (Step 2) instead of using final_answer
         if hasattr(result, 'steps') and len(result.steps) >= 2:
             # Find the step where answer_with_sources was called
+            for step_idx, step in enumerate(result.steps):
+                if isinstance(step, dict) and 'tool_calls' in step and step['tool_calls']:
                     for tool_call in step['tool_calls']:
+                        # Handle both dict and object formats
+                        tool_name = None
+                        if isinstance(tool_call, dict):
+                            tool_name = tool_call.get('function', {}).get('name')
+                        elif hasattr(tool_call, 'function'):
+                            if hasattr(tool_call.function, 'name'):
+                                tool_name = tool_call.function.name
+                        elif hasattr(tool_call, 'name'):
+                            tool_name = tool_call.name
+                        if tool_name == 'answer_with_sources':
                             # Found the answer_with_sources step - return its observations
                             if 'observations' in step and step['observations']:
+                                logger.info(f"✅ Found answer_with_sources at step {step_idx}, returning its observations")
                                 return step['observations']
         # Fallback to regular result.output
+        logger.warning("⚠️ answer_with_sources output not found, falling back to result.output")
         if hasattr(result, 'output'):
             return result.output

src/second_brain_online/application/ui/custom_gradio_ui.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import json
 import re
-from typing import Any, Dict, List, Tuple, Optional
 from datetime import datetime
 import gradio as gr
@@ -14,7 +14,12 @@ from second_brain_online.config import settings
 class CustomGradioUI:
     """Custom Gradio UI for better formatting of agent responses with source attribution."""
-    def __init__(self, agent: ToolCallingAgent):
         self.agent = agent
         self.mongodb_client = None
         self.database = None
@@ -170,6 +175,16 @@ class CustomGradioUI:
             # Quick post-processing steps
             progress(0.8, desc="✨ Displaying results...")
             # Convert result to string
             result_str = str(result)

 import json
 import re
+from typing import Any, Dict, List, Tuple, Optional, Union
 from datetime import datetime
 import gradio as gr
 class CustomGradioUI:
     """Custom Gradio UI for better formatting of agent responses with source attribution."""
+    def __init__(self, agent: Union[ToolCallingAgent, Any]):
+        """Initialize the UI with either a ToolCallingAgent or AgentWrapper.
+        Args:
+            agent: Either a raw ToolCallingAgent or an AgentWrapper that wraps it.
+        """
         self.agent = agent
         self.mongodb_client = None
         self.database = None
             # Quick post-processing steps
             progress(0.8, desc="✨ Displaying results...")
+            # CRITICAL DEBUG: Print what result actually is
+            print("\n" + "="*80)
+            print("DEBUG: WHAT IS RESULT?")
+            print("="*80)
+            print(f"Type: {type(result)}")
+            print(f"Is string? {isinstance(result, str)}")
+            print(f"Has 📚 Sources? {'📚 Sources' in str(result) if result else False}")
+            print(f"First 1500 chars of result:\n{str(result)[:1500]}")
+            print("="*80)
             # Convert result to string
             result_str = str(result)

tools/app.py CHANGED Viewed

@@ -35,9 +35,8 @@ def main(retriever_config_path: Path, ui: bool, query: str) -> None:
     """
     agent = get_agent(retriever_config_path=Path(retriever_config_path))
     if ui:
-        # Get the actual agent from the wrapper
-        actual_agent = agent._AgentWrapper__agent
-        CustomGradioUI(actual_agent).launch()
     else:
         assert query, "Query is required in CLI mode"
@@ -71,33 +70,12 @@ def main(retriever_config_path: Path, ui: bool, query: str) -> None:
                 print(f"State: {actual_agent.state}")
         print("="*80)
-        # Parse the result using the same logic as the UI
-        ui_instance = CustomGradioUI(None)  # We don't need the agent for parsing
-        # Get agent logs if available
-        agent_logs = []
-        if hasattr(agent, '_AgentWrapper__agent'):
-            actual_agent = agent._AgentWrapper__agent
-            if hasattr(actual_agent, 'logs'):
-                agent_logs = actual_agent.logs
-        answer, sources, tools_used = ui_instance.parse_agent_response(result, agent_logs)
-        print("\n" + "="*80)
-        print("DEBUG: PARSED RESULTS")
-        print("="*80)
-        print(f"Answer: {answer}")
-        print(f"Sources ({len(sources)}): {sources}")
-        print(f"Tools Used: {tools_used}")
-        print("="*80)
         print("\n" + "="*80)
         print("FINAL OUTPUT")
         print("="*80)
-        # Format the answer for better display
-        formatted_answer = ui_instance.format_answer(answer)
-        print(formatted_answer)
 if __name__ == "__main__":

     """
     agent = get_agent(retriever_config_path=Path(retriever_config_path))
     if ui:
+        # Pass the AgentWrapper directly so it uses our custom run() method
+        CustomGradioUI(agent).launch()
     else:
         assert query, "Query is required in CLI mode"
                 print(f"State: {actual_agent.state}")
         print("="*80)
+        # Display the raw result directly (it's already perfectly formatted)
         print("\n" + "="*80)
         print("FINAL OUTPUT")
         print("="*80)
+        print(result)
+        print("="*80)
 if __name__ == "__main__":