Spaces:

KeenWoo
/

AD_Multimodal_Chatbot

Running

App Files Community

KeenWoo committed on Sep 18

Commit

98117bf

verified ·

1 Parent(s): bf4c0b9

Update evaluate.py

Browse files

Files changed (1) hide show

evaluate.py +44 -3

evaluate.py CHANGED Viewed

@@ -8,9 +8,11 @@ import pandas as pd
 from typing import List, Dict, Any
 from pathlib import Path
-# --- Imports from the main application ---
-# In evaluate.py
 try:
     from alz_companion.agent import (
         make_rag_chain, route_query_type, detect_tags_from_query,
@@ -379,6 +381,45 @@ def run_comprehensive_evaluation(
             role=current_test_role,
             for_evaluation=True
         )
         t0 = time.time()
         response = answer_query(rag_chain, query, query_type=actual_route, chat_history=api_chat_history, **final_tags)
@@ -483,7 +524,7 @@ def run_comprehensive_evaluation(
             "faithfulness": faithfulness, "hallucination_rate": hallucination_rate,
             "answer_correctness": answer_correctness_score,
             "category": category, "error_class": error_class,
-            "recall_at_5": recall_at_5  # <-- ADD THIS LINE
             "latency_ms": latency_ms
         })

 from typing import List, Dict, Any
 from pathlib import Path
+# --- ADD THIS FLAG ---
+NLU_ONLY_TEST = True
+# ---------------------
+# --- Imports from the main application ---
 try:
     from alz_companion.agent import (
         make_rag_chain, route_query_type, detect_tags_from_query,
             role=current_test_role,
             for_evaluation=True
         )
+        # --- START MODIFICATION ---
+        if NLU_ONLY_TEST:
+            # 1. Run only the NLU parts
+            actual_route = route_query_type(user_query)
+            actual_tags = detect_tags_from_query(user_query, actual_route)
+        # 2. Add the NLU results to your list
+        results.append({
+                "test_id": test_id,
+                "title": title,
+                "user_query": user_query,
+                "actual_route": actual_route,
+                "expected_route": expected_route,
+                "route_correct": 1 if actual_route == expected_route else 0,
+                "actual_tags": actual_tags,
+                "expected_tags": expected_tags,
+                # Set RAG metrics to default/None values
+                "raw_sources": [],
+                "expected_sources": expected_sources,
+                "answer": "(NLU_ONLY_TEST)",
+                "context_precision": None,
+                "context_recall": None,
+                "recall_at_5": None,
+                "answer_correctness": None,
+                "faithfulness_score": None,
+                "latency_ms": 0
+            })
+        # 3. Use 'continue' to skip the rest of the loop and go to the next test case
+        continue
+    # --- END MODIFICATION ---
+    # ####################################################################
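+    # ALL OF YOUR ORIGINAL RAG PIPELINE CODE STAYS HERE, AT ITS ORIGINAL
+    # INDENTATION. IT RUNS ONLY IF NLU_ONLY_TEST IS FALSE, WHICH REQUIRES THE
+    # results.append(...) AND continue ABOVE TO SIT INSIDE `if NLU_ONLY_TEST:`.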
+    # ####################################################################
         t0 = time.time()
         response = answer_query(rag_chain, query, query_type=actual_route, chat_history=api_chat_history, **final_tags)
             "faithfulness": faithfulness, "hallucination_rate": hallucination_rate,
             "answer_correctness": answer_correctness_score,
             "category": category, "error_class": error_class,
+            "recall_at_5": recall_at_5,  # <-- ADD THIS LINE
             "latency_ms": latency_ms
         })