KeenWoo committed on
Commit
98117bf
·
verified ·
1 Parent(s): bf4c0b9

Update evaluate.py

Browse files
Files changed (1) hide show
  1. evaluate.py +44 -3
evaluate.py CHANGED
@@ -8,9 +8,11 @@ import pandas as pd
8
  from typing import List, Dict, Any
9
  from pathlib import Path
10
 
11
- # --- Imports from the main application ---
12
- # In evaluate.py
 
13
 
 
14
  try:
15
  from alz_companion.agent import (
16
  make_rag_chain, route_query_type, detect_tags_from_query,
@@ -379,6 +381,45 @@ def run_comprehensive_evaluation(
379
  role=current_test_role,
380
  for_evaluation=True
381
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
 
383
  t0 = time.time()
384
  response = answer_query(rag_chain, query, query_type=actual_route, chat_history=api_chat_history, **final_tags)
@@ -483,7 +524,7 @@ def run_comprehensive_evaluation(
483
  "faithfulness": faithfulness, "hallucination_rate": hallucination_rate,
484
  "answer_correctness": answer_correctness_score,
485
  "category": category, "error_class": error_class,
486
- "recall_at_5": recall_at_5 # <-- ADD THIS LINE
487
  "latency_ms": latency_ms
488
  })
489
 
 
8
  from typing import List, Dict, Any
9
  from pathlib import Path
10
 
11
+ # --- ADD THIS FLAG ---
12
+ NLU_ONLY_TEST = True
13
+ # ---------------------
14
 
15
+ # --- Imports from the main application ---
16
  try:
17
  from alz_companion.agent import (
18
  make_rag_chain, route_query_type, detect_tags_from_query,
 
381
  role=current_test_role,
382
  for_evaluation=True
383
  )
384
+
385
+
386
+ # --- START MODIFICATION ---
387
+ if NLU_ONLY_TEST:
388
+ # 1. Run only the NLU parts
389
+ actual_route = route_query_type(user_query)
390
+ actual_tags = detect_tags_from_query(user_query, actual_route)
391
+
392
+ # 2. Add the NLU results to your list
393
+ results.append({
394
+ "test_id": test_id,
395
+ "title": title,
396
+ "user_query": user_query,
397
+ "actual_route": actual_route,
398
+ "expected_route": expected_route,
399
+ "route_correct": 1 if actual_route == expected_route else 0,
400
+ "actual_tags": actual_tags,
401
+ "expected_tags": expected_tags,
402
+ # Set RAG metrics to default/None values
403
+ "raw_sources": [],
404
+ "expected_sources": expected_sources,
405
+ "answer": "(NLU_ONLY_TEST)",
406
+ "context_precision": None,
407
+ "context_recall": None,
408
+ "recall_at_5": None,
409
+ "answer_correctness": None,
410
+ "faithfulness_score": None,
411
+ "latency_ms": 0
412
+ })
413
+
414
+ # 3. Use 'continue' to skip the rest of the loop and go to the next test case
415
+ continue
416
+ # --- END MODIFICATION ---
417
+
418
+ # ####################################################################
419
+ # ALL OF YOUR ORIGINAL RAG PIPELINE CODE STAYS HERE.
420
+ # IT IS NOT INDENTED AND ONLY RUNS IF NLU_ONLY_TEST IS FALSE.
421
+ # ####################################################################
422
+
423
 
424
  t0 = time.time()
425
  response = answer_query(rag_chain, query, query_type=actual_route, chat_history=api_chat_history, **final_tags)
 
524
  "faithfulness": faithfulness, "hallucination_rate": hallucination_rate,
525
  "answer_correctness": answer_correctness_score,
526
  "category": category, "error_class": error_class,
527
+ "recall_at_5": recall_at_5, # <-- ADD THIS LINE
528
  "latency_ms": latency_ms
529
  })
530