File size: 1,641 Bytes
5f4344d
a1180f7
 
 
 
 
 
 
5f4344d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a1180f7
5f4344d
 
 
a1180f7
5f4344d
 
 
 
 
a1180f7
5f4344d
 
 
a1180f7
5f4344d
a1180f7
5f4344d
 
 
a1180f7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from chromadb.api.types import QueryResult
from dataclasses import dataclass

@dataclass
class ValidationOutcome:
    """Verdict of a relevance check, bundled with the query result it judged.

    Instances are built by ``validate_relevance_queryresult``, which hands the
    inspected ``QueryResult`` back alongside a status code and an explanation.
    """

    # Outcome code; the validator emits "ok", "not_found" or "not_relevant".
    status: str
    # Human-readable explanation of why that status was chosen.
    reason: str
    # The QueryResult that was inspected, stored as given.
    result: QueryResult

def validate_relevance_queryresult(
    query: str,
    result: QueryResult,
    max_distance: float = 0.35,
) -> ValidationOutcome:
    """Check whether a Chroma QueryResult is relevant enough, using distances.

    Only the first distance of the first query is inspected. The QueryResult
    itself is never modified; it is returned inside the outcome as given.

    Args:
        query: The user query. Not used by the current implementation; kept
            so the call signature stays stable (e.g. for future logging).
        result: QueryResult returned from Chroma (dict-like, read via ``.get``).
        max_distance: Maximum acceptable distance for the result to count as
            relevant.

    Returns:
        A ValidationOutcome with:
          - ``status``: "ok" | "not_found" | "not_relevant"
          - ``reason``: string explanation
          - ``result``: the original QueryResult object

        "not_found" is returned when there are no documents, including the
        nested-empty shape ``[[]]``. "not_relevant" is returned when the best
        distance exceeds ``max_distance`` or when no distance is available
        (treated as infinitely far).
    """
    documents = result.get("documents") or []
    distances = result.get("distances") or []

    # Results are nested per query, so a query with zero hits can arrive as
    # [[]]. That outer list is truthy, so the inner list must be checked too;
    # otherwise the distance lookup below would raise IndexError.
    first_documents = documents[0] if documents else []
    no_hits = not documents or (
        isinstance(first_documents, list) and not first_documents
    )
    if no_hits:
        return ValidationOutcome(
            status="not_found",
            reason="No results",
            result=result,
        )

    # distances may be List[List[float]] (per query) or a flat List[float];
    # take the first distance of the first result in either shape.
    # NOTE(review): this assumes hits are ordered nearest-first - confirm.
    first_entry = distances[0] if distances else None
    if isinstance(first_entry, list):
        best_distance = first_entry[0] if first_entry else float("inf")
    elif first_entry is None:
        # No distance information at all: treat as infinitely far away.
        best_distance = float("inf")
    else:
        best_distance = first_entry

    if best_distance > max_distance:
        return ValidationOutcome(
            status="not_relevant",
            reason=f"Best distance {best_distance:.4f} > {max_distance}",
            result=result,
        )

    return ValidationOutcome(
        status="ok",
        reason="Relevant",
        result=result,
    )