Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 1,641 Bytes
5f4344d a1180f7 5f4344d a1180f7 5f4344d a1180f7 5f4344d a1180f7 5f4344d a1180f7 5f4344d a1180f7 5f4344d a1180f7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
from chromadb.api.types import QueryResult
from dataclasses import dataclass
@dataclass
class ValidationOutcome:
    """Verdict of a relevance check, bundled with the query result it judged.

    Attributes:
        status: Short machine-readable verdict label.
        reason: Human-readable explanation of the verdict.
        result: The QueryResult the verdict refers to.
    """

    status: str
    reason: str
    result: QueryResult
def validate_relevance_queryresult(
    query: str,
    result: QueryResult,
    max_distance: float = 0.35,
) -> ValidationOutcome:
    """Decide whether a Chroma query result is close enough to count as relevant.

    Only the hits belonging to the first query are inspected, and the first
    distance in that list is taken as the best one. The ``result`` object is
    never modified; it is handed back inside the returned outcome.

    Args:
        query: The user query. Accepted for interface compatibility; the
            check itself does not read it.
        result: Dict-like QueryResult. The "documents" and "distances"
            entries are read with ``.get``; either may be missing or None.
        max_distance: Largest distance still considered relevant
            (a distance equal to this value passes).

    Returns:
        A ValidationOutcome whose ``status`` is one of:
            - "not_found": there are no documents for the first query.
            - "not_relevant": the best distance exceeds ``max_distance``,
              or no distance is available at all.
            - "ok": the best distance is within ``max_distance``.
        ``reason`` explains the verdict and ``result`` is the original
        QueryResult object.
    """
    documents = result.get("documents") or []
    distances = result.get("distances") or []

    # Results are nested per query (list of lists), so a query with zero
    # hits arrives as [[]] -- a non-empty outer list wrapping an empty one.
    # Checking only the outer list would miss that case and crash below.
    first_hits = documents[0] if documents else []
    if isinstance(first_hits, (list, tuple)) and not first_hits:
        return ValidationOutcome(
            status="not_found",
            reason="No results",
            result=result,
        )

    # Accept both the nested shape ([[d0, d1, ...]]) and a flat list of
    # floats ([d0, d1, ...]). With no usable distance, fall back to +inf so
    # the outcome is "not_relevant" rather than an IndexError.
    best_distance = float("inf")
    if distances:
        first_entry = distances[0]
        if isinstance(first_entry, (list, tuple)):
            if first_entry:
                best_distance = first_entry[0]
        else:
            best_distance = first_entry

    if best_distance > max_distance:
        return ValidationOutcome(
            status="not_relevant",
            reason=f"Best distance {best_distance:.4f} > {max_distance}",
            result=result,
        )

    return ValidationOutcome(
        status="ok",
        reason="Relevant",
        result=result,
    )
|