sanatan_ai / modules /db /relevance.py
vikramvasudevan's picture
Upload folder using huggingface_hub
a1180f7 verified
from chromadb.api.types import QueryResult
from dataclasses import dataclass
@dataclass
class ValidationOutcome:
    """Verdict of a relevance check, bundled with the result it was made on.

    Attributes:
        status: Short machine-readable label for the verdict. The validator
            in this module emits "ok", "not_found" or "not_relevant".
        reason: Human-readable explanation of why that status was chosen.
        result: The QueryResult that was inspected, carried along as-is.
    """

    status: str
    reason: str
    result: QueryResult
def _first_query_row(values):
    """Return the hits belonging to the first query of a Chroma result field.

    Accepts both the per-query nested shape (``[[a, b], ...]``) and a flat
    shape (``[a, b]``); ``None`` or an empty container yields ``[]``.
    """
    # len() instead of truthiness so array-like containers do not raise.
    if values is None or len(values) == 0:
        return []
    head = values[0]
    # A str is a single document, not a row of hits, even though it is sized.
    if isinstance(head, (str, bytes)) or not hasattr(head, "__len__"):
        return values
    return head


def validate_relevance_queryresult(
    query: str, result: QueryResult, max_distance: float = 0.35
) -> ValidationOutcome:
    """Judge whether a Chroma QueryResult is relevant enough, based on distance.

    Only the first query's hits are examined, and the first distance in that
    row is taken as the best one (assumes Chroma returns hits ordered by
    ascending distance -- TODO confirm for the collection's configuration).
    The QueryResult itself is never modified.

    Args:
        query: The user query. Currently unused by the check; kept for
            callers and future logging.
        result: QueryResult returned from Chroma (dict-like).
        max_distance: Largest distance still considered relevant. A best
            distance exactly equal to this value counts as relevant.

    Returns:
        A ValidationOutcome whose fields are:
        - status: "ok" | "not_found" | "not_relevant"
        - reason: string explanation
        - result: the original QueryResult object
    """
    # Chroma nests hits per query, so "nothing found" arrives as [[]], which
    # is truthy. Unwrap the first query's row before testing for emptiness.
    hits = _first_query_row(result.get("documents"))
    if len(hits) == 0:
        return ValidationOutcome(status="not_found", reason="No results", result=result)

    # Missing or empty distances cannot prove relevance: treat them as
    # infinitely far so the result is reported as not relevant.
    row = _first_query_row(result.get("distances"))
    best_distance = row[0] if len(row) > 0 else float("inf")

    if best_distance > max_distance:
        return ValidationOutcome(
            status="not_relevant",
            reason=f"Best distance {best_distance:.4f} > {max_distance}",
            result=result,
        )

    return ValidationOutcome(status="ok", reason="Relevant", result=result)