from dataclasses import dataclass

from chromadb.api.types import QueryResult


@dataclass
class ValidationOutcome:
    """Outcome of a relevance check on a Chroma query result."""

    # One of "ok", "not_found" or "not_relevant".
    status: str
    # Human-readable explanation of the status.
    reason: str
    # The QueryResult that was validated, passed through unchanged.
    result: QueryResult


def _has_documents(documents) -> bool:
    """Return True if the first query in *documents* holds at least one hit."""
    if not documents:
        return False
    first = documents[0]
    # Nested form: one inner list per query; an empty inner list means the
    # query matched nothing even though the outer list is truthy.
    if isinstance(first, (list, tuple)):
        return bool(first)
    # Flat form: the outer list itself holds the documents.
    return True


def _first_distance(distances) -> float:
    """Return the first distance of the first query, or ``inf`` if absent."""
    if not distances:
        return float("inf")
    first = distances[0]
    if isinstance(first, (list, tuple)):
        # Guard against an empty inner list instead of raising IndexError.
        return first[0] if first else float("inf")
    return first


def validate_relevance_queryresult(
    query: str, result: QueryResult, max_distance: float = 0.35
) -> ValidationOutcome:
    """Check whether a Chroma ``QueryResult`` is relevant enough by distance.

    Only the first distance of the first query is inspected; this assumes the
    hits are ordered best-first (NOTE(review): confirm against the caller's
    query settings). The ``QueryResult`` itself is never modified.

    Args:
        query: The user query. Currently unused; kept so callers can pass it
            (the original intent was logging).
        result: QueryResult returned from Chroma (dict-like). ``documents``
            and ``distances`` may be missing, ``None``, flat lists, or lists
            nested per query.
        max_distance: Maximum acceptable distance for the result to count as
            relevant.

    Returns:
        A ``ValidationOutcome`` with:
            - ``status``: "ok" | "not_found" | "not_relevant"
            - ``reason``: string explanation
            - ``result``: the original QueryResult object
    """
    documents = result.get("documents") or []
    distances = result.get("distances") or []

    # An empty per-query list (``[[]]``) is "no results" too, not just ``[]``.
    if not _has_documents(documents):
        return ValidationOutcome(
            status="not_found",
            reason="No results",
            result=result,
        )

    # Missing distances yield ``inf``, which is reported as not relevant.
    best_distance = _first_distance(distances)

    if best_distance > max_distance:
        return ValidationOutcome(
            status="not_relevant",
            reason=f"Best distance {best_distance:.4f} > {max_distance}",
            result=result,
        )

    return ValidationOutcome(
        status="ok",
        reason="Relevant",
        result=result,
    )