chinmayjha's picture
Deploy complete Second Brain AI Assistant with custom UI
b27eb78
raw
history blame
3.02 kB
from typing import Any
from opik.evaluation.metrics import base_metric, score_result
class SummaryDensityHeuristic(base_metric.BaseMetric):
    """
    A metric that evaluates whether an LLM's output has an appropriate length.

    The metric is a pure heuristic: it only looks at the number of characters
    in the output and compares it against a configurable ``[min_length,
    max_length]`` window. The instruction/input is accepted but not inspected.

    It returns a normalized, continuous score between 0 and 1, where:

    - 1.0 (Excellent): Output length is inside the window.
    - 0.5 to 1.0 (Good): Output is slightly too short or too long (it misses
      the nearest bound by at most 50% of that bound).
    - 0.0 to 0.5 (Poor): Output is significantly too short or too long.
    """

    def __init__(
        self,
        name: str = "summary_density_heuristic",
        min_length: int = 128,
        max_length: int = 1024,
    ) -> None:
        """
        Configure the metric.

        Args:
            name: The name reported in the resulting ``ScoreResult``.
            min_length: Smallest output length (in characters) considered ideal.
            max_length: Largest output length (in characters) considered ideal.
        """
        # NOTE(review): BaseMetric.__init__ is not invoked here. Confirm against
        # the installed opik version whether super().__init__(name=name) is
        # expected for custom metrics.
        self.name = name
        self.min_length = min_length
        self.max_length = max_length

    def score(
        self, input: str, output: str, **ignored_kwargs: Any
    ) -> score_result.ScoreResult:
        """
        Score the output of an LLM.

        Args:
            input: The input prompt given to the LLM (unused by this heuristic).
            output: The output of an LLM to score.
            **ignored_kwargs: Any additional keyword arguments.

        Returns:
            ScoreResult: The computed score with a human-readable explanation.
        """
        length_score = self._compute_length_score(output)

        reason = f"Output length: {len(output)} chars. "
        # The helper returns exactly 1.0 only for in-range lengths, so an
        # equality check on the float is safe here.
        if length_score == 1.0:
            reason += "Length is within ideal range."
        elif length_score >= 0.5:
            reason += "Length is slightly outside ideal range."
        else:
            reason += "Length is significantly outside ideal range."

        return score_result.ScoreResult(
            name=self.name,
            value=length_score,
            reason=reason,
        )

    def _compute_length_score(self, text: str) -> float:
        """
        Compute a score based on text length relative to min and max boundaries.

        The deviation is the distance from the violated bound, expressed as a
        fraction of that bound; the score is ``1 - deviation`` clamped at 0.

        Args:
            text: The text to evaluate.

        Returns:
            float: A score between 0 and 1, where:
                - 1.0: Text length is within the ideal range
                - 0.5 to 1.0: Text length is slightly outside the boundaries
                - 0.0 to 0.5: Text length is significantly outside the boundaries
        """
        length = len(text)

        # If length is within bounds, return perfect score
        if self.min_length <= length <= self.max_length:
            return 1.0

        # Pick the bound that was violated: the lower one for short texts,
        # the upper one otherwise.
        boundary = self.min_length if length < self.min_length else self.max_length

        # A zero (or negative) bound has no meaningful relative deviation:
        # dividing by it would raise ZeroDivisionError or push the score above
        # 1.0. Any text outside such a window is treated as maximally off.
        if boundary <= 0:
            return 0.0

        deviation = abs(length - boundary) / boundary

        # Convert deviation to a score between 0 and 1
        # deviation <= 0.5 -> score between 0.5 and 1.0
        # deviation > 0.5 -> score between 0.0 and 0.5
        return max(0.0, 1.0 - deviation)