```python
from typing import Any

from opik.evaluation.metrics import base_metric, score_result


class SummaryDensityHeuristic(base_metric.BaseMetric):
    """
    A metric that evaluates whether an LLM's output has appropriate length and density.

    This metric uses a heuristic to determine whether the output length is appropriate
    for the given instruction. It returns a normalized score between 0 and 1, where:

    - 0.0 (Poor): Output is either too short and incomplete, or too long with unnecessary information
    - 0.5 (Good): Output has a decent length balance but is still slightly too short or too long
    - 1.0 (Excellent): Output length is appropriate, answering the question concisely without being verbose
    """

    def __init__(
        self,
        name: str = "summary_density_heuristic",
        min_length: int = 128,
        max_length: int = 1024,
    ) -> None:
        self.name = name
        self.min_length = min_length
        self.max_length = max_length

    def score(
        self, input: str, output: str, **ignored_kwargs: Any
    ) -> score_result.ScoreResult:
        """
        Score the output of an LLM.

        Args:
            input: The input prompt given to the LLM.
            output: The output of the LLM to score.
            **ignored_kwargs: Any additional keyword arguments.

        Returns:
            ScoreResult: The computed score with an explanation.
        """
        length_score = self._compute_length_score(output)

        reason = f"Output length: {len(output)} chars. "
        if length_score == 1.0:
            reason += "Length is within the ideal range."
        elif length_score >= 0.5:
            reason += "Length is slightly outside the ideal range."
        else:
            reason += "Length is significantly outside the ideal range."

        return score_result.ScoreResult(
            name=self.name,
            value=length_score,
            reason=reason,
        )

    def _compute_length_score(self, text: str) -> float:
        """
        Compute a score based on text length relative to the min and max boundaries.

        Args:
            text: The text to evaluate.

        Returns:
            float: A score between 0 and 1, where:
                - 0.0: Text length is significantly outside the boundaries
                - 0.5: Text length is slightly outside the boundaries
                - 1.0: Text length is within the ideal range
        """
        length = len(text)

        # If the length is within bounds, return a perfect score.
        if self.min_length <= length <= self.max_length:
            return 1.0

        # Relative deviation from the nearest violated boundary.
        if length < self.min_length:
            deviation = (self.min_length - length) / self.min_length
        else:
            deviation = (length - self.max_length) / self.max_length

        # Convert the deviation to a score between 0 and 1:
        #   deviation <= 0.5 -> score between 0.5 and 1.0
        #   deviation >  0.5 -> score between 0.0 and 0.5
        return max(0.0, 1.0 - deviation)
```
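As a quick sanity check, here is a minimal sketch of calling the metric directly. The outputs are synthetic strings sized to hit each scoring band under the default bounds of 128 to 1024 characters:

```python
# Minimal usage sketch; the outputs are synthetic strings, not real summaries.
metric = SummaryDensityHeuristic()

short_out = "x" * 64    # deviation = (128 - 64) / 128 = 0.5       -> score 0.5
ideal_out = "x" * 512   # within [128, 1024]                       -> score 1.0
long_out = "x" * 2048   # deviation = (2048 - 1024) / 1024 = 1.0   -> score 0.0

for output in (short_out, ideal_out, long_out):
    result = metric.score(input="Summarize the article.", output=output)
    print(f"{result.value:.2f} - {result.reason}")
```

Because the deviation is measured relative to the violated boundary, an empty output and an output at double `max_length` both bottom out at 0.0.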
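The metric can also be passed to Opik's `evaluate` function as a scoring metric. The following is a sketch under stated assumptions: it presumes a configured Opik workspace, the dataset name is made up, and `summarize` is a hypothetical stub standing in for the model under evaluation:

```python
# Hedged sketch of wiring the metric into an Opik evaluation run.
# Assumes a configured Opik workspace; the dataset name is arbitrary and
# summarize() is a placeholder for a real LLM call.
import opik
from opik.evaluation import evaluate


def summarize(text: str) -> str:
    # Stand-in for the model under evaluation.
    return text[:512]


def evaluation_task(item: dict) -> dict:
    # The keys returned here map onto the metric's score() arguments.
    return {"input": item["input"], "output": summarize(item["input"])}


client = opik.Opik()
dataset = client.get_or_create_dataset(name="summary-density-demo")
dataset.insert([{"input": "Summarize: Opik is an open-source LLM evaluation platform."}])

evaluate(
    dataset=dataset,
    task=evaluation_task,
    scoring_metrics=[SummaryDensityHeuristic()],
)
```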