from typing import Union

import pysrt

from mllm_tools.litellm import LiteLLMWrapper
from mllm_tools.gemini import GeminiWrapper
from mllm_tools.utils import _prepare_text_inputs
from eval_suite.prompts_raw import _fix_transcript, _text_eval_new
from eval_suite.utils import extract_json, convert_score_fields


def parse_srt_to_text(srt_path: str) -> str:
    """
    Parse an SRT subtitle file into plain text.

    Args:
        srt_path: Path to the SRT subtitle file.

    Returns:
        str: The subtitle text with duplicates removed and ellipses replaced.
    """
    subs = pysrt.open(srt_path)
    full_text = []
    for sub in subs:
        sub.text = sub.text.replace("...", ".")
        for line in sub.text.splitlines():
            # .srt files often repeat the same line across consecutive
            # subtitles; skip a line identical to the last one appended.
            if full_text and full_text[-1] == line:
                continue
            full_text.append(line)
    return "\n".join(full_text)
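

# Usage sketch (illustrative only; "captions.srt" is a hypothetical path,
# not a file shipped with this module):
#
#     plain_text = parse_srt_to_text("captions.srt")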


def fix_transcript(text_eval_model: Union[LiteLLMWrapper, GeminiWrapper], transcript: str) -> str:
    """
    Fix and clean up a transcript using an LLM model.

    Args:
        text_eval_model: The LLM model wrapper to use for fixing the transcript.
        transcript: The input transcript text to fix.

    Returns:
        str: The fixed and cleaned transcript text.
    """
    print("Fixing transcript...")
    prompt = _fix_transcript.format(transcript=transcript)
    response = text_eval_model(_prepare_text_inputs(prompt))
    # The prompt asks the model to wrap the cleaned transcript in
    # <SCRIPT>...</SCRIPT> tags; extract the text between them. Note this
    # raises IndexError if the model omits the opening tag.
    fixed_script = response.split("<SCRIPT>", maxsplit=1)[1].split("</SCRIPT>")[0]
    return fixed_script
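

# A more defensive variant of the tag parsing in fix_transcript (an
# illustrative sketch, not part of the original module): falls back to the
# raw response when the model omits the <SCRIPT> tags, instead of raising
# IndexError.
import re


def _extract_script(response: str) -> str:
    # Non-greedy match so only the first <SCRIPT>...</SCRIPT> pair is taken.
    match = re.search(r"<SCRIPT>(.*?)</SCRIPT>", response, re.DOTALL)
    return match.group(1).strip() if match else response.strip()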


def evaluate_text(text_eval_model: Union[LiteLLMWrapper, GeminiWrapper], transcript: str, retry_limit: int) -> dict:
    """
    Evaluate transcript text using an LLM model with retry logic.

    Args:
        text_eval_model: The LLM model wrapper to use for evaluation.
        transcript: The transcript text to evaluate.
        retry_limit: Maximum number of retry attempts on failure.

    Returns:
        dict: The evaluation results as a JSON object.

    Raises:
        ValueError: If all retry attempts fail.
    """
    # prompt = _text_eval.format(transcript=transcript)
    prompt = _text_eval_new.format(transcript=transcript)
    for attempt in range(retry_limit):
        try:
            evaluation = text_eval_model(_prepare_text_inputs(prompt))
            # Parse the model output into JSON and normalize the score
            # fields (e.g. scores returned as strings) into numeric values.
            evaluation_json = extract_json(evaluation)
            evaluation_json = convert_score_fields(evaluation_json)
            return evaluation_json
        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {e.__class__.__name__}: {e}")
            if attempt + 1 == retry_limit:
                raise ValueError("Reached maximum retry limit. Evaluation failed.") from None
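

if __name__ == "__main__":
    # End-to-end sketch, not part of the original module. The subtitle path
    # and the LiteLLMWrapper constructor argument below are assumptions made
    # for illustration; adjust them to your setup.
    model = LiteLLMWrapper(model_name="gemini/gemini-2.0-flash")  # hypothetical model id
    raw_transcript = parse_srt_to_text("video.srt")  # hypothetical path
    cleaned = fix_transcript(model, raw_transcript)
    result = evaluate_text(model, cleaned, retry_limit=3)
    print(result)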