# Diarization_Lib.py
#########################################
# Diarization Library
# This library is used to perform speaker diarization of audio files.
# Currently, uses FIXME for transcription.
#
####################
####################
# Function List
#
# 1. load_pipeline_from_pretrained(path_to_config)
# 2. audio_diarization(audio_file_path)
# 3. combine_transcription_and_diarization(audio_file_path)
#
####################
# Import necessary libraries
import configparser
import json
import logging
import os
import time
from pathlib import Path
#
# Import 3rd Party
import torch
import yaml
from pyannote.audio import Model
from pyannote.audio.pipelines.speaker_diarization import SpeakerDiarization
#
# Import Local
from App_Function_Libraries.Audio_Transcription_Lib import speech_to_text
#
#######################################################################################################################
# Function Definitions
#
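# load_pipeline_from_pretrained() below expects a pyannote-style config.yaml
# whose 'pipeline.params' block names local model files. A minimal sketch of
# the assumed layout (file names and hyperparameter values are illustrative,
# not taken from this repo):
#
#   pipeline:
#     name: pyannote.audio.pipelines.SpeakerDiarization
#     params:
#       segmentation: segmentation.bin
#       embedding: embedding.bin
#       clustering: AgglomerativeClustering
#       embedding_batch_size: 32
#       embedding_exclude_overlap: true
#       segmentation_batch_size: 32
#   params:
#     clustering:
#       method: centroid
#       min_cluster_size: 12
#       threshold: 0.70
#     segmentation:
#       min_duration_off: 0.0
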
def load_pipeline_from_pretrained(path_to_config: str | Path) -> SpeakerDiarization:
    path_to_config = Path(path_to_config).resolve()
    print(f"Loading pyannote pipeline from {path_to_config}...")

    if not path_to_config.exists():
        raise FileNotFoundError(f"Config file not found: {path_to_config}")

    # Load the YAML configuration
    with open(path_to_config, 'r') as config_file:
        config = yaml.safe_load(config_file)

    # Store the current working directory, then change to the directory
    # containing the config file so relative model paths in the YAML resolve
    cwd = Path.cwd().resolve()
    cd_to = path_to_config.parent.resolve()
    print(f"Changing working directory to {cd_to}")
    os.chdir(cd_to)

    try:
        # Resolve model paths from the config and verify they exist locally
        pipeline_params = config['pipeline']['params']
        embedding_path = Path(pipeline_params['embedding']).resolve()
        segmentation_path = Path(pipeline_params['segmentation']).resolve()
        if not embedding_path.exists():
            raise FileNotFoundError(f"Embedding model file not found: {embedding_path}")
        if not segmentation_path.exists():
            raise FileNotFoundError(f"Segmentation model file not found: {segmentation_path}")

        # Load the models from local paths using pyannote's Model class, then
        # build the SpeakerDiarization pipeline around them. Passing the models
        # to the constructor avoids the default behavior of fetching models
        # from the Hugging Face Hub.
        embedding_model = Model.from_pretrained(str(embedding_path), map_location=torch.device('cpu'))
        segmentation_model = Model.from_pretrained(str(segmentation_path), map_location=torch.device('cpu'))
        pipeline = SpeakerDiarization(
            segmentation=segmentation_model,
            embedding=embedding_model,
            clustering=pipeline_params['clustering'],
            embedding_batch_size=pipeline_params['embedding_batch_size'],
            embedding_exclude_overlap=pipeline_params['embedding_exclude_overlap'],
            segmentation_batch_size=pipeline_params['segmentation_batch_size'],
        )

        # Apply the hyperparameters (clustering thresholds, etc.) from the config
        pipeline.instantiate(config['params'])
    finally:
        # Change back to the original working directory
        print(f"Changing working directory back to {cwd}")
        os.chdir(cwd)

    return pipeline
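
# A minimal usage sketch for the loader above ('audio.wav' is an assumed
# sample file; audio_diarization() below does the same thing end-to-end):
#
#   pipeline = load_pipeline_from_pretrained(Path(__file__).parent / 'models' / 'config.yaml')
#   annotation = pipeline('audio.wav')
#   for turn, _, speaker in annotation.itertracks(yield_label=True):
#       print(f"{speaker}: {turn.start:.1f}s - {turn.end:.1f}s")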

def audio_diarization(audio_file_path):
    logging.info('audio-diarization: Loading pyannote pipeline')

    # Note: processing_choice is read from config.txt, but the loader above
    # currently pins both models to CPU regardless of this setting.
    config = configparser.ConfigParser()
    config.read('config.txt')
    processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')

    base_dir = Path(__file__).parent.resolve()
    config_path = base_dir / 'models' / 'config.yaml'
    pipeline = load_pipeline_from_pretrained(config_path)

    time_start = time.time()
    if audio_file_path is None:
        raise ValueError("audio-diarization: No audio file provided")
    logging.info("audio-diarization: Audio file path: %s", audio_file_path)

    try:
        _, file_ending = os.path.splitext(audio_file_path)
        out_file = audio_file_path.replace(file_ending, ".diarization.json")
        prettified_out_file = audio_file_path.replace(file_ending, ".diarization_pretty.json")

        # Reuse a previous run's output instead of re-running the pipeline
        if os.path.exists(out_file):
            logging.info("audio-diarization: Diarization file already exists: %s", out_file)
            with open(out_file) as f:
                return json.load(f)['segments']

        logging.info('audio-diarization: Starting diarization...')
        diarization_result = pipeline(audio_file_path)

        # Flatten the pyannote Annotation into a list of speaker turns
        segments = []
        for turn, _, speaker in diarization_result.itertracks(yield_label=True):
            chunk = {
                "Time_Start": turn.start,
                "Time_End": turn.end,
                "Speaker": speaker
            }
            logging.debug("Segment: %s", chunk)
            segments.append(chunk)
        logging.info("audio-diarization: Diarization completed with pyannote in %.2f seconds", time.time() - time_start)

        output_data = {'segments': segments}

        logging.info("audio-diarization: Saving prettified JSON to %s", prettified_out_file)
        with open(prettified_out_file, 'w') as f:
            json.dump(output_data, f, indent=2)

        logging.info("audio-diarization: Saving JSON to %s", out_file)
        with open(out_file, 'w') as f:
            json.dump(output_data, f)

    except Exception as e:
        logging.error("audio-diarization: Error performing diarization: %s", str(e))
        raise RuntimeError("audio-diarization: Error performing diarization") from e

    return segments
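
# The on-disk artifact written by audio_diarization() looks like this
# (timestamps and speaker labels are illustrative):
#
#   {
#     "segments": [
#       {"Time_Start": 0.5, "Time_End": 4.2, "Speaker": "SPEAKER_00"},
#       {"Time_Start": 4.2, "Time_End": 9.8, "Speaker": "SPEAKER_01"}
#     ]
#   }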

def combine_transcription_and_diarization(audio_file_path):
    logging.info('combine-transcription-and-diarization: Starting transcription and diarization...')
    transcription_result = speech_to_text(audio_file_path)
    diarization_result = audio_diarization(audio_file_path)

    # Assign a speaker to each transcription segment by finding the first
    # diarization turn that fully contains it
    combined_result = []
    for transcription_segment in transcription_result:
        for diarization_segment in diarization_result:
            if (transcription_segment['Time_Start'] >= diarization_segment['Time_Start']
                    and transcription_segment['Time_End'] <= diarization_segment['Time_End']):
                combined_segment = {
                    "Time_Start": transcription_segment['Time_Start'],
                    "Time_End": transcription_segment['Time_End'],
                    "Speaker": diarization_segment['Speaker'],
                    "Text": transcription_segment['Text']
                }
                combined_result.append(combined_segment)
                break

    _, file_ending = os.path.splitext(audio_file_path)
    out_file = audio_file_path.replace(file_ending, ".combined.json")
    prettified_out_file = audio_file_path.replace(file_ending, ".combined_pretty.json")

    logging.info("combine-transcription-and-diarization: Saving prettified JSON to %s", prettified_out_file)
    with open(prettified_out_file, 'w') as f:
        json.dump(combined_result, f, indent=2)
    logging.info("combine-transcription-and-diarization: Saving JSON to %s", out_file)
    with open(out_file, 'w') as f:
        json.dump(combined_result, f)

    return combined_result
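
# A minimal end-to-end sketch, runnable as a script. 'sample.wav' is an
# assumed input file, and models/config.yaml must be laid out as described
# near the top of this module.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    results = combine_transcription_and_diarization('sample.wav')  # assumed sample file
    for seg in results:
        print(f"[{seg['Speaker']}] {seg['Time_Start']:.1f}s-{seg['Time_End']:.1f}s: {seg['Text']}")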
#
#
#######################################################################################################################