Spaces:
Sleeping
Sleeping
| import os | |
| import numpy as np | |
| import pandas as pd | |
| import whisper | |
| # import torchaudio | |
| # import librosa | |
| import sys | |
| import os | |
| os.environ["CUDA_VISIBLE_DEVICES"] = "2" | |
| from tqdm.notebook import tqdm | |
| # from whisper.normalizers import EnglishTextNormalizer | |
| class Decoder: | |
| """ | |
| Class to perform ASR predictions | |
| """ | |
| def __init__(self, model_type, language='mr'): | |
| """Initialization of the class | |
| More details on the whisper model and its types can be found here: https://github.com/openai/whisper | |
| Convert HF model to openai-whisper: https://github.com/openai/whisper/discussions/830 | |
| Args: | |
| model_type (str): Should be one of 'tiny', 'base', 'small', 'medium', 'large', 'large-v2' | |
| """ | |
| assert model_type in ['tiny', 'base', 'small', 'medium', 'large', 'large-v2', 'large-v3'], "Wrong model type" | |
| print('Info: Loading model') | |
| self.model = whisper.load_model(model_type) | |
| self.decode_options = whisper.DecodingOptions(language=language, without_timestamps=True) | |
| self.device = "cuda" # if torch.cuda.is_available() else "cpu" | |
| print("Info: Initialization done") | |
| def decode(self, filepath): | |
| """Get the transcription(in hindi) for the audio file | |
| Args: | |
| filepath (str): Absolute path of the audio file | |
| Returns: | |
| str: transcription of the audio in hindi | |
| """ | |
| print() | |
| result = self.model.transcribe(filepath, language="mr", verbose=False, without_timestamps=True, fp16=False) | |
| return result["text"] | |
| if __name__ == "__main__": | |
| assert len(sys.argv) == 2, "Ohh no, audio file seems to be missing" | |
| audio_folder_path = sys.argv[1] | |
| # Initialize the Decoder | |
| obj = Decoder('large-v3', language='mr') | |
| # Create a DataFrame to store file names and corresponding transcripts | |
| transcripts_df = pd.DataFrame(columns=['MP3_File', 'Transcript']) | |
| count=0 | |
| # Iterate through all MP3 files in the folder | |
| for filename in os.listdir(audio_folder_path): | |
| if filename.endswith(".mp3"): | |
| mp3_file_path = os.path.join(audio_folder_path, filename) | |
| # Decode the MP3 file | |
| asr_output = obj.decode(mp3_file_path) | |
| # print(asr_output) | |
| # Append the file name and transcript to the DataFrame | |
| transcripts_df = transcripts_df.append({'MP3_File': filename, 'Transcript': asr_output}, ignore_index=True) | |
| count+=1 | |
| if count % 10 == 0: | |
| print(f'{count} files done') | |
| # Save the transcript to a text file | |
| output_dir = "./" | |
| # asr_save_path = os.path.join(output_dir, filename.replace(".mp3", ".txt")) | |
| # with open(asr_save_path, 'w') as f: | |
| # f.write(asr_output) | |
| # Save the DataFrame to a CSV file | |
| csv_save_path = os.path.join(output_dir, "transcripts_marathi.csv") | |
| transcripts_df.to_csv(csv_save_path, index=False) | |
| print("Transcription and CSV file creation completed.") | |