Spaces:
Runtime error
Runtime error
| import time | |
| import torch | |
| import torchaudio | |
| import noisereduce as nr | |
| import numpy as np | |
| from models.nllb import nllb_translate | |
def translate(model_nllb, tokenizer_nllb, text, target_lang):
    """Run ``nllb_translate`` on ``text`` and log the result and elapsed time.

    Args:
        model_nllb: Model object forwarded unchanged to ``nllb_translate``.
        tokenizer_nllb: Tokenizer object forwarded unchanged to
            ``nllb_translate``.
        text: Input forwarded to ``nllb_translate``.
        target_lang: Target-language argument forwarded to ``nllb_translate``.

    Returns:
        Whatever ``nllb_translate`` returns for the given arguments.
    """
    print("Processing translation...")
    began_at = time.time()
    result = nllb_translate(model_nllb, tokenizer_nllb, text, target_lang)
    print("Translation:", result)
    # Elapsed wall-clock time is taken after the result has been printed,
    # so it includes that print call.
    elapsed = time.time() - began_at
    print("Translation time:", elapsed)
    return result
def just_inference(model, original_path, output_dir, text, lang):
    """Stream-synthesize ``text`` conditioned on a reference recording and save it.

    The model is moved to CUDA when available (CPU otherwise), conditioning
    latents are computed from ``original_path``, and the chunks yielded by
    ``model.inference_stream`` are concatenated into one waveform that is
    written with ``torchaudio.save``.

    The reference file is handed to the model by path; this function does not
    preprocess it.

    Args:
        model: Object exposing ``to(device)``,
            ``get_conditioning_latents(audio_path=[...])`` and
            ``inference_stream(...)``.
        original_path: Path of the reference audio used for conditioning.
        output_dir: Despite the name, this is used as the full output *file*
            path passed to ``torchaudio.save``.
        text: Text forwarded to ``model.inference_stream``.
        lang: Language argument forwarded to ``model.inference_stream``.

    Returns:
        The concatenated audio as a CPU tensor (before the channel dimension
        is added for saving).

    Raises:
        Exception: Any error raised while computing latents, streaming,
            or saving is logged with ``print`` and re-raised unchanged.
    """
    print("Inference...")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    path_to_save = output_dir
    # Sample rate written to the output file.
    # NOTE(review): presumably the model's native output rate - confirm.
    sample_rate = 24000
    t0 = time.time()
    try:
        print("Getting conditioning latents...")
        gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
            audio_path=[original_path]
        )
        print("Got conditioning latents.")

        print("Starting inference stream...")
        chunks = model.inference_stream(
            text,
            lang,
            gpt_cond_latent,
            speaker_embedding,
            stream_chunk_size=15,
            speed=0.95,
        )
        print("Inference stream started.")

        # Collect the pieces and concatenate once after the loop; growing the
        # tensor with torch.cat on every iteration re-copies all audio so far.
        # The empty float seed keeps an empty stream saveable and keeps the
        # result dtype the same as when accumulating from an empty tensor.
        pieces = [torch.Tensor().to(device)]
        for i, chunk in enumerate(chunks):
            try:
                # enumerate() starts at 0, so the first chunk is index 0.
                if i == 0:
                    time_to_first_chunk = time.time() - t0
                    print(f"Time to first chunk: {time_to_first_chunk}")
                pieces.append(chunk.squeeze().to(device))
                print(f"Processed chunk {i}, chunk shape: {chunk.shape}")
            except Exception as e:
                print(f"Error processing chunk {i}: {e}")
                raise

        # Move the assembled audio to CPU before saving.
        full_audio = torch.cat(pieces, dim=-1).cpu()

        print(f"Saving full audio to {path_to_save}...")
        torchaudio.save(path_to_save, full_audio.unsqueeze(0), sample_rate)
        print("Audio saved.")
        print("Inference finished")
        return full_audio
    except Exception as e:
        print(f"Error during processing: {e}")
        raise