Upload 2 files
Browse files- config.json +1 -0
- handler.py +43 -0
    	
        config.json
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            {}
         | 
    	
        handler.py
    ADDED
    
    | @@ -0,0 +1,43 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from typing import Dict
         | 
| 2 | 
            +
            from pathlib import Path
         | 
| 3 | 
            +
            import tempfile
         | 
| 4 | 
            +
            import torch
         | 
| 5 | 
            +
            import torchaudio
         | 
| 6 | 
            +
            import librosa
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            # Module-level sample-rate constant (Hz). NOTE(review): nothing in this file
            # references it; the handler loads and saves audio at ``self.mars5.sr`` instead.
            SAMPLE_RATE = 16000
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            class EndpointHandler():
                """Callable handler that runs MARS5 text-to-speech against a reference voice.

                The model is loaded once in ``__init__``; each call synthesizes one
                utterance and writes it to a freshly created temporary ``.wav`` file.
                """

                def __init__(self, path=""):
                    """Load the MARS5 English model and its inference-config class.

                    Args:
                        path: Accepted for interface compatibility; not used by this handler.
                    """
                    # Load the MARS5 model (torch.hub returns the model and its config class).
                    # trust_repo=True loads the hub repository without an interactive prompt.
                    self.mars5, self.config_class = torch.hub.load('Camb-ai/mars5-tts', 'mars5_english', trust_repo=True)

                @staticmethod
                def _reserve_output_path() -> Path:
                    """Create an empty, uniquely named ``.wav`` temp file and return its path.

                    ``tempfile.mktemp`` only returns a name, leaving a window in which another
                    process can create that path first. Creating the file here closes the
                    window; ``delete=False`` keeps it on disk after the handle is closed.
                    """
                    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                        return Path(tmp.name)

                def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
                    """Synthesize speech for ``data["text"]`` using a reference recording.

                    Args:
                        data (Dict[str, bytes]):
                            Must contain ``"text"`` (what to synthesize), ``"audio_file"``
                            (reference audio, passed straight to ``librosa.load``) and
                            ``"transcript"`` (passed to the model alongside the reference
                            audio; presumably the transcript of that recording).
                    Returns:
                        Dict[str, str]: ``{"synthesized_audio": <path>}`` where ``<path>`` is
                        a temporary ``.wav`` file. The file is not deleted by this handler;
                        the caller owns it.
                    Raises:
                        KeyError: If any of the three required keys is missing from ``data``.
                    """
                    # Process input
                    text = data["text"]
                    audio_file = data["audio_file"]
                    transcript = data["transcript"]

                    # Load the reference audio as mono, resampled to the model's sample rate
                    wav, _ = librosa.load(audio_file, sr=self.mars5.sr, mono=True)
                    wav = torch.from_numpy(wav)

                    # Define the configuration for the TTS model
                    cfg = self.config_class(
                        deep_clone=True,
                        rep_penalty_window=100,
                        top_k=100,
                        temperature=0.7,
                        freq_penalty=3,
                    )

                    # Generate the synthesized audio (the returned codes are not needed here)
                    _, wav_out = self.mars5.tts(text, wav, transcript, cfg=cfg)

                    # Save the synthesized audio to a temporary file
                    output_path = self._reserve_output_path()
                    try:
                        # unsqueeze(0) adds a leading dimension before saving
                        # (assumes wav_out is 1-D, i.e. samples only -- TODO confirm).
                        torchaudio.save(str(output_path), wav_out.unsqueeze(0), self.mars5.sr)
                    except Exception:
                        # Do not leave the reserved (empty or partial) file behind on failure.
                        if output_path.exists():
                            output_path.unlink()
                        raise

                    return {"synthesized_audio": str(output_path)}
         | 
