Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Commit 
							
							·
						
						b5b5082
	
1
								Parent(s):
							
							7890f41
								
first attempt
Browse files- app.py +37 -4
- neutts-air +1 -0
- packages.txt +1 -0
- requirements.txt +19 -0
    	
        app.py
    CHANGED
    
    | @@ -1,7 +1,40 @@ | |
|  | |
|  | |
|  | |
| 1 | 
             
            import gradio as gr
         | 
|  | |
| 2 |  | 
| 3 | 
            -
             | 
| 4 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 5 |  | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import sys
         | 
| 2 | 
            +
            sys.path.append("neutts-air")
         | 
| 3 | 
            +
            from neuttsair.neutts import NeuTTSAir
         | 
| 4 | 
             
            import gradio as gr
         | 
| 5 | 
            +
            import spaces
         | 
| 6 |  | 
# Load the model once at import time so every request reuses the same instance.
#
# `backbone_repo` must be a concrete repository id: no `backbone` variable is
# defined anywhere in this file, so passing that bare name raised NameError on
# startup.
# NOTE(review): "neuphonic/neutts-air" is the checkpoint named in the upstream
# NeuTTS-Air README -- confirm it is the one intended for this Space.
#
# Device strings are handed to PyTorch, which spells the GPU backend "cuda";
# "gpu" is not a valid torch device name.
# NOTE(review): confirm against the pinned neutts-air submodule that a torch
# (non-GGUF) backbone is used, since only that path forwards the string to
# torch.device().
tts = NeuTTSAir(
    backbone_repo="neuphonic/neutts-air",
    backbone_device="cuda",
    codec_repo="neuphonic/neucodec",
    codec_device="cuda",
)
| 14 |  | 
@spaces.GPU()
def infer(ref_text, ref_audio_path, gen_text):
    """Synthesize ``gen_text`` in the voice of the reference recording.

    Args:
        ref_text: Transcript of the reference audio.
        ref_audio_path: Filesystem path of the uploaded reference audio.
        gen_text: The new text to be spoken.

    Returns:
        A ``(sample_rate, waveform)`` tuple with a fixed sample rate of
        24_000, in the form expected by a ``gr.Audio(type="numpy")`` output.

    Raises:
        gr.Error: If no reference audio was supplied or ``gen_text`` is empty.
    """
    # Fail early with a user-visible message instead of an opaque traceback
    # from deep inside the model when a required input is missing.
    if not ref_audio_path:
        raise gr.Error("Please upload a reference audio file.")
    if not gen_text or not gen_text.strip():
        raise gr.Error("Please enter the text to generate.")

    gr.Info("Starting inference request!")
    gr.Info("Encoding reference...")
    ref_codes = tts.encode_reference(ref_audio_path)

    # The text to synthesize arrives as `gen_text`; there is no `input_text`
    # name in this module, so referencing it raised NameError on every call.
    gr.Info(f"Generating audio for input text: {gen_text}")
    wav = tts.infer(gen_text, ref_codes, ref_text)

    return (24_000, wav)
| 26 | 
            +
             | 
# Gradio UI: three inputs (reference transcript, reference audio, target text)
# feed `infer`, whose (sample_rate, waveform) result is rendered as audio.
demo = gr.Interface(
    fn=infer,
    inputs=[
        gr.Textbox(label="Reference Text"),
        # Gradio 4+ takes `sources` (a list) instead of the removed `source`
        # keyword; requirements.txt installs an unpinned, current gradio, so
        # the old spelling raises TypeError when the component is built.
        # type="filepath" hands `infer` a path string rather than raw samples.
        gr.Audio(sources=["upload"], type="filepath", label="Reference Audio"),
        gr.Textbox(label="Text to Generate"),
    ],
    outputs=gr.Audio(type="numpy", label="Generated Speech"),
    title="NeuTTS-Air",
    description="Upload a reference audio sample, provide the reference text, and enter new text to synthesize.",
)

# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
    	
        neutts-air
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            Subproject commit ededc7d354b05cb6d245c2a8563e04c5f8ac12a2
         | 
    	
        packages.txt
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            espeak
         | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,19 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            datasets==4.0.0
         | 
| 2 | 
            +
            librosa==0.11.0
         | 
| 3 | 
            +
            neucodec>=0.0.3
         | 
| 4 | 
            +
            numpy==2.2.6
         | 
| 5 | 
            +
            pandas==2.3.2
         | 
| 6 | 
            +
            phonemizer==3.3.0
         | 
| 7 | 
            +
            requests==2.32.5
         | 
| 8 | 
            +
            scipy>=1.15
         | 
| 9 | 
            +
            soundfile==0.13.1
         | 
| 10 | 
            +
            torch==2.8.0
         | 
| 11 | 
            +
            torchao==0.13.0
         | 
| 12 | 
            +
            torchaudio==2.8.0
         | 
| 13 | 
            +
            torchtune==0.6.1
         | 
| 14 | 
            +
            tqdm==4.67.1
         | 
| 15 | 
            +
            transformers==4.56.1
         | 
| 16 | 
            +
            vector-quantize-pytorch==1.17.8
         | 
| 17 | 
            +
            resemble-perth==1.0.1
         | 
| 18 | 
            +
            accelerate==1.10.1
         | 
| 19 | 
            +
            gradio
         | 
 
			
