Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	
		aigmixer
		
	committed on
		
		
					Commit 
							
							·
						
						479b96c
	
1
								Parent(s):
							
							f0b147b
								
upload piper scripts
Browse files- piper/__init__.py +5 -0
 - piper/__main__.py +159 -0
 - piper/config.py +53 -0
 - piper/const.py +5 -0
 - piper/download.py +139 -0
 - piper/file_hash.py +46 -0
 - piper/util.py +12 -0
 - piper/voice.py +177 -0
 - piper/voices.json +0 -0
 
    	
        piper/__init__.py
    ADDED
    
    | 
         @@ -0,0 +1,5 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            from .voice import PiperVoice
         
     | 
| 2 | 
         
            +
             
     | 
| 3 | 
         
            +
            __all__ = [
         
     | 
| 4 | 
         
            +
                "PiperVoice",
         
     | 
| 5 | 
         
            +
            ]
         
     | 
    	
        piper/__main__.py
    ADDED
    
    | 
         @@ -0,0 +1,159 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            import argparse
         
     | 
| 2 | 
         
            +
            import logging
         
     | 
| 3 | 
         
            +
            import sys
         
     | 
| 4 | 
         
            +
            import time
         
     | 
| 5 | 
         
            +
            import wave
         
     | 
| 6 | 
         
            +
            from pathlib import Path
         
     | 
| 7 | 
         
            +
            from typing import Any, Dict
         
     | 
| 8 | 
         
            +
             
     | 
| 9 | 
         
            +
            from . import PiperVoice
         
     | 
| 10 | 
         
            +
            from .download import ensure_voice_exists, find_voice, get_voices
         
     | 
| 11 | 
         
            +
             
     | 
| 12 | 
         
            +
            _FILE = Path(__file__)
         
     | 
| 13 | 
         
            +
            _DIR = _FILE.parent
         
     | 
| 14 | 
         
            +
            _LOGGER = logging.getLogger(_FILE.stem)
         
     | 
| 15 | 
         
            +
             
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
            +
            def _build_parser() -> argparse.ArgumentParser:
                """Create the argument parser for the Piper command-line interface."""
                parser = argparse.ArgumentParser()
                parser.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
                parser.add_argument("-c", "--config", help="Path to model config file")
                parser.add_argument(
                    "-f",
                    "--output-file",
                    "--output_file",
                    help="Path to output WAV file (default: stdout)",
                )
                parser.add_argument(
                    "-d",
                    "--output-dir",
                    "--output_dir",
                    help="Path to output directory (default: cwd)",
                )
                parser.add_argument(
                    "--output-raw",
                    "--output_raw",
                    action="store_true",
                    help="Stream raw audio to stdout",
                )
                # Synthesis settings: when omitted they stay None and are passed
                # through to the voice unchanged.
                parser.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
                parser.add_argument(
                    "--length-scale", "--length_scale", type=float, help="Phoneme length"
                )
                parser.add_argument(
                    "--noise-scale", "--noise_scale", type=float, help="Generator noise"
                )
                parser.add_argument(
                    "--noise-w", "--noise_w", type=float, help="Phoneme width noise"
                )
                # Inference backend
                parser.add_argument("--cuda", action="store_true", help="Use GPU")
                # Post-processing
                parser.add_argument(
                    "--sentence-silence",
                    "--sentence_silence",
                    type=float,
                    default=0.0,
                    help="Seconds of silence after each sentence",
                )
                # Voice lookup / download locations
                parser.add_argument(
                    "--data-dir",
                    "--data_dir",
                    action="append",
                    default=[str(Path.cwd())],
                    help="Data directory to check for downloaded models (default: current directory)",
                )
                parser.add_argument(
                    "--download-dir",
                    "--download_dir",
                    help="Directory to download voices into (default: first data dir)",
                )
                # Voice catalog
                parser.add_argument(
                    "--update-voices",
                    action="store_true",
                    help="Download latest voices.json during startup",
                )
                # Logging
                parser.add_argument(
                    "--debug", action="store_true", help="Print DEBUG messages to console"
                )
                return parser


            def _iter_nonblank_lines(stream):
                """Yield each line of *stream* with surrounding whitespace stripped, skipping empty lines."""
                for line in stream:
                    line = line.strip()
                    if line:
                        yield line


            def main() -> None:
                """Run the Piper command-line interface.

                Parses the command-line options, downloads the requested voice when the
                given model path does not exist, loads the voice, and synthesizes text
                read from stdin in one of three modes:

                * ``--output-raw``: raw audio for each non-blank input line is streamed
                  to stdout as it is produced.
                * ``--output-dir``: one WAV file per non-blank input line is written
                  into the directory (created if needed).
                * otherwise: all of stdin is synthesized into a single WAV written to
                  ``--output-file``, or to stdout when that is unset or ``-``.
                """
                args = _build_parser().parse_args()
                logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
                _LOGGER.debug(args)

                if not args.download_dir:
                    # Download to first data directory by default.
                    # NOTE: argparse's "append" action adds user values *after* the
                    # default list, so the first entry is the current directory even
                    # when --data-dir is given on the command line.
                    args.download_dir = args.data_dir[0]

                # Download voice if file doesn't exist
                model_path = Path(args.model)
                if not model_path.exists():
                    # Load voice info
                    voices_info = get_voices(args.download_dir, update_voices=args.update_voices)

                    # Resolve aliases for backwards compatibility with old voice names
                    aliases_info: Dict[str, Any] = {}
                    for voice_info in voices_info.values():
                        for voice_alias in voice_info.get("aliases", []):
                            aliases_info[voice_alias] = {"_is_alias": True, **voice_info}

                    voices_info.update(aliases_info)
                    ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
                    args.model, args.config = find_voice(args.model, args.data_dir)

                # Load voice
                voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)
                synthesize_args = {
                    "speaker_id": args.speaker,
                    "length_scale": args.length_scale,
                    "noise_scale": args.noise_scale,
                    "noise_w": args.noise_w,
                    "sentence_silence": args.sentence_silence,
                }

                if args.output_raw:
                    # Read line-by-line
                    for line in _iter_nonblank_lines(sys.stdin):
                        # Write raw audio to stdout as it's produced
                        audio_stream = voice.synthesize_stream_raw(line, **synthesize_args)
                        for audio_bytes in audio_stream:
                            sys.stdout.buffer.write(audio_bytes)
                            sys.stdout.buffer.flush()
                elif args.output_dir:
                    output_dir = Path(args.output_dir)
                    output_dir.mkdir(parents=True, exist_ok=True)

                    # Read line-by-line
                    for line in _iter_nonblank_lines(sys.stdin):
                        # Name each file after the monotonic clock reading at write time
                        wav_path = output_dir / f"{time.monotonic_ns()}.wav"
                        with wave.open(str(wav_path), "wb") as wav_file:
                            voice.synthesize(line, wav_file, **synthesize_args)

                        _LOGGER.info("Wrote %s", wav_path)
                else:
                    # Read entire input
                    text = sys.stdin.read()

                    if (not args.output_file) or (args.output_file == "-"):
                        # Write to stdout
                        with wave.open(sys.stdout.buffer, "wb") as wav_file:
                            voice.synthesize(text, wav_file, **synthesize_args)
                    else:
                        # Write to file
                        with wave.open(args.output_file, "wb") as wav_file:
                            voice.synthesize(text, wav_file, **synthesize_args)
         
     | 
| 156 | 
         
            +
             
     | 
| 157 | 
         
            +
             
     | 
| 158 | 
         
            +
            if __name__ == "__main__":
         
     | 
| 159 | 
         
            +
                main()
         
     | 
    	
        piper/config.py
    ADDED
    
    | 
         @@ -0,0 +1,53 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            """Piper configuration"""
         
     | 
| 2 | 
         
            +
            from dataclasses import dataclass
         
     | 
| 3 | 
         
            +
            from enum import Enum
         
     | 
| 4 | 
         
            +
            from typing import Any, Dict, Mapping, Sequence
         
     | 
| 5 | 
         
            +
             
     | 
| 6 | 
         
            +
             
     | 
| 7 | 
         
            +
            class PhonemeType(str, Enum):
                """Phoneme type named by a voice config: espeak or text."""

                ESPEAK = "espeak"
                TEXT = "text"


            @dataclass
            class PiperConfig:
                """Piper configuration"""

                num_symbols: int
                """Number of phonemes"""

                num_speakers: int
                """Number of speakers"""

                sample_rate: int
                """Sample rate of output audio"""

                espeak_voice: str
                """Name of espeak-ng voice or alphabet"""

                # Default inference settings, read from the "inference" section of
                # the config (see from_dict).
                length_scale: float
                noise_scale: float
                noise_w: float

                phoneme_id_map: Mapping[str, Sequence[int]]
                """Phoneme -> [id,]"""

                phoneme_type: PhonemeType
                """espeak or text"""

                @classmethod
                def from_dict(cls, config: Dict[str, Any]) -> "PiperConfig":
                    """Build a configuration from a parsed voice config dictionary.

                    Required keys are ``num_symbols``, ``num_speakers``,
                    ``audio.sample_rate``, ``espeak.voice`` and ``phoneme_id_map``.
                    The optional ``inference`` section supplies ``noise_scale``
                    (default 0.667), ``length_scale`` (default 1.0) and ``noise_w``
                    (default 0.8); ``phoneme_type`` defaults to espeak.

                    Implemented as a classmethod so that subclasses get instances
                    of their own type.

                    Raises:
                        KeyError: if a required key is missing.
                        ValueError: if ``phoneme_type`` is not a known PhonemeType.
                    """
                    inference = config.get("inference", {})

                    return cls(
                        num_symbols=config["num_symbols"],
                        num_speakers=config["num_speakers"],
                        sample_rate=config["audio"]["sample_rate"],
                        noise_scale=inference.get("noise_scale", 0.667),
                        length_scale=inference.get("length_scale", 1.0),
                        noise_w=inference.get("noise_w", 0.8),
                        #
                        espeak_voice=config["espeak"]["voice"],
                        phoneme_id_map=config["phoneme_id_map"],
                        phoneme_type=PhonemeType(config.get("phoneme_type", PhonemeType.ESPEAK)),
                    )
         
     | 
    	
        piper/const.py
    ADDED
    
    | 
         @@ -0,0 +1,5 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            """Constants"""
         
     | 
| 2 | 
         
            +
             
     | 
| 3 | 
         
            +
            PAD = "_"  # padding (0)
         
     | 
| 4 | 
         
            +
            BOS = "^"  # beginning of sentence
         
     | 
| 5 | 
         
            +
            EOS = "$"  # end of sentence
         
     | 
    	
        piper/download.py
    ADDED
    
    | 
         @@ -0,0 +1,139 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            """Utility for downloading Piper voices."""
         
     | 
| 2 | 
         
            +
            import json
         
     | 
| 3 | 
         
            +
            import logging
         
     | 
| 4 | 
         
            +
            import shutil
         
     | 
| 5 | 
         
            +
            from pathlib import Path
         
     | 
| 6 | 
         
            +
            from typing import Any, Dict, Iterable, Set, Tuple, Union
         
     | 
| 7 | 
         
            +
            from urllib.request import urlopen
         
     | 
| 8 | 
         
            +
             
     | 
| 9 | 
         
            +
            from .file_hash import get_file_hash
         
     | 
| 10 | 
         
            +
             
     | 
| 11 | 
         
            +
            URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/{file}"
         
     | 
| 12 | 
         
            +
             
     | 
| 13 | 
         
            +
            _DIR = Path(__file__).parent
         
     | 
| 14 | 
         
            +
            _LOGGER = logging.getLogger(__name__)
         
     | 
| 15 | 
         
            +
             
     | 
| 16 | 
         
            +
            _SKIP_FILES = {"MODEL_CARD"}
         
     | 
| 17 | 
         
            +
             
     | 
| 18 | 
         
            +
             
     | 
| 19 | 
         
            +
            class VoiceNotFoundError(Exception):
                """Raised when a requested voice name is not present in the voices info."""
         
     | 
| 21 | 
         
            +
             
     | 
| 22 | 
         
            +
             
     | 
| 23 | 
         
            +
            def get_voices(
                download_dir: Union[str, Path], update_voices: bool = False
            ) -> Dict[str, Any]:
                """Load available voices from the downloaded or embedded JSON file.

                Args:
                    download_dir: Directory that holds (or will receive) a downloaded
                        ``voices.json``.
                    update_voices: When true, fetch the latest ``voices.json`` into
                        ``download_dir`` before loading.

                Returns:
                    The parsed contents of ``voices.json``. The downloaded copy is
                    preferred; the copy next to this module is used when no
                    downloaded copy exists.

                Raises:
                    Any error raised by the download, file access, or JSON parsing
                    propagates to the caller.
                """
                download_dir = Path(download_dir)
                voices_download = download_dir / "voices.json"

                if update_voices:
                    # Download latest voices.json
                    voices_url = URL_FORMAT.format(file="voices.json")
                    _LOGGER.debug("Downloading %s to %s", voices_url, voices_download)

                    download_dir.mkdir(parents=True, exist_ok=True)

                    # Download to a sibling temporary file and rename it into place, so
                    # an interrupted download never leaves a truncated voices.json that
                    # would be preferred over the embedded copy on later runs.
                    voices_partial = download_dir / "voices.json.part"
                    try:
                        with urlopen(voices_url) as response, open(
                            voices_partial, "wb"
                        ) as download_file:
                            shutil.copyfileobj(response, download_file)

                        voices_partial.replace(voices_download)
                    finally:
                        # After a successful rename the partial file is already gone.
                        try:
                            voices_partial.unlink()
                        except FileNotFoundError:
                            pass

                # Prefer downloaded file to embedded
                voices_embedded = _DIR / "voices.json"
                voices_path = voices_download if voices_download.exists() else voices_embedded

                _LOGGER.debug("Loading %s", voices_path)
                with open(voices_path, "r", encoding="utf-8") as voices_file:
                    return json.load(voices_file)
         
     | 
| 46 | 
         
            +
             
     | 
| 47 | 
         
            +
             
     | 
| 48 | 
         
            +
            def ensure_voice_exists(
                name: str,
                data_dirs: Iterable[Union[str, Path]],
                download_dir: Union[str, Path],
                voices_info: Dict[str, Any],
            ):
                """Make sure the files of voice ``name`` exist locally, downloading if needed.

                Every file listed under ``voices_info[name]["files"]`` is looked up by
                its base name in each directory of ``data_dirs``. A copy is accepted
                when its size equals ``size_bytes`` and its hash (``get_file_hash``)
                equals ``md5_digest``. Files without an accepted copy in any data
                directory are fetched from ``URL_FORMAT`` into ``download_dir``.

                Args:
                    name: Key of the voice in ``voices_info``.
                    data_dirs: Directories searched for existing voice files.
                    download_dir: Directory that receives downloaded files
                        (created if it does not exist).
                    voices_info: Mapping of voice name to voice metadata.

                Raises:
                    VoiceNotFoundError: ``name`` is not a key of ``voices_info``.
                    ValueError: The voice entry lists no files.
                """
                assert data_dirs, "No data dirs"
                if name not in voices_info:
                    raise VoiceNotFoundError(name)

                voice_info = voices_info[name]
                voice_files = voice_info["files"]

                # Files with an accepted copy in at least one data directory.
                verified_files: Set[str] = set()
                # Files that have no accepted copy in the directories seen so far.
                files_to_download: Set[str] = set()

                for data_dir in data_dirs:
                    data_dir = Path(data_dir)

                    # Check sizes/hashes
                    for file_path, file_info in voice_files.items():
                        if file_path in verified_files:
                            # A valid copy was already found in another data directory
                            continue

                        file_name = Path(file_path).name
                        if file_name in _SKIP_FILES:
                            continue

                        data_file_path = data_dir / file_name
                        _LOGGER.debug("Checking %s", data_file_path)
                        if not data_file_path.exists():
                            _LOGGER.debug("Missing %s", data_file_path)
                            files_to_download.add(file_path)
                            continue

                        expected_size = file_info["size_bytes"]
                        actual_size = data_file_path.stat().st_size
                        if expected_size != actual_size:
                            _LOGGER.warning(
                                "Wrong size (expected=%s, actual=%s) for %s",
                                expected_size,
                                actual_size,
                                data_file_path,
                            )
                            files_to_download.add(file_path)
                            continue

                        expected_hash = file_info["md5_digest"]
                        actual_hash = get_file_hash(data_file_path)
                        if expected_hash != actual_hash:
                            _LOGGER.warning(
                                "Wrong hash (expected=%s, actual=%s) for %s",
                                expected_hash,
                                actual_hash,
                                data_file_path,
                            )
                            files_to_download.add(file_path)
                            continue

                        # Valid copy found: a miss in an earlier data directory must
                        # not trigger a download any more.
                        verified_files.add(file_path)
                        files_to_download.discard(file_path)

                if (not voice_files) and (not files_to_download):
                    raise ValueError(f"Unable to find or download voice: {name}")

                # Download missing files
                download_dir = Path(download_dir)

                for file_path in files_to_download:
                    file_name = Path(file_path).name
                    if file_name in _SKIP_FILES:
                        continue

                    file_url = URL_FORMAT.format(file=file_path)
                    download_file_path = download_dir / file_name
                    download_file_path.parent.mkdir(parents=True, exist_ok=True)

                    _LOGGER.debug("Downloading %s to %s", file_url, download_file_path)
                    with urlopen(file_url) as response, open(
                        download_file_path, "wb"
                    ) as download_file:
                        shutil.copyfileobj(response, download_file)

                    _LOGGER.info("Downloaded %s (%s)", download_file_path, file_url)
         
     | 
| 128 | 
         
            +
             
     | 
| 129 | 
         
            +
             
     | 
| 130 | 
         
            +
            def find_voice(name: str, data_dirs: Iterable[Union[str, Path]]) -> Tuple[Path, Path]:
                """Locate the model and config files of voice ``name``.

                Directories are tried in the order given; the first one that holds
                both ``<name>.onnx`` and ``<name>.onnx.json`` wins.

                Returns:
                    ``(model_path, config_path)`` from the first matching directory.

                Raises:
                    ValueError: No directory contains both files.
                """
                model_file_name = f"{name}.onnx"
                config_file_name = f"{name}.onnx.json"

                for candidate_dir in map(Path, data_dirs):
                    model_file = candidate_dir / model_file_name
                    config_file = candidate_dir / config_file_name

                    # Both files must live in the same directory
                    if not (model_file.exists() and config_file.exists()):
                        continue

                    return model_file, config_file

                raise ValueError(f"Missing files for voice {name}")
         
     | 
    	
        piper/file_hash.py
    ADDED
    
    | 
         @@ -0,0 +1,46 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            import argparse
         
     | 
| 2 | 
         
            +
            import hashlib
         
     | 
| 3 | 
         
            +
            import json
         
     | 
| 4 | 
         
            +
            import sys
         
     | 
| 5 | 
         
            +
            from pathlib import Path
         
     | 
| 6 | 
         
            +
            from typing import Union
         
     | 
| 7 | 
         
            +
             
     | 
| 8 | 
         
            +
             
     | 
| 9 | 
         
            +
            def get_file_hash(path: Union[str, Path], bytes_per_chunk: int = 8192) -> str:
                """Return the hexadecimal MD5 digest of the file at ``path``.

                The file is read ``bytes_per_chunk`` bytes at a time so that large
                files never have to be held in memory at once.
                """
                digest = hashlib.md5()
                with open(path, "rb") as stream:
                    # Keep reading until read() returns an empty bytes object (EOF)
                    for block in iter(lambda: stream.read(bytes_per_chunk), b""):
                        digest.update(block)

                return digest.hexdigest()
         
     | 
| 19 | 
         
            +
             
     | 
| 20 | 
         
            +
             
     | 
| 21 | 
         
            +
            # -----------------------------------------------------------------------------
         
     | 
| 22 | 
         
            +
             
     | 
| 23 | 
         
            +
             
     | 
| 24 | 
         
            +
            def main():
                """Print a JSON object mapping each given file to its MD5 digest.

                With ``--dir``, keys are the file paths relative to that directory;
                otherwise the paths are used as given on the command line.
                """
                cli = argparse.ArgumentParser()
                cli.add_argument("file", nargs="+")
                cli.add_argument("--dir", help="Parent directory")
                options = cli.parse_args()

                # Only convert --dir when it was actually supplied (and non-empty)
                base_dir = Path(options.dir) if options.dir else None

                digests = {}
                for raw_path in options.file:
                    file_path = Path(raw_path)
                    digest = get_file_hash(file_path)

                    key_path = file_path
                    if base_dir is not None:
                        key_path = file_path.relative_to(base_dir)

                    digests[str(key_path)] = digest

                json.dump(digests, sys.stdout)
         
     | 
| 43 | 
         
            +
             
     | 
| 44 | 
         
            +
             
     | 
| 45 | 
         
            +
            # Run the command-line interface only when this file is executed as a script.
            if __name__ == "__main__":
                main()
         
     | 
    	
        piper/util.py
    ADDED
    
    | 
         @@ -0,0 +1,12 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            """Utilities"""
         
     | 
| 2 | 
         
            +
            import numpy as np
         
     | 
| 3 | 
         
            +
             
     | 
| 4 | 
         
            +
             
     | 
| 5 | 
         
            +
            def audio_float_to_int16(
                audio: np.ndarray, max_wav_value: float = 32767.0
            ) -> np.ndarray:
                """Peak-normalize audio and convert it to int16 samples.

                The signal is scaled so that its largest absolute sample maps to
                ``max_wav_value``. The peak is floored at 0.01, which caps the gain at
                ``max_wav_value / 0.01`` so that near-silent input is not blown up
                to full scale.

                Args:
                    audio: Floating point samples.
                    max_wav_value: Target magnitude of the loudest sample.

                Returns:
                    Array of the same shape as ``audio`` with dtype int16. An empty
                    input yields an empty int16 array.
                """
                if audio.size == 0:
                    # np.max() raises ValueError on zero-size arrays; nothing to scale
                    return audio.astype("int16")

                audio_norm = audio * (max_wav_value / max(0.01, np.max(np.abs(audio))))
                audio_norm = np.clip(audio_norm, -max_wav_value, max_wav_value)
                audio_norm = audio_norm.astype("int16")
                return audio_norm
         
     | 
    	
        piper/voice.py
    ADDED
    
    | 
         @@ -0,0 +1,177 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            import json
         
     | 
| 2 | 
         
            +
            import logging
         
     | 
| 3 | 
         
            +
            import wave
         
     | 
| 4 | 
         
            +
            from dataclasses import dataclass
         
     | 
| 5 | 
         
            +
            from pathlib import Path
         
     | 
| 6 | 
         
            +
            from typing import Iterable, List, Optional, Union
         
     | 
| 7 | 
         
            +
             
     | 
| 8 | 
         
            +
            import numpy as np
         
     | 
| 9 | 
         
            +
            import onnxruntime
         
     | 
| 10 | 
         
            +
            from piper_phonemize import phonemize_codepoints, phonemize_espeak, tashkeel_run
         
     | 
| 11 | 
         
            +
             
     | 
| 12 | 
         
            +
            from .config import PhonemeType, PiperConfig
         
     | 
| 13 | 
         
            +
            from .const import BOS, EOS, PAD
         
     | 
| 14 | 
         
            +
            from .util import audio_float_to_int16
         
     | 
| 15 | 
         
            +
             
     | 
| 16 | 
         
            +
            _LOGGER = logging.getLogger(__name__)
         
     | 
| 17 | 
         
            +
             
     | 
| 18 | 
         
            +
             
     | 
| 19 | 
         
            +
            @dataclass
            class PiperVoice:
                """A loaded voice: an ONNX Runtime session together with its config."""

                # ONNX Runtime session that runs the voice model
                session: onnxruntime.InferenceSession
                # Settings parsed from the voice's JSON config file
                config: PiperConfig

                @staticmethod
                def load(
                    model_path: Union[str, Path],
                    config_path: Optional[Union[str, Path]] = None,
                    use_cuda: bool = False,
                ) -> "PiperVoice":
                    """Load an ONNX model and config.

                    Args:
                        model_path: Path to the ``.onnx`` model file.
                        config_path: Path to the JSON config; defaults to
                            ``"<model_path>.json"``.
                        use_cuda: Request the CUDA execution provider instead of CPU.
                    """
                    if config_path is None:
                        config_path = f"{model_path}.json"

                    with open(config_path, "r", encoding="utf-8") as config_file:
                        config_dict = json.load(config_file)

                    return PiperVoice(
                        config=PiperConfig.from_dict(config_dict),
                        session=onnxruntime.InferenceSession(
                            str(model_path),
                            sess_options=onnxruntime.SessionOptions(),
                            providers=["CPUExecutionProvider"]
                            if not use_cuda
                            else ["CUDAExecutionProvider"],
                        ),
                    )

                def phonemize(self, text: str) -> List[List[str]]:
                    """Text to phonemes grouped by sentence.

                    Raises:
                        ValueError: The configured phoneme type is neither ESPEAK
                            nor TEXT.
                    """
                    if self.config.phoneme_type == PhonemeType.ESPEAK:
                        if self.config.espeak_voice == "ar":
                            # Arabic diacritization
                            # https://github.com/mush42/libtashkeel/
                            text = tashkeel_run(text)

                        return phonemize_espeak(text, self.config.espeak_voice)

                    if self.config.phoneme_type == PhonemeType.TEXT:
                        return phonemize_codepoints(text)

                    raise ValueError(f"Unexpected phoneme type: {self.config.phoneme_type}")

                def phonemes_to_ids(self, phonemes: List[str]) -> List[int]:
                    """Phonemes to ids.

                    The result starts with the BOS ids and ends with the EOS ids;
                    every known phoneme is followed by the PAD ids. Phonemes that
                    are missing from the id map are logged and skipped.
                    """
                    id_map = self.config.phoneme_id_map
                    ids: List[int] = list(id_map[BOS])

                    for phoneme in phonemes:
                        if phoneme not in id_map:
                            _LOGGER.warning("Missing phoneme from id map: %s", phoneme)
                            continue

                        ids.extend(id_map[phoneme])
                        ids.extend(id_map[PAD])

                    ids.extend(id_map[EOS])

                    return ids

                def synthesize(
                    self,
                    text: str,
                    wav_file: wave.Wave_write,
                    speaker_id: Optional[int] = None,
                    length_scale: Optional[float] = None,
                    noise_scale: Optional[float] = None,
                    noise_w: Optional[float] = None,
                    sentence_silence: float = 0.0,
                ):
                    """Synthesize WAV audio from text.

                    Configures ``wav_file`` as 16-bit mono at the voice's sample rate
                    and writes the audio of each sentence to it in turn.
                    """
                    wav_file.setframerate(self.config.sample_rate)
                    wav_file.setsampwidth(2)  # 16-bit
                    wav_file.setnchannels(1)  # mono

                    for audio_bytes in self.synthesize_stream_raw(
                        text,
                        speaker_id=speaker_id,
                        length_scale=length_scale,
                        noise_scale=noise_scale,
                        noise_w=noise_w,
                        sentence_silence=sentence_silence,
                    ):
                        wav_file.writeframes(audio_bytes)

                def synthesize_stream_raw(
                    self,
                    text: str,
                    speaker_id: Optional[int] = None,
                    length_scale: Optional[float] = None,
                    noise_scale: Optional[float] = None,
                    noise_w: Optional[float] = None,
                    sentence_silence: float = 0.0,
                ) -> Iterable[bytes]:
                    """Synthesize raw audio per sentence from text.

                    Yields one ``bytes`` object per sentence: the int16 audio followed
                    by ``sentence_silence`` seconds of zero samples.
                    """
                    sentence_phonemes = self.phonemize(text)

                    # 16-bit mono
                    num_silence_samples = int(sentence_silence * self.config.sample_rate)
                    silence_bytes = bytes(num_silence_samples * 2)

                    for phonemes in sentence_phonemes:
                        phoneme_ids = self.phonemes_to_ids(phonemes)
                        yield self.synthesize_ids_to_raw(
                            phoneme_ids,
                            speaker_id=speaker_id,
                            length_scale=length_scale,
                            noise_scale=noise_scale,
                            noise_w=noise_w,
                        ) + silence_bytes

                def synthesize_ids_to_raw(
                    self,
                    phoneme_ids: List[int],
                    speaker_id: Optional[int] = None,
                    length_scale: Optional[float] = None,
                    noise_scale: Optional[float] = None,
                    noise_w: Optional[float] = None,
                ) -> bytes:
                    """Synthesize raw audio from phoneme ids.

                    Scales left as ``None`` fall back to the values in the voice
                    config. For voices with more than one speaker, a missing
                    ``speaker_id`` defaults to speaker 0.

                    Returns:
                        The int16 audio samples as raw bytes.
                    """
                    if length_scale is None:
                        length_scale = self.config.length_scale

                    if noise_scale is None:
                        noise_scale = self.config.noise_scale

                    if noise_w is None:
                        noise_w = self.config.noise_w

                    # Batch of one sequence: shape (1, len(phoneme_ids))
                    phoneme_ids_array = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0)
                    phoneme_ids_lengths = np.array([phoneme_ids_array.shape[1]], dtype=np.int64)
                    scales = np.array(
                        [noise_scale, length_scale, noise_w],
                        dtype=np.float32,
                    )

                    if (self.config.num_speakers > 1) and (speaker_id is None):
                        # Default speaker
                        speaker_id = 0

                    inputs = {
                        "input": phoneme_ids_array,
                        "input_lengths": phoneme_ids_lengths,
                        "scales": scales,
                    }

                    if speaker_id is not None:
                        # Feed "sid" only when a speaker is selected, rather than
                        # passing an explicit None value to the session.
                        inputs["sid"] = np.array([speaker_id], dtype=np.int64)

                    # Synthesize through Onnx; the first output has two leading
                    # length-1 axes that are dropped here.
                    audio = self.session.run(None, inputs)[0].squeeze((0, 1))
                    audio = audio_float_to_int16(audio.squeeze())

                    return audio.tobytes()
         
     | 
    	
        piper/voices.json
    ADDED
    
    | 
         The diff for this file is too large to render. 
		See raw diff 
     | 
| 
         |