Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -133,13 +133,13 @@ def get_audio_prompt(model, audio_file, device, dtype):
|
|
| 133 |
prompt_wav = prompt_wav.mean(dim=0, keepdim=True)
|
| 134 |
with torch.no_grad():
|
| 135 |
style_prompt_embed = model(wavs = prompt_wav)
|
| 136 |
-
return style_prompt_embed.squeeze(0)
|
| 137 |
|
| 138 |
@spaces.GPU
|
| 139 |
def get_text_prompt(model, text, device, dtype):
|
| 140 |
with torch.no_grad():
|
| 141 |
style_prompt_embed = model(texts = [text])
|
| 142 |
-
return style_prompt_embed.squeeze(0)
|
| 143 |
|
| 144 |
@spaces.GPU
|
| 145 |
def make_fake_stereo(audio, sampling_rate):
|
|
@@ -176,11 +176,11 @@ def inference(
|
|
| 176 |
cfg_strength=cfg_strength,
|
| 177 |
odeint_method=odeint_method
|
| 178 |
)
|
| 179 |
-
latent = latent.transpose(1, 2)
|
| 180 |
-
audio = decoder.decode_audio(latent, overlap=5, chunk_size=20)
|
| 181 |
|
| 182 |
num_channels = 1
|
| 183 |
-
audio = audio.float().cpu().squeeze()[None, :]
|
| 184 |
if fake_stereo:
|
| 185 |
audio = make_fake_stereo(audio, decoder.h.sampling_rate)
|
| 186 |
num_channels = 2
|
|
|
|
| 133 |
prompt_wav = prompt_wav.mean(dim=0, keepdim=True)
|
| 134 |
with torch.no_grad():
|
| 135 |
style_prompt_embed = model(wavs = prompt_wav)
|
| 136 |
+
return style_prompt_embed.squeeze(0).detach()
|
| 137 |
|
| 138 |
@spaces.GPU
|
| 139 |
def get_text_prompt(model, text, device, dtype):
|
| 140 |
with torch.no_grad():
|
| 141 |
style_prompt_embed = model(texts = [text])
|
| 142 |
+
return style_prompt_embed.squeeze(0).detach()
|
| 143 |
|
| 144 |
@spaces.GPU
|
| 145 |
def make_fake_stereo(audio, sampling_rate):
|
|
|
|
| 176 |
cfg_strength=cfg_strength,
|
| 177 |
odeint_method=odeint_method
|
| 178 |
)
|
| 179 |
+
latent = latent.transpose(1, 2).detach()
|
| 180 |
+
audio = decoder.decode_audio(latent, overlap=5, chunk_size=20).detach()
|
| 181 |
|
| 182 |
num_channels = 1
|
| 183 |
+
audio = audio.float().cpu().detach().squeeze()[None, :]
|
| 184 |
if fake_stereo:
|
| 185 |
audio = make_fake_stereo(audio, decoder.h.sampling_rate)
|
| 186 |
num_channels = 2
|