ASLP-lab committed on
Commit
1f98bdf
·
verified ·
1 Parent(s): 8070f6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -133,13 +133,13 @@ def get_audio_prompt(model, audio_file, device, dtype):
133
  prompt_wav = prompt_wav.mean(dim=0, keepdim=True)
134
  with torch.no_grad():
135
  style_prompt_embed = model(wavs = prompt_wav)
136
- return style_prompt_embed.squeeze(0)
137
 
138
  @spaces.GPU
139
  def get_text_prompt(model, text, device, dtype):
140
  with torch.no_grad():
141
  style_prompt_embed = model(texts = [text])
142
- return style_prompt_embed.squeeze(0)
143
 
144
  @spaces.GPU
145
  def make_fake_stereo(audio, sampling_rate):
@@ -176,11 +176,11 @@ def inference(
176
  cfg_strength=cfg_strength,
177
  odeint_method=odeint_method
178
  )
179
- latent = latent.transpose(1, 2)
180
- audio = decoder.decode_audio(latent, overlap=5, chunk_size=20)
181
 
182
  num_channels = 1
183
- audio = audio.float().cpu().squeeze()[None, :]
184
  if fake_stereo:
185
  audio = make_fake_stereo(audio, decoder.h.sampling_rate)
186
  num_channels = 2
 
133
  prompt_wav = prompt_wav.mean(dim=0, keepdim=True)
134
  with torch.no_grad():
135
  style_prompt_embed = model(wavs = prompt_wav)
136
+ return style_prompt_embed.squeeze(0).detach()
137
 
138
  @spaces.GPU
139
  def get_text_prompt(model, text, device, dtype):
140
  with torch.no_grad():
141
  style_prompt_embed = model(texts = [text])
142
+ return style_prompt_embed.squeeze(0).detach()
143
 
144
  @spaces.GPU
145
  def make_fake_stereo(audio, sampling_rate):
 
176
  cfg_strength=cfg_strength,
177
  odeint_method=odeint_method
178
  )
179
+ latent = latent.transpose(1, 2).detach()
180
+ audio = decoder.decode_audio(latent, overlap=5, chunk_size=20).detach()
181
 
182
  num_channels = 1
183
+ audio = audio.float().cpu().detach().squeeze()[None, :]
184
  if fake_stereo:
185
  audio = make_fake_stereo(audio, decoder.h.sampling_rate)
186
  num_channels = 2