root committed on
Commit 45a12c7 · 1 Parent(s): 1ca8a57

update params

app.py CHANGED
@@ -89,7 +89,7 @@ def save_as_flac(sample_rate, audio_data):
 
 
 # 模拟歌曲生成函数 (mock song-generation function)
-def generate_song(lyric, description=None, prompt_audio=None, genre=None, cfg_coef=None, temperature=None, top_k=None, gen_type="mixed", progress=gr.Progress(track_tqdm=True)):
+def generate_song(lyric, description=None, prompt_audio=None, genre=None, cfg_coef=None, temperature=0.1, top_k=-1, gen_type="mixed", progress=gr.Progress(track_tqdm=True)):
     global MODEL
     global STRUCTS
     params = {'cfg_coef':cfg_coef, 'temperature':temperature, 'top_k':top_k}
@@ -225,19 +225,19 @@ lyrics
                     minimum=0.1,
                     maximum=2.0,
                     step=0.1,
-                    value=0.9,
+                    value=0.75,
                     interactive=True,
                     elem_id="temperature",
                 )
-                top_k = gr.Slider(
-                    label="Top-K",
-                    minimum=1,
-                    maximum=100,
-                    step=1,
-                    value=50,
-                    interactive=True,
-                    elem_id="top_k",
-                )
+                # top_k = gr.Slider(
+                #     label="Top-K",
+                #     minimum=1,
+                #     maximum=100,
+                #     step=1,
+                #     value=50,
+                #     interactive=True,
+                #     elem_id="top_k",
+                # )
             with gr.Row():
                 generate_btn = gr.Button("Generate Song", variant="primary")
                 generate_bgm_btn = gr.Button("Generate Pure Music", variant="primary")
@@ -268,12 +268,12 @@ lyrics
     # 生成按钮点击事件 (generate-button click handlers)
     generate_btn.click(
        fn=generate_song,
-       inputs=[lyric, description, prompt_audio, genre, cfg_coef, temperature, top_k],
+       inputs=[lyric, description, prompt_audio, genre, cfg_coef, temperature, gr.State(-1)],
        outputs=[output_audio, output_json]
    )
    generate_bgm_btn.click(
        fn=generate_song,
-       inputs=[lyric, description, prompt_audio, genre, cfg_coef, temperature, top_k, gr.State("bgm")],
+       inputs=[lyric, description, prompt_audio, genre, cfg_coef, temperature, gr.State(-1), gr.State("bgm")],
        outputs=[output_audio, output_json]
    )
 
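
Note on the app.py hunks: the temperature slider's default drops from 0.9 to 0.75, the Top-K slider is commented out, and both click handlers now pass a constant gr.State(-1) in the position that slider used to fill, so generate_song still receives top_k positionally (with -1 presumably disabling top-k truncation); the handler's defaults become temperature=0.1 and top_k=-1 to match. Below is a minimal, self-contained sketch of this wiring pattern, assuming a stripped-down generate_song signature and a reduced set of components; it is illustrative only, not the Space's actual code:

import gradio as gr

# Simplified stand-in for the app's handler (illustrative signature, not the real one).
def generate_song(lyric, temperature=0.1, top_k=-1, gen_type="mixed"):
    # top_k == -1 is treated here as "no top-k truncation" (an assumption).
    return {"lyric": lyric, "temperature": temperature, "top_k": top_k, "gen_type": gen_type}

with gr.Blocks() as demo:
    lyric = gr.Textbox(label="Lyrics")
    temperature = gr.Slider(minimum=0.1, maximum=2.0, step=0.1, value=0.75,
                            interactive=True, elem_id="temperature")
    output_json = gr.JSON()
    generate_btn = gr.Button("Generate Song", variant="primary")
    generate_bgm_btn = gr.Button("Generate Pure Music", variant="primary")

    # Entries in `inputs` are matched to generate_song's parameters by position;
    # gr.State(-1) supplies the fixed value the removed Top-K slider used to provide,
    # and gr.State("bgm") fills gen_type for the pure-music button.
    generate_btn.click(fn=generate_song,
                       inputs=[lyric, temperature, gr.State(-1)],
                       outputs=[output_json])
    generate_bgm_btn.click(fn=generate_song,
                           inputs=[lyric, temperature, gr.State(-1), gr.State("bgm")],
                           outputs=[output_json])

if __name__ == "__main__":
    demo.launch()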
 
codeclm/tokenizer/Flow1dVAE/model_septoken.py CHANGED
@@ -146,41 +146,52 @@ class BASECFM(torch.nn.Module, ABC):
             mu (torch.Tensor): output of encoder
                 shape: (batch_size, n_channels, mel_timesteps, n_feats)
         """
-        t, _, dt = t_span[0], t_span[-1], t_span[1] - t_span[0]
+        dt = t_span[1:] - t_span[:-1]
+        t = t_span[:-1]
+        B = x.shape[0]
+
+        if guidance_scale > 1.0:
+            def double(z):
+                return torch.cat([z, z], 0) if z is not None else None
+            attention_mask = double(attention_mask)
+
+        x_next = x.clone()
         noise = x.clone()
 
-        # I am storing this because I can later plot it by putting a debugger here and saving it to a file
-        # Or in future might add like a return_all_steps flag
-        sol = []
-
-        for step in tqdm(range(1, len(t_span))):
-            x[:,0:incontext_length,:] = (1 - (1 - self.sigma_min) * t) * noise[:,0:incontext_length,:] + t * incontext_x[:,0:incontext_length,:]
-            if(guidance_scale > 1.0):
-
-                model_input = torch.cat([ \
-                    torch.cat([latent_mask_input, latent_mask_input], 0), \
-                    torch.cat([incontext_x, incontext_x], 0), \
-                    torch.cat([torch.zeros_like(mu), mu], 0), \
-                    torch.cat([x, x], 0), \
-                    ], 2)
-                timestep=t.unsqueeze(-1).repeat(2)
-
-                dphi_dt = self.estimator(inputs_embeds=model_input, attention_mask=attention_mask,time_step=timestep).last_hidden_state
-                dphi_dt_uncond, dhpi_dt_cond = dphi_dt.chunk(2,0)
-                dphi_dt = dphi_dt_uncond + guidance_scale * (dhpi_dt_cond - dphi_dt_uncond)
+        for i in tqdm(range(len(dt))):
+            ti = t[i]
+
+            x_next[:, :incontext_length] = (
+                (1 - (1 - self.sigma_min) * ti) * noise[:, :incontext_length] +
+                ti * incontext_x[:, :incontext_length]
+            )
+
+            if guidance_scale > 1.0:
+                model_input = torch.cat([
+                    double(latent_mask_input),
+                    double(incontext_x),
+                    torch.cat([torch.zeros_like(mu), mu], 0),
+                    double(x_next),
+                ], dim=2)
+                timestep = ti.expand(2 * B)
             else:
-                model_input = torch.cat([latent_mask_input, incontext_x, mu, x], 2)
-                timestep=t.unsqueeze(-1)
-                dphi_dt = self.estimator(inputs_embeds=model_input, attention_mask=attention_mask,time_step=timestep).last_hidden_state
-
-            dphi_dt = dphi_dt[: ,:, -x.shape[2]:]
-            x = x + dt * dphi_dt
-            t = t + dt
-            sol.append(x)
-            if step < len(t_span) - 1:
-                dt = t_span[step + 1] - t
-
-        return sol[-1]
+                model_input = torch.cat([
+                    latent_mask_input, incontext_x, mu, x_next
+                ], dim=2)
+                timestep = ti.expand(B)
+
+            v = self.estimator(inputs_embeds=model_input,
+                               attention_mask=attention_mask,
+                               time_step=timestep).last_hidden_state
+            v = v[..., -x.shape[2]:]
+
+            if guidance_scale > 1.0:
+                v_uncond, v_cond = v.chunk(2, 0)
+                v = v_uncond + guidance_scale * (v_cond - v_uncond)
+
+            x_next = x_next + dt[i] * v
+
+        return x_next
 
     def projection_loss(self,hidden_proj, bestrq_emb):
         bsz = hidden_proj.shape[0]
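
Note on the model_septoken.py hunk: the Euler loop of the flow-matching sampler is rewritten. Step sizes are precomputed as dt = t_span[1:] - t_span[:-1] and the time grid is indexed directly instead of mutating scalar t and dt inside the loop; attention_mask is doubled once up front so it matches the 2×B batch used for classifier-free guidance; the in-context prefix is re-noised into x_next at every step; the guidance combination is applied to the predicted velocity v before the Euler update; and the final x_next is returned directly instead of collecting every intermediate state in sol. The sketch below reproduces only this integration pattern with a toy estimator and made-up shapes; it is an illustrative assumption, not the repository's model:

import torch

def toy_estimator(x, mu, t):
    # Stand-in for self.estimator: predicts a "velocity" from state, condition, and time.
    return torch.tanh(x + mu + t.view(-1, 1, 1))

def euler_cfg(x, mu, t_span, guidance_scale=1.5):
    B = x.shape[0]
    dt = t_span[1:] - t_span[:-1]          # per-step sizes; handles non-uniform grids
    t = t_span[:-1]
    for i in range(len(dt)):
        ti = t[i]
        if guidance_scale > 1.0:
            # One batched call: first half unconditional (zeroed condition), second half conditional.
            x_in = torch.cat([x, x], 0)
            mu_in = torch.cat([torch.zeros_like(mu), mu], 0)
            v = toy_estimator(x_in, mu_in, ti.expand(2 * B))
            v_uncond, v_cond = v.chunk(2, 0)
            v = v_uncond + guidance_scale * (v_cond - v_uncond)
        else:
            v = toy_estimator(x, mu, ti.expand(B))
        x = x + dt[i] * v                  # Euler step
    return x

# Usage with made-up sizes and a non-uniform time grid:
x = torch.randn(2, 4, 8)
mu = torch.randn(2, 4, 8)
t_span = torch.tensor([0.0, 0.1, 0.3, 0.6, 1.0])
out = euler_cfg(x, mu, t_span)
print(out.shape)  # torch.Size([2, 4, 8])

With a uniform grid this reduces to the usual fixed-step Euler update; the per-step dt[i] only matters when t_span is non-uniform.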