Update app.py
app.py CHANGED
@@ -182,7 +182,9 @@ def infer(ref_style_file, style_description, caption, progress):
             lam_style=1, lam_txt_alignment=1.0,
             use_ddim_sampler=True,
         )
-        for (sampled_c, _, _) in enumerate(
+        for i, (sampled_c, _, _) in enumerate(sampling_c, 1):
+            if i % 5 == 0:  # Update progress every 5 steps
+                progress(0.4 + 0.3 * (i / extras.sampling_configs['timesteps']), f"Stage C reverse process: step {i}/{extras.sampling_configs['timesteps']}")
             sampled_c = sampled_c
 
         progress(0.7, "Starting Stage B reverse process")
@@ -195,7 +197,9 @@ def infer(ref_style_file, style_description, caption, progress):
             models_b.generator, conditions_b, stage_b_latent_shape,
             unconditions_b, device=device, **extras_b.sampling_configs,
         )
-        for (sampled_b, _, _) in enumerate(
+        for i, (sampled_b, _, _) in enumerate(sampling_b, 1):
+            if i % 5 == 0:  # Update progress every 5 steps
+                progress(0.7 + 0.2 * (i / extras_b.sampling_configs['timesteps']), f"Stage B reverse process: step {i}/{extras_b.sampling_configs['timesteps']}")
             sampled_b = sampled_b
         sampled = models_b.stage_a.decode(sampled_b).float()
 
@@ -222,7 +226,7 @@ def infer(ref_style_file, style_description, caption, progress):
     # Clear CUDA cache
     torch.cuda.empty_cache()
 
-def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
+def infer_compo(style_description, ref_style_file, caption, ref_sub_file, progress):
     global models_rbm, models_b, device, sam_model
     if low_vram:
         models_to(models_rbm, device=device)
@@ -246,13 +250,15 @@ def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
     extras_b.sampling_configs['timesteps'] = 10
     extras_b.sampling_configs['t_start'] = 1.0
 
+    progress(0.1, "Loading style and subject reference images")
     ref_style = resize_image(PIL.Image.open(ref_style_file).convert("RGB")).unsqueeze(0).expand(batch_size, -1, -1, -1).to(device)
     ref_images = resize_image(PIL.Image.open(ref_sub_file).convert("RGB")).unsqueeze(0).expand(batch_size, -1, -1, -1).to(device)
 
     batch = {'captions': [caption] * batch_size}
     batch['style'] = ref_style
     batch['images'] = ref_images
-
+
+    progress(0.2, "Processing reference images")
     x0_forward = models_rbm.effnet(extras.effnet_preprocess(ref_images.to(device)))
     x0_style_forward = models_rbm.effnet(extras.effnet_preprocess(ref_style.to(device)))
 
@@ -264,7 +270,8 @@ def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
     sam_mask, boxes, phrases, logits = sam_model.predict(x0_preview_pil, sam_prompt)
     # sam_mask, boxes, phrases, logits = sam_model.predict(transform(x0_preview[0]), sam_prompt)
     sam_mask = sam_mask.detach().unsqueeze(dim=0).to(device)
-
+
+    progress(0.3, "Generating conditions")
     conditions = core.get_conditions(batch, models_rbm, extras, is_eval=True, is_unconditional=False, eval_image_embeds=True, eval_subject_style=True, eval_csd=False)
     unconditions = core.get_conditions(batch, models_rbm, extras, is_eval=True, is_unconditional=True, eval_image_embeds=False, eval_subject_style=True)
     conditions_b = core_b.get_conditions(batch, models_b, extras_b, is_eval=True, is_unconditional=False)
@@ -274,7 +281,8 @@ def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
         models_to(models_rbm, device="cpu", excepts=["generator", "previewer"])
         models_to(sam_model, device="cpu")
         models_to(sam_model.sam, device="cpu")
-
+
+    progress(0.4, "Starting Stage C reverse process")
     # Stage C reverse process.
     sampling_c = extras.gdf.sample(
         models_rbm.generator, conditions, stage_c_latent_shape,
@@ -291,9 +299,12 @@ def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
         sam_prompt=sam_prompt
     )
 
-    for (sampled_c, _, _) in
+    for i, (sampled_c, _, _) in enumerate(sampling_c, 1):
+        if i % 5 == 0:  # Update progress every 5 steps
+            progress(0.4 + 0.3 * (i / extras.sampling_configs['timesteps']), f"Stage C reverse process: step {i}/{extras.sampling_configs['timesteps']}")
         sampled_c = sampled_c
 
+    progress(0.7, "Starting Stage B reverse process")
     # Stage B reverse process.
     with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.bfloat16):
         conditions_b['effnet'] = sampled_c
@@ -303,10 +314,13 @@ def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
             models_b.generator, conditions_b, stage_b_latent_shape,
            unconditions_b, device=device, **extras_b.sampling_configs,
         )
-        for (sampled_b, _, _) in
+        for i, (sampled_b, _, _) in enumerate(sampling_b, 1):
+            if i % 5 == 0:  # Update progress every 5 steps
+                progress(0.7 + 0.2 * (i / extras_b.sampling_configs['timesteps']), f"Stage B reverse process: step {i}/{extras_b.sampling_configs['timesteps']}")
             sampled_b = sampled_b
         sampled = models_b.stage_a.decode(sampled_b).float()
 
+    progress(0.9, "Finalizing the output image")
     sampled = torch.cat([
         torch.nn.functional.interpolate(ref_images.cpu(), size=(height, width)),
         torch.nn.functional.interpolate(ref_style.cpu(), size=(height, width)),
@@ -332,9 +346,9 @@ def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
 
 def run(style_reference_image, style_description, subject_prompt, subject_reference, use_subject_ref):
     result = None
-    progress = gr.Progress(
+    progress = gr.Progress()
     if use_subject_ref is True:
-        result = infer_compo(style_description, style_reference_image, subject_prompt, subject_reference)
+        result = infer_compo(style_description, style_reference_image, subject_prompt, subject_reference, progress)
     else:
         result = infer(style_reference_image, style_description, subject_prompt, progress)
     return result
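A note on the pattern: the change threads a single gr.Progress tracker from run() into infer and infer_compo and calls it with a completion fraction and a short description while each sampling generator is consumed, throttled to every fifth step so the UI is not flooded with updates. The sketch below is a minimal, self-contained illustration of that pattern, not the Space's actual code; fake_sampler, TOTAL_STEPS, and generate are hypothetical stand-ins for extras.gdf.sample(...), extras.sampling_configs['timesteps'], and infer. It uses the progress=gr.Progress() default-argument form from the Gradio docs, whereas the commit constructs the tracker explicitly in run() and passes it down.

# Minimal sketch of the progress-reporting pattern in this commit.
# fake_sampler, TOTAL_STEPS, and generate are made-up stand-ins, not the Space's code.
import time

import gradio as gr

TOTAL_STEPS = 20  # stand-in for extras.sampling_configs['timesteps']


def fake_sampler(steps):
    """Stand-in for a sampling generator: yields one intermediate result per step."""
    for step in range(steps):
        time.sleep(0.05)  # pretend to run one denoising step
        yield f"latent_{step}"


def generate(prompt, progress=gr.Progress()):
    # Default-argument form from the Gradio docs; the commit instead builds
    # gr.Progress() in run() and passes it into the worker functions.
    progress(0.0, desc="Starting reverse process")
    result = None
    for i, latent in enumerate(fake_sampler(TOTAL_STEPS), 1):
        result = latent
        if i % 5 == 0:  # update the UI every 5 steps, as in the diff
            progress(i / TOTAL_STEPS, desc=f"Step {i}/{TOTAL_STEPS}")
    return f"{prompt}: {result}"


demo = gr.Interface(fn=generate, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()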