Added styleclip support to video generation
- app.py +2 -5
- generate_videos.py +20 -4
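The crux of the change: StyleCLIP global-direction edits live in StyleSpace ("S") rather than W+, and the video pipeline now tells the two apart purely by tensor width. A minimal sketch of that heuristic, assuming the 1024px StyleGAN2 used here (a W+ code is [batch, 18, 512]; a flattened S code has 9088 channels):

import torch

def code_is_s(latents: torch.Tensor) -> bool:
    # Only a flattened StyleSpace tensor is 9088 wide in dim 1;
    # a W+ code is [batch, 18, 512], so its dim 1 is 18.
    return latents.size()[1] == 9088

assert code_is_s(torch.randn(1, 9088))         # S code (StyleCLIP edit)
assert not code_is_s(torch.randn(1, 18, 512))  # W+ code (GAN inversion)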
app.py
CHANGED
@@ -258,18 +258,15 @@ class ImageEditor(object):
         output_paths.append(output_path)

         return output_paths
-
+
         return self.generate_vid(generators, inverted_latent, target_latents, out_dir)

     def generate_vid(self, generators, source_latent, target_latents, out_dir):

         fps = 24

-        np_latent = source_latent.squeeze(0).cpu().detach().numpy()
-
         with tempfile.TemporaryDirectory() as dirpath:
-
-            generate_frames(np_latent, target_latents, generators, dirpath)
+            generate_frames(source_latent, target_latents, generators, dirpath)
             video_from_interpolations(fps, dirpath)

             gen_path = os.path.join(dirpath, "out.mp4")
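app.py's side is small: the numpy conversion is dropped from generate_vid, and the raw torch latent goes straight into generate_frames, which now decides between W+ and S codes itself. The rest of the with-block is untouched; for orientation, a hedged sketch of what video_from_interpolations is assumed to do with the numbered frames (the repo's actual helper may differ, e.g. in codec flags):

import os
import subprocess

def frames_to_mp4(frame_dir: str, fps: int = 24) -> str:
    # Frames are saved as 000.jpg, 001.jpg, ... (str(idx).zfill(3) in
    # generate_videos.py), so ffmpeg's %03d pattern reads them in order.
    out_path = os.path.join(frame_dir, "out.mp4")
    subprocess.run(
        ["ffmpeg", "-y", "-framerate", str(fps),
         "-i", os.path.join(frame_dir, "%03d.jpg"), out_path],
        check=True,
    )
    return out_path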
generate_videos.py
CHANGED
@@ -32,6 +32,8 @@ import subprocess
 import shutil
 import copy

+from styleclip.styleclip_global import style_tensor_to_style_dict, style_dict_to_style_tensor
+
 VALID_EDITS = ["pose", "age", "smile", "gender", "hair_length", "beard"]

 SUGGESTED_DISTANCES = {

@@ -62,14 +64,24 @@ def generate_frames(source_latent, target_latents, g_ema_list, output_dir):

     device = "cuda" if torch.cuda.is_available() else "cpu"

+    code_is_s = target_latents.size()[1] == 9088
+
+    if code_is_s:
+        source_s_dict = g_ema_list[0].get_s_code(source_latent, input_is_latent=True)[0]
+        np_latent = style_dict_to_style_tensor(source_s_dict, g_ema_list[0]).cpu().detach().numpy()
+        target_latents = target_latents.cpu().detach().numpy()
+    else:
+        np_latent = source_latent.squeeze(0).cpu().detach().numpy()
+
+
     num_alphas = min(10, 30 // len(target_latents))

     alphas = np.linspace(0, 1, num=num_alphas)

-    latents = interpolate_with_target_latents(source_latent, target_latents, alphas)
+    latents = interpolate_with_target_latents(np_latent, target_latents, alphas)

     segments = len(g_ema_list) - 1
-
+
     if segments:
         segment_length = len(latents) / segments

@@ -91,10 +103,14 @@ def generate_frames(source_latent, target_latents, g_ema_list, output_dir):
             src_pars[k].data.copy_(mix_pars[segment_id][k] * (1 - mix_alpha) + mix_pars[segment_id + 1][k] * mix_alpha)

         if idx == 0 or segments or latent is not latents[idx - 1]:
-            latent_tensor = torch.from_numpy(latent).float().to(device)
+            latent_tensor = torch.from_numpy(latent).float().to(device)

             with torch.no_grad():
-                img, _ = g_ema([latent_tensor], input_is_latent=True, truncation=1, randomize_noise=False)
+                if code_is_s:
+                    latent_for_gen = style_tensor_to_style_dict(latent_tensor, g_ema)
+                    img, _ = g_ema(latent_for_gen, input_is_s_code=True, input_is_latent=True, truncation=1, randomize_noise=False)
+                else:
+                    img, _ = g_ema([latent_tensor], input_is_latent=True, truncation=1, randomize_noise=False)

             utils.save_image(img, f"{output_dir}/{str(idx).zfill(3)}.jpg", nrow=1, normalize=True, scale_each=True, range=(-1, 1))
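For reference, the latents consumed by the generation loop in the last hunk come from interpolate_with_target_latents, which this commit leaves unchanged (it now receives numpy arrays for both code types). A stand-in sketch, with hypothetical shapes, of the computation it is assumed to perform: straight linear walks from the source code to each target, num_alphas steps per segment.

import numpy as np

def interpolate_with_targets(source, targets, alphas):
    # One linear segment per target: frame = (1 - a) * source + a * target.
    return [(1 - a) * source + a * target for target in targets for a in alphas]

source = np.zeros(9088, dtype=np.float32)    # e.g. a flattened S code
targets = np.ones((2, 9088), dtype=np.float32)
num_alphas = min(10, 30 // len(targets))     # caps the clip at ~30 frames
latents = interpolate_with_targets(source, targets, np.linspace(0, 1, num=num_alphas))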