Spaces: Running on Zero

wuwenxu.01 committed
Commit: c62efeb
1 Parent(s): 9d31e57

fix: remove unused parameters

Files changed:
- app.py +19 -17
- assets/examples/3one2one/config.json +0 -8
- assets/examples/3one2one/ref1.png +0 -3
- assets/examples/3one2one/result.png +0 -3
- assets/examples/{5two2one → 3two2one}/config.json +1 -1
- assets/examples/{5two2one → 3two2one}/ref1.png +0 -0
- assets/examples/{5two2one → 3two2one}/ref2.png +0 -0
- assets/examples/{5two2one → 3two2one}/result.png +0 -0
- assets/examples/4two2one/config.json +1 -1
- assets/examples/{6many2one → 5many2one}/config.json +0 -0
- assets/examples/{6many2one → 5many2one}/ref1.png +0 -0
- assets/examples/{6many2one → 5many2one}/ref2.png +0 -0
- assets/examples/{6many2one → 5many2one}/ref3.png +0 -0
- assets/examples/{6many2one → 5many2one}/result.png +0 -0
- assets/examples/{7t2i → 6t2i}/config.json +0 -0
- assets/examples/{7t2i → 6t2i}/result.png +0 -0
- uno/flux/pipeline.py +2 -22
- uno/flux/sampling.py +0 -19
- uno/flux/util.py +9 -3
app.py
CHANGED

@@ -44,7 +44,6 @@ def get_examples(examples_dir: str = "assets/examples") -> list:
         example_list.append(None)

         example_list.append(example_dict["seed"])
-        example_list.append(example_dict["ref_long_side"])

         ans.append(example_list)
     return ans
@@ -58,23 +57,27 @@ def create_demo(
     pipeline = UNOPipeline(model_type, device, offload, only_lora=True, lora_rank=512)
     pipeline.gradio_generate = spaces.GPU(duratioin=120)(pipeline.gradio_generate)

+
+    badges_text = r"""
+    <div style="text-align: center; display: flex; justify-content: left; gap: 5px;">
+        <a href="https://bytedance.github.io/UNO/"><img alt="Build" src="https://img.shields.io/badge/Project%20Page-UNO-yellow"></a>
+        <a href="https://arxiv.org/abs/2504.02160"><img alt="Build" src="https://img.shields.io/badge/arXiv%20paper-UNO-b31b1b.svg"></a>
+        <a href="https://huggingface.co/bytedance-research/UNO"><img src="https://img.shields.io/static/v1?label=%F0%9F%A4%97%20Hugging%20Face&message=Model&color=orange"></a>
+        <a href="https://huggingface.co/spaces/bytedance-research/UNO-FLUX"><img src="https://img.shields.io/static/v1?label=%F0%9F%A4%97%20Hugging%20Face&message=demo&color=orange"></a>
+    </div>
+    """.strip()
+
     with gr.Blocks() as demo:
         gr.Markdown(f"# UNO by UNO team")
+        gr.Markdown(badges_text)
         with gr.Row():
             with gr.Column():
                 prompt = gr.Textbox(label="Prompt", value="handsome woman in the city")
                 with gr.Row():
-                    image_prompt1 = gr.Image(label="
-                    image_prompt2 = gr.Image(label="
-                    image_prompt3 = gr.Image(label="
-                    image_prompt4 = gr.Image(label="
-
-                with gr.Row():
-                    with gr.Column():
-                        ref_long_side = gr.Slider(128, 512, 512, step=16, label="Long side of Ref Images")
-                    with gr.Column():
-                        gr.Markdown("📌 **The recommended ref scale** is related to the ref img number.\n")
-                        gr.Markdown(" 1->512 / 2,3,4->320")
+                    image_prompt1 = gr.Image(label="Ref Img1", visible=True, interactive=True, type="pil")
+                    image_prompt2 = gr.Image(label="Ref Img2", visible=True, interactive=True, type="pil")
+                    image_prompt3 = gr.Image(label="Ref Img3", visible=True, interactive=True, type="pil")
+                    image_prompt4 = gr.Image(label="Ref img4", visible=True, interactive=True, type="pil")

                 with gr.Row():
                     with gr.Column():
@@ -87,7 +90,7 @@ def create_demo(
                     " and the higher size gives a better visual effect but is less stable"
                 )

-                with gr.Accordion("
+                with gr.Accordion("Advanced Options", open=False):
                     with gr.Row():
                         num_steps = gr.Slider(1, 50, 25, step=1, label="Number of steps")
                         guidance = gr.Slider(1.0, 5.0, 4.0, step=0.1, label="Guidance", interactive=True)
@@ -102,7 +105,7 @@ def create_demo(

         inputs = [
             prompt, width, height, guidance, num_steps,
-            seed,
+            seed, image_prompt1, image_prompt2, image_prompt3, image_prompt4
         ]
         generate_btn.click(
             fn=pipeline.gradio_generate,
@@ -118,11 +121,10 @@ def create_demo(
             inputs=[
                 example_text, prompt,
                 image_prompt1, image_prompt2, image_prompt3, image_prompt4,
-                seed,
+                seed, output_image
             ],
         )

-
     return demo

 if __name__ == "__main__":
@@ -145,4 +147,4 @@ if __name__ == "__main__":
     args = args_tuple[0]

     demo = create_demo(args.name, args.device, args.offload)
-    demo.launch(server_port=args.port)
+    demo.launch(server_port=args.port)
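Note on the new wiring: the four gr.Image components are now passed positionally through `inputs`, so Gradio hands them to `UNOPipeline.gradio_generate` in the same order as its parameters. Below is a minimal sketch of that pattern, using a hypothetical `generate` handler and simplified components with illustrative ranges, not the real app:

import gradio as gr

# Hypothetical stand-in for UNOPipeline.gradio_generate: Gradio maps the
# `inputs` list onto these parameters positionally, so the list order must
# match the signature exactly.
def generate(prompt, width, height, guidance, num_steps, seed,
             image_prompt1, image_prompt2, image_prompt3, image_prompt4):
    refs = [im for im in (image_prompt1, image_prompt2, image_prompt3, image_prompt4)
            if im is not None]
    return f"prompt={prompt!r}, {len(refs)} reference image(s), seed={seed}"

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    image_prompts = [gr.Image(label=f"Ref Img{i}", type="pil") for i in range(1, 5)]
    width = gr.Slider(512, 2048, 512, step=16, label="Width")
    height = gr.Slider(512, 2048, 512, step=16, label="Height")
    guidance = gr.Slider(1.0, 5.0, 4.0, step=0.1, label="Guidance")
    num_steps = gr.Slider(1, 50, 25, step=1, label="Number of steps")
    seed = gr.Number(-1, label="Seed (-1 for random)")
    status = gr.Textbox(label="Status")
    gr.Button("Generate").click(
        fn=generate,
        inputs=[prompt, width, height, guidance, num_steps, seed, *image_prompts],
        outputs=[status],
    )

if __name__ == "__main__":
    demo.launch()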
assets/examples/3one2one/config.json
DELETED

@@ -1,8 +0,0 @@
-{
-    "prompt": "3d cartoon style, a woman.",
-    "seed": 2,
-    "ref_long_side": 512,
-    "useage": "one2one",
-    "image_ref1": "./ref1.png",
-    "image_result": "./result.png"
-}
assets/examples/3one2one/ref1.png
DELETED (Git LFS)

assets/examples/3one2one/result.png
DELETED (Git LFS)
assets/examples/{5two2one → 3two2one}/config.json
RENAMED

@@ -1,6 +1,6 @@
 {
     "prompt": "The figurine is in the crystal ball",
-    "seed":
+    "seed": 0,
     "ref_long_side": 320,
     "useage": "two2one",
     "image_ref1": "./ref1.png",
assets/examples/{5two2one → 3two2one}/ref1.png
RENAMED (file without changes)

assets/examples/{5two2one → 3two2one}/ref2.png
RENAMED (file without changes)

assets/examples/{5two2one → 3two2one}/result.png
RENAMED (file without changes)
assets/examples/4two2one/config.json
CHANGED

@@ -1,6 +1,6 @@
 {
     "prompt": "The logo is printed on the cup",
-    "seed":
+    "seed": 61733557,
     "ref_long_side": 320,
     "useage": "two2one",
     "image_ref1": "./ref1.png",
assets/examples/{6many2one → 5many2one}/config.json
RENAMED (file without changes)

assets/examples/{6many2one → 5many2one}/ref1.png
RENAMED (file without changes)

assets/examples/{6many2one → 5many2one}/ref2.png
RENAMED (file without changes)

assets/examples/{6many2one → 5many2one}/ref3.png
RENAMED (file without changes)

assets/examples/{6many2one → 5many2one}/result.png
RENAMED (file without changes)

assets/examples/{7t2i → 6t2i}/config.json
RENAMED (file without changes)

assets/examples/{7t2i → 6t2i}/result.png
RENAMED (file without changes)
uno/flux/pipeline.py
CHANGED

@@ -27,7 +27,7 @@ from uno.flux.modules.layers import (
     SingleStreamBlockLoraProcessor,
     SingleStreamBlockProcessor,
 )
-from uno.flux.sampling import denoise, get_noise, get_schedule,
+from uno.flux.sampling import denoise, get_noise, get_schedule, prepare_multi_ip, unpack
 from uno.flux.util import (
     get_lora_rank,
     load_ae,
@@ -185,10 +185,6 @@ class UNOPipeline:
         guidance: float = 4,
         num_steps: int = 50,
         seed: int = 123456789,
-        true_gs: float = 3,
-        neg_prompt: str = '',
-        neg_image_prompt: Image = None,
-        timestep_to_start_cfg: int = 0,
         **kwargs
     ):
         width = 16 * (width // 16)
@@ -201,9 +197,6 @@ class UNOPipeline:
             guidance,
             num_steps,
             seed,
-            timestep_to_start_cfg=timestep_to_start_cfg,
-            true_gs=true_gs,
-            neg_prompt=neg_prompt,
             **kwargs
         )

@@ -216,7 +209,6 @@ class UNOPipeline:
         guidance: float,
         num_steps: int,
         seed: int,
-        ref_long_side: int,
         image_prompt1: Image.Image,
         image_prompt2: Image.Image,
         image_prompt3: Image.Image,
@@ -224,6 +216,7 @@ class UNOPipeline:
     ):
         ref_imgs = [image_prompt1, image_prompt2, image_prompt3, image_prompt4]
         ref_imgs = [img for img in ref_imgs if isinstance(img, Image.Image)]
+        ref_long_side = 512 if len(ref_imgs) <= 1 else 320
         ref_imgs = [preprocess_ref(img, ref_long_side) for img in ref_imgs]

         seed = seed if seed != -1 else torch.randint(0, 10 ** 8, (1,)).item()
@@ -250,9 +243,6 @@ class UNOPipeline:
         guidance: float,
         num_steps: int,
         seed: int,
-        timestep_to_start_cfg: int = 1e5,  # TODO: unused, delete
-        true_gs: float = 3.5,
-        neg_prompt: str = "",
         ref_imgs: list[Image.Image] | None = None,
         pe: Literal['d', 'h', 'w', 'o'] = 'd',
     ):
@@ -283,11 +273,6 @@ class UNOPipeline:
             img=x,
             prompt=prompt, ref_imgs=x_1_refs, pe=pe
         )
-        neg_inp_cond = prepare_multi_ip(
-            t5=self.t5, clip=self.clip,
-            img=x,
-            prompt=neg_prompt, ref_imgs=x_1_refs, pe=pe
-        )

         if self.offload:
             self.offload_model_to_cpu(self.t5, self.clip)
@@ -298,11 +283,6 @@ class UNOPipeline:
             **inp_cond,
             timesteps=timesteps,
             guidance=guidance,
-            timestep_to_start_cfg=timestep_to_start_cfg,
-            neg_txt=neg_inp_cond['txt'],
-            neg_txt_ids=neg_inp_cond['txt_ids'],
-            neg_vec=neg_inp_cond['vec'],
-            true_gs=true_gs,
         )

         if self.offload:
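With `ref_long_side` gone from the signature, `gradio_generate` now derives it from the number of reference images (512 for a single reference, 320 for two or more) before resizing. A minimal sketch of what that step amounts to, using a hypothetical `resize_long_side` helper in place of the real `preprocess_ref`, whose implementation is not part of this diff:

from PIL import Image

def resize_long_side(img: Image.Image, long_side: int) -> Image.Image:
    # Scale so the longer edge equals `long_side`, preserving aspect ratio.
    w, h = img.size
    scale = long_side / max(w, h)
    return img.resize((int(w * scale), int(h * scale)), Image.LANCZOS)

def prepare_refs(ref_imgs: list[Image.Image]) -> list[Image.Image]:
    # Mirrors the new rule in UNOPipeline.gradio_generate:
    # one reference image -> long side 512, two or more -> long side 320.
    long_side = 512 if len(ref_imgs) <= 1 else 320
    return [resize_long_side(img, long_side) for img in ref_imgs]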
uno/flux/sampling.py
CHANGED

@@ -215,14 +215,9 @@ def denoise(
     txt: Tensor,
     txt_ids: Tensor,
     vec: Tensor,
-    neg_txt: Tensor,
-    neg_txt_ids: Tensor,
-    neg_vec: Tensor,
     # sampling parameters
     timesteps: list[float],
     guidance: float = 4.0,
-    true_gs = 1,
-    timestep_to_start_cfg=0,
     ref_img: Tensor=None,
     ref_img_ids: Tensor=None,
 ):
@@ -241,20 +236,6 @@ def denoise(
             timesteps=t_vec,
             guidance=guidance_vec
         )
-        if i >= timestep_to_start_cfg:
-            # not test
-            neg_pred = model(
-                img=img,
-                img_ids=img_ids,
-                ref_img=ref_img,  # TODO: neg img embedding
-                ref_img_ids=ref_img_ids,
-                txt=neg_txt,
-                txt_ids=neg_txt_ids,
-                y=neg_vec,
-                timesteps=t_vec,
-                guidance=guidance_vec,
-            )
-            pred = neg_pred + true_gs * (pred - neg_pred)
         img = img + (t_prev - t_curr) * pred
         i += 1
     return img
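With the negative-prompt branch removed, `denoise` is a plain Euler integration of the predicted velocity over the timestep schedule. A framework-level sketch of that loop, with `model_fn` standing in for the full FLUX transformer call and its text, vector, and reference-image inputs:

import torch

def euler_denoise(model_fn, img: torch.Tensor, timesteps: list[float],
                  guidance: float = 4.0) -> torch.Tensor:
    # Each step moves `img` along the predicted velocity from t_curr to t_prev;
    # this is the loop that remains once the classifier-free-guidance branch is gone.
    for t_curr, t_prev in zip(timesteps[:-1], timesteps[1:]):
        t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
        guidance_vec = torch.full((img.shape[0],), guidance, dtype=img.dtype, device=img.device)
        pred = model_fn(img, t_vec, guidance_vec)
        img = img + (t_prev - t_curr) * pred
    return img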
uno/flux/util.py
CHANGED

@@ -271,7 +271,11 @@ def load_flow_model_only_lora(
         ckpt_path = hf_hub_download(configs[name].repo_id, configs[name].repo_flow.replace("sft", "safetensors"))

     if hf_download:
-        lora_ckpt_path = hf_hub_download("bytedance-research/UNO", "dit_lora.safetensors")
+        # lora_ckpt_path = hf_hub_download("bytedance-research/UNO", "dit_lora.safetensors")
+        try:
+            lora_ckpt_path = hf_hub_download("bytedance-research/UNO", "dit_lora.safetensors")
+        except:
+            lora_ckpt_path = os.environ.get("LORA", None)
     else:
         lora_ckpt_path = os.environ.get("LORA", None)

@@ -362,10 +366,12 @@ def load_flow_model_quintized(name: str, device: str | torch.device = "cuda", hf

 def load_t5(device: str | torch.device = "cuda", max_length: int = 512) -> HFEmbedder:
     # max length 64, 128, 256 and 512 should work (if your sequence is short enough)
-
+    version = os.environ.get("T5", "xlabs-ai/xflux_text_encoders")
+    return HFEmbedder(version, max_length=max_length, torch_dtype=torch.bfloat16).to(device)

 def load_clip(device: str | torch.device = "cuda") -> HFEmbedder:
-
+    version = os.environ.get("CLIP", "openai/clip-vit-large-patch14")
+    return HFEmbedder(version, max_length=77, torch_dtype=torch.bfloat16).to(device)


 def load_ae(name: str, device: str | torch.device = "cuda", hf_download: bool = True) -> AutoEncoder:
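The encoder loaders now resolve their checkpoints through environment variables, falling back to the public Hugging Face repos when the variables are unset, and `load_flow_model_only_lora` falls back to a local `LORA` path if the hub download fails. A short usage sketch, with hypothetical local paths:

import os

# Hypothetical local mirrors; if a variable is unset, the loaders fall back to
# "xlabs-ai/xflux_text_encoders" and "openai/clip-vit-large-patch14".
os.environ["T5"] = "/models/xflux_text_encoders"
os.environ["CLIP"] = "/models/clip-vit-large-patch14"
os.environ["LORA"] = "/models/uno/dit_lora.safetensors"  # used only if the hub download fails

from uno.flux.util import load_clip, load_t5

t5 = load_t5(device="cuda", max_length=512)
clip = load_clip(device="cuda")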