Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -29,9 +29,12 @@ print(device)
|
|
| 29 |
# init stable diffusion model
|
| 30 |
pipe = ComposableStableDiffusionPipeline.from_pretrained(
|
| 31 |
"CompVis/stable-diffusion-v1-4",
|
| 32 |
-
use_auth_token=
|
| 33 |
).to(device)
|
| 34 |
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
# create model for CLEVR Objects
|
| 37 |
clevr_options = model_and_diffusion_defaults_for_clevr()
|
|
@@ -113,48 +116,54 @@ def compose_clevr_objects(prompt, guidance_scale, steps):
|
|
| 113 |
return out_img
|
| 114 |
|
| 115 |
|
| 116 |
-
def stable_diffusion_compose(prompt, scale, steps):
|
|
|
|
| 117 |
with autocast('cpu' if not th.cuda.is_available() else 'cuda'):
|
| 118 |
-
image = pipe(prompt, guidance_scale=scale, num_inference_steps=steps
|
|
|
|
|
|
|
| 119 |
return image
|
| 120 |
|
| 121 |
|
| 122 |
-
def compose(prompt, version, guidance_scale, steps):
|
| 123 |
try:
|
| 124 |
with th.no_grad():
|
| 125 |
if version == 'Stable_Diffusion_1v_4':
|
| 126 |
-
return stable_diffusion_compose(prompt, guidance_scale, steps)
|
| 127 |
else:
|
| 128 |
return compose_clevr_objects(prompt, guidance_scale, steps)
|
| 129 |
except Exception as e:
|
| 130 |
print(e)
|
| 131 |
return None
|
| 132 |
|
| 133 |
-
|
| 134 |
-
examples_1 = 'a camel | a forest'
|
| 135 |
examples_2 = 'A blue sky | A mountain in the horizon | Cherry Blossoms in front of the mountain'
|
| 136 |
examples_3 = '0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5'
|
| 137 |
-
examples_4 = 'a
|
| 138 |
examples_5 = 'a white church | lightning in the background'
|
| 139 |
examples_6 = 'a camel | arctic'
|
| 140 |
examples_7 = 'A lake | A mountain | Cherry Blossoms next to the lake'
|
| 141 |
examples = [
|
| 142 |
-
[
|
| 143 |
-
[
|
| 144 |
-
[
|
| 145 |
-
[
|
| 146 |
-
[
|
|
|
|
|
|
|
| 147 |
]
|
| 148 |
|
| 149 |
title = 'Compositional Visual Generation with Composable Diffusion Models'
|
| 150 |
-
description = '<p>
|
| 151 |
|
| 152 |
iface = gr.Interface(compose,
|
| 153 |
inputs=[
|
| 154 |
-
gr.Textbox(label='prompt', value='a
|
|
|
|
| 155 |
gr.Radio(['Stable_Diffusion_1v_4', 'CLEVR Objects'], type="value", label='version', value='Stable_Diffusion_1v_4'),
|
| 156 |
gr.Slider(2, 30, value=15),
|
| 157 |
-
gr.Slider(10, 200, value=50)
|
|
|
|
| 158 |
],
|
| 159 |
outputs='image', cache_examples=False,
|
| 160 |
title=title, description=description, examples=examples)
|
|
|
|
| 29 |
# init stable diffusion model
# SECURITY FIX: the original committed a live Hugging Face access token
# ('hf_lojJ...') into a public Space. That token must be revoked. Read the
# token from the environment instead; fall back to True, which tells
# `from_pretrained` to use the locally cached `huggingface-cli login` token.
import os

pipe = ComposableStableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    use_auth_token=os.environ.get("HF_TOKEN", True),
).to(device)
|
| 34 |
|
| 35 |
+
def dummy(images, **kwargs):
    """No-op replacement for the pipeline's safety checker.

    Returns the images unchanged, plus False meaning "no NSFW content
    detected", so generated images are never replaced by black squares.
    Extra keyword arguments from the pipeline are accepted and ignored.
    """
    return images, False
|
| 37 |
+
# Disable the built-in NSFW safety checker (see `dummy` above) so results
# are returned as-is instead of being blacked out on false positives.
pipe.safety_checker = dummy
|
| 38 |
|
| 39 |
# create model for CLEVR Objects
|
| 40 |
clevr_options = model_and_diffusion_defaults_for_clevr()
|
|
|
|
| 116 |
return out_img
|
| 117 |
|
| 118 |
|
| 119 |
+
def stable_diffusion_compose(prompt, scale, steps, weights, seed):
    """Generate one image with the composable Stable Diffusion pipeline.

    Args:
        prompt: '|'-separated text conditions composed by the pipeline.
        scale: classifier-free guidance scale.
        steps: number of inference steps.
        weights: '|'-separated per-prompt weights (negative => negation).
        seed: RNG seed; cast to int so Gradio's float Number input works.

    Returns:
        The first generated PIL image; also saved to disk as
        '<prompt_with_underscores>.png' for inspection.
    """
    # BUG FIX: the original always built th.Generator("cuda"), which raises
    # on CPU-only machines even though the autocast line below explicitly
    # supports CPU. Pick the device that is actually available.
    device_str = 'cuda' if th.cuda.is_available() else 'cpu'
    generator = th.Generator(device_str).manual_seed(int(seed))
    with autocast(device_str):
        image = pipe(prompt, guidance_scale=scale, num_inference_steps=steps,
                     weights=weights, generator=generator)["sample"][0]
    image.save(f'{"_".join(prompt.split())}.png')
    return image
|
| 126 |
|
| 127 |
|
| 128 |
+
def compose(prompt, weights, version, guidance_scale, steps, seed):
    """Gradio entry point: dispatch to the selected model version.

    Args:
        prompt: '|'-separated text conditions.
        weights: '|'-separated per-prompt weights (Stable Diffusion only).
        version: 'Stable_Diffusion_1v_4' or 'CLEVR Objects'.
        guidance_scale: classifier-free guidance scale.
        steps: number of inference steps.
        seed: RNG seed (Stable Diffusion only).

    Returns:
        The generated image, or None if generation failed (the error is
        printed so the demo UI degrades gracefully instead of crashing).
    """
    try:
        with th.no_grad():  # inference only — no gradients needed
            if version == 'Stable_Diffusion_1v_4':
                return stable_diffusion_compose(prompt, guidance_scale, steps, weights, seed)
            # The CLEVR model ignores weights and seed (conjunction only).
            return compose_clevr_objects(prompt, guidance_scale, steps)
    except Exception as e:
        # NOTE(review): broad except is deliberate at this UI boundary, but
        # consider logging the full traceback instead of just printing `e`.
        print(e)
        return None
|
| 138 |
|
| 139 |
+
# Example prompts. '|' separates the individual text conditions that the
# composable diffusion model combines into one image.
examples_1 = "A castle in a forest | grainy, fog"
examples_2 = 'A blue sky | A mountain in the horizon | Cherry Blossoms in front of the mountain'
examples_3 = '0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5'  # CLEVR (x, y) coordinates
examples_4 = 'a photo of Obama | a photo of Biden'
examples_5 = 'a white church | lightning in the background'
examples_6 = 'a camel | arctic'
examples_7 = 'A lake | A mountain | Cherry Blossoms next to the lake'

# Each row matches `compose`'s inputs:
# [prompt, weights, version, guidance_scale, steps, seed].
# A negative weight applies the negation (NOT) operator to that prompt.
examples = [
    [examples_1, "1 | -1", 'Stable_Diffusion_1v_4', 15, 50, 0],
    [examples_4, "1 | 1", 'Stable_Diffusion_1v_4', 15, 50, 0],
    [examples_7, "1 | 1 | 1", 'Stable_Diffusion_1v_4', 15, 50, 0],
    [examples_5, "1 | 1", 'Stable_Diffusion_1v_4', 15, 50, 0],
    [examples_6, "1 | 1", 'Stable_Diffusion_1v_4', 15, 50, 0],
    [examples_6, "1 | -1", 'Stable_Diffusion_1v_4', 15, 50, 0],
    [examples_3, "1 | 1 | 1 | 1 | 1", 'CLEVR Objects', 10, 100, 0],
]

title = 'Compositional Visual Generation with Composable Diffusion Models'
description = '<p>Our conjucntion and negation operators are also added into stable diffusion webui! (<a href="https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Negative-prompt">Negation</a> and <a href="https://github.com/AUTOMATIC1111/stable-diffusion-webui/commit/c26732fbee2a57e621ac22bf70decf7496daa4cd">Conjunction</a>)</p></p><p>See more information from our <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/">Project Page</a>.</p><ul><li>One version is based on the released <a href="https://github.com/openai/glide-text2im">GLIDE</a> and <a href="https://github.com/CompVis/stable-diffusion/">Stable Diffusion</a> for composing natural language description.</li><li>Another is based on our pre-trained CLEVR Object Model for composing objects. <br>(<b>Note</b>: We recommend using <b><i>x</i></b> in range <b><i>[0.1, 0.9]</i></b> and <b><i>y</i></b> in range <b><i>[0.25, 0.7]</i></b>, since the training dataset labels are in given ranges.)</li></ul><p>When composing multiple sentences, use `|` as the delimiter, see given examples below.</p><p>You can also specify the weight of each text by using `|` as the delimiter. When the weight is negative, it will use Negation Operator (NOT). Otherwise it will use Conjucntion operator (AND).</p><p><b>Only Conjunction operator is enabled for CLEVR Object.</b></p><p><b>Note: When using Stable Diffusion, black images will be returned if the given prompt is detected as problematic. For composing GLIDE model, we recommend using the Colab demo in our <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/">Project Page</a>.</b></p>'

# Build the Gradio demo; input order must match `compose`'s signature.
iface = gr.Interface(compose,
                     inputs=[
                         gr.Textbox(label='prompt', value='a photo of Obama | a photo of Biden'),
                         gr.Textbox(label='weights', value='1 | 1'),
                         gr.Radio(['Stable_Diffusion_1v_4', 'CLEVR Objects'], type="value", label='version', value='Stable_Diffusion_1v_4'),
                         gr.Slider(2, 30, value=15),   # guidance_scale
                         gr.Slider(10, 200, value=50), # steps
                         gr.Number(0)                  # seed
                     ],
                     outputs='image', cache_examples=False,
                     title=title, description=description, examples=examples)