import os
import sys
import gradio as gr
import torch
import random
import numpy as np
from PIL import Image

# Setup: clone the TotoroUI fork of ComfyUI (totoro2 branch)
os.chdir('/content')
!git clone -b totoro2 https://github.com/camenduru/ComfyUI /content/TotoroUI
os.chdir('/content/TotoroUI')

# Write a pinned requirements file for the UI and inference stack
requirements_content = """torch
torchsde
einops
diffusers
accelerate
xformers==0.0.26.post1
gradio"""

with open("requirements.txt", "w") as f:
    f.write(requirements_content)

# Install dependencies from requirements.txt
!pip install -r requirements.txt
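
# Note (an assumption worth checking): the xformers==0.0.26.post1 wheel is
# built against a specific torch release (2.3.x), so pip may replace Colab's
# preinstalled torch with a matching version during this install.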

# Install aria2
!apt -y install -qq aria2

# Download the SD3 Medium checkpoint (bundles the CLIP and fp8 T5-XXL text encoders)
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/adamo1139/stable-diffusion-3-medium-ungated/resolve/main/sd3_medium_incl_clips_t5xxlfp8.safetensors -d /content/TotoroUI/model -o sd3_medium_incl_clips_t5xxlfp8.safetensors
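
# Alternative download path (a sketch, not required): if aria2 is unavailable,
# huggingface_hub can fetch the same file. huggingface_hub ships as a
# dependency of diffusers, so it should already be importable here.
# from huggingface_hub import hf_hub_download
# hf_hub_download(repo_id="adamo1139/stable-diffusion-3-medium-ungated",
#                 filename="sd3_medium_incl_clips_t5xxlfp8.safetensors",
#                 local_dir="/content/TotoroUI/model")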

# Add TotoroUI to sys.path
sys.path.append('/content/TotoroUI')

# Import after adding to sys.path
import node_helpers
from totoro.sd import load_checkpoint_guess_config
import nodes

# Detect CUDA; used below to move the VAE to the GPU before decoding
use_cuda = torch.cuda.is_available()

# Load the checkpoint; returns the patched diffusion model, the text
# encoders (CLIP), the VAE, and a CLIP vision model (unused here)
model_patcher, clip, vae, clipvision = load_checkpoint_guess_config(
    "/content/TotoroUI/model/sd3_medium_incl_clips_t5xxlfp8.safetensors",
    output_vae=True, output_clip=True, embedding_directory=None
)

def zero_out(conditioning):
    """Zero out a conditioning list (tensor and pooled output), mirroring
    ComfyUI's ConditioningZeroOut node. Used below so the negative prompt
    only applies to the first 10% of sampling."""
    c = []
    for t in conditioning:
        d = t[1].copy()
        if "pooled_output" in d:
            d["pooled_output"] = torch.zeros_like(d["pooled_output"])
        n = [torch.zeros_like(t[0]), d]
        c.append(n)
    return (c, )

def generate_image(prompt, negative_prompt, steps):
    """Run one SD3 sampling pass and return the result as a PIL image."""
    with torch.inference_mode():
        # Empty 1024x1024 SD3 latent (16 channels); 0.0609 is the fill value
        # used by ComfyUI's EmptySD3LatentImage node
        latent = {"samples": torch.ones([1, 16, 1024 // 8, 1024 // 8]) * 0.0609}

        # Encode the positive and negative prompts
        cond, pooled = clip.encode_from_tokens(clip.tokenize(prompt), return_pooled=True)
        cond = [[cond, {"pooled_output": pooled}]]

        n_cond, n_pooled = clip.encode_from_tokens(clip.tokenize(negative_prompt), return_pooled=True)
        n_cond = [[n_cond, {"pooled_output": n_pooled}]]

        # Apply the real negative prompt only for the first 10% of sampling,
        # then switch to zeroed-out conditioning for the remaining 90%
        n_cond1 = node_helpers.conditioning_set_values(n_cond, {"start_percent": 0, "end_percent": 0.1})
        n_cond2 = zero_out(n_cond)
        n_cond2 = node_helpers.conditioning_set_values(n_cond2[0], {"start_percent": 0.1, "end_percent": 1.0})
        n_cond = n_cond1 + n_cond2

        # Random 64-bit seed for each generation
        seed = random.randint(0, 18446744073709551615)
        
        sample = nodes.common_ksampler(
            model=model_patcher,
            seed=seed,
            steps=steps,
            cfg=4.5,
            sampler_name="dpmpp_2m",
            scheduler="sgm_uniform",
            positive=cond,
            negative=n_cond,
            latent=latent,
            denoise=1
        )

        # Cast latents to fp16 and decode tile-by-tile to limit VRAM usage
        sample = sample[0]["samples"].to(torch.float16)

        if use_cuda:
            vae.first_stage_model.cuda()
        decoded = vae.decode_tiled(sample).detach()

    # The VAE returns floats in [0, 1]; scale to uint8 and take the first
    # (and only) image in the batch
    return Image.fromarray(np.array(decoded * 255, dtype=np.uint8)[0])
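
# Optional smoke test (an illustrative sketch, not part of the UI flow):
# set RUN_SMOKE_TEST=1 in the environment to render one image before the
# Gradio app starts. The prompt strings here are arbitrary examples.
if os.environ.get("RUN_SMOKE_TEST") == "1":
    test_image = generate_image("a photo of an astronaut riding a horse", "blurry, low quality", 28)
    test_image.save("/content/smoke_test.png")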

# Gradio interface
interface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Negative Prompt"),
        gr.Slider(label="Steps", minimum=1, maximum=200, step=1, value=28)
    ],
    outputs=gr.Image(label="Generated Image")
)
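
# Note: in a Colab notebook, launch(share=True) creates a public gradio.live
# link, and launch(debug=True) keeps the cell running so errors are printed.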

interface.launch()