Spaces:

Shizuku-AI
/

StreamDiffusion-realtime-txt2img

Runtime error

App Files Files Community

radames commited on Dec 19, 2023

Commit

a0a2ed9

1 Parent(s): 8d841f8

Upload 21 files

Browse files

Files changed (7) hide show

server/config.py +6 -3
server/main.py +6 -4
server/requirements.txt +1 -3
server/wrapper.py +467 -94
start.sh +3 -1
view/.DS_Store +0 -0
view/src/App.tsx +7 -4

server/config.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from dataclasses import dataclass, field
-from typing import List
 import torch
 import os
@@ -24,8 +24,9 @@ class Config:
     ####################################################################
     # Model configuration
     ####################################################################
     # SD1.x variant model
-    model_id: str = "SimianLuo/LCM_Dreamshaper_v7"
     # LCM-LORA model
     lcm_lora_id: str = "latent-consistency/lcm-lora-sdv1-5"
     # TinyVAE model
@@ -34,6 +35,8 @@ class Config:
     device: torch.device = torch.device("cuda")
     # Data type
     dtype: torch.dtype = torch.float16
     ####################################################################
     # Inference configuration
@@ -42,4 +45,4 @@ class Config:
     t_index_list: List[int] = field(default_factory=lambda: [0, 16, 32, 45])
     # Number of warmup steps
     warmup: int = 10
-    safety_checker: bool = SAFETY_CHECKER

 from dataclasses import dataclass, field
+from typing import List, Literal
 import torch
 import os
     ####################################################################
     # Model configuration
     ####################################################################
+    mode: Literal["txt2img", "img2img"] = "txt2img"
     # SD1.x variant model
+    model_id: str = "KBlueLeaf/kohaku-v2.1"
     # LCM-LORA model
     lcm_lora_id: str = "latent-consistency/lcm-lora-sdv1-5"
     # TinyVAE model
     device: torch.device = torch.device("cuda")
     # Data type
     dtype: torch.dtype = torch.float16
+    # acceleration
+    acceleration: Literal["none", "xformers", "sfast", "tensorrt"] = "xformers"
     ####################################################################
     # Inference configuration
     t_index_list: List[int] = field(default_factory=lambda: [0, 16, 32, 45])
     # Number of warmup steps
     warmup: int = 10
+    use_safety_checker: bool = SAFETY_CHECKER

server/main.py CHANGED Viewed

@@ -55,14 +55,16 @@ class Api:
         """
         self.config = config
         self.stream_diffusion = StreamDiffusionWrapper(
             model_id=config.model_id,
             lcm_lora_id=config.lcm_lora_id,
             vae_id=config.vae_id,
             device=config.device,
             dtype=config.dtype,
             t_index_list=config.t_index_list,
             warmup=config.warmup,
-            safety_checker=config.safety_checker,
         )
         self.app = FastAPI()
         self.app.add_api_route(
@@ -85,8 +87,6 @@ class Api:
         self._predict_lock = asyncio.Lock()
         self._update_prompt_lock = asyncio.Lock()
-        self.last_prompt: str = ""
     async def _predict(self, inp: PredictInputModel) -> PredictResponseModel:
         """
         Predict an image and return.
@@ -102,7 +102,9 @@ class Api:
             The prediction result.
         """
         async with self._predict_lock:
-            return PredictResponseModel(base64_image=self._pil_to_base64(self.stream_diffusion(inp.prompt)))
     def _pil_to_base64(self, image: Image.Image, format: str = "JPEG") -> bytes:
         """

         """
         self.config = config
         self.stream_diffusion = StreamDiffusionWrapper(
+            mode=config.mode,
             model_id=config.model_id,
             lcm_lora_id=config.lcm_lora_id,
             vae_id=config.vae_id,
             device=config.device,
             dtype=config.dtype,
+            acceleration=config.acceleration,
             t_index_list=config.t_index_list,
             warmup=config.warmup,
+            use_safety_checker=config.use_safety_checker,
         )
         self.app = FastAPI()
         self.app.add_api_route(
         self._predict_lock = asyncio.Lock()
         self._update_prompt_lock = asyncio.Lock()
     async def _predict(self, inp: PredictInputModel) -> PredictResponseModel:
         """
         Predict an image and return.
             The prediction result.
         """
         async with self._predict_lock:
+            return PredictResponseModel(
+                base64_image=self._pil_to_base64(self.stream_diffusion(prompt=inp.prompt))
+            )
     def _pil_to_base64(self, image: Image.Image, format: str = "JPEG") -> bytes:
         """

server/requirements.txt CHANGED Viewed

@@ -2,7 +2,6 @@ xformers
 uvicorn[standard]==0.24.0.post1
 fastapi==0.104
 accelerate
-git+https://github.com/huggingface/diffusers@781775ea56160a6dea3d53fd5005d0d7fca5f10a
 # git+https://github.com/cumulo-autumn/StreamDiffusion.git@main#egg=stream-diffusion
 --extra-index-url https://download.pytorch.org/whl/cu121
 torch
@@ -10,5 +9,4 @@ torchvision
 torchaudio
 triton
 # https://github.com/chengzeyi/stable-fast --index-url https://download.pytorch.org/whl/cu121
-# https://github.com/chengzeyi/stable-fast/releases/download/v0.0.14/stable_fast-0.0.14+torch210cu121-cp310-cp310-manylinux2014_x86_64.whl
-https://github.com/chengzeyi/stable-fast/releases/download/v0.0.15.post1/stable_fast-0.0.15.post1+torch211cu121-cp310-cp310-manylinux2014_x86_64.whl

 uvicorn[standard]==0.24.0.post1
 fastapi==0.104
 accelerate
 # git+https://github.com/cumulo-autumn/StreamDiffusion.git@main#egg=stream-diffusion
 --extra-index-url https://download.pytorch.org/whl/cu121
 torch
 torchaudio
 triton
 # https://github.com/chengzeyi/stable-fast --index-url https://download.pytorch.org/whl/cu121
+https://github.com/chengzeyi/stable-fast/releases/download/v0.0.14/stable_fast-0.0.14+torch210cu121-cp310-cp310-manylinux2014_x86_64.whl

server/wrapper.py CHANGED Viewed

@@ -1,156 +1,529 @@
-import io
 import os
-from typing import List
-import PIL.Image
-import requests
 import torch
 from diffusers import AutoencoderTiny, StableDiffusionPipeline
 from streamdiffusion import StreamDiffusion
 from streamdiffusion.image_utils import postprocess_image
-def download_image(url: str):
-    response = requests.get(url)
-    image = PIL.Image.open(io.BytesIO(response.content))
-    return image
 class StreamDiffusionWrapper:
     def __init__(
         self,
         model_id: str,
-        lcm_lora_id: str,
-        vae_id: str,
-        device: str,
-        dtype: str,
         t_index_list: List[int],
-        warmup: int,
-        safety_checker: bool,
     ):
         self.device = device
         self.dtype = dtype
-        self.prompt = ""
-        self.batch_size = len(t_index_list)
         self.stream = self._load_model(
             model_id=model_id,
             lcm_lora_id=lcm_lora_id,
             vae_id=vae_id,
             t_index_list=t_index_list,
             warmup=warmup,
         )
-        self.safety_checker = None
-        if safety_checker:
-            from transformers import CLIPFeatureExtractor
-            from diffusers.pipelines.stable_diffusion.safety_checker import (
-                StableDiffusionSafetyChecker,
             )
-            self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
-                "CompVis/stable-diffusion-safety-checker"
             ).to(self.device)
-            self.feature_extractor = CLIPFeatureExtractor.from_pretrained(
-                "openai/clip-vit-base-patch32"
             )
-            self.nsfw_fallback_img = PIL.Image.new("RGB", (512, 512), (0, 0, 0))
-        self.stream.prepare("")
     def _load_model(
         self,
         model_id: str,
-        lcm_lora_id: str,
-        vae_id: str,
         t_index_list: List[int],
-        warmup: int,
     ):
-        if os.path.exists(model_id):
-            pipe: StableDiffusionPipeline = StableDiffusionPipeline.from_single_file(
-                model_id
-            ).to(device=self.device, dtype=self.dtype)
-        else:
             pipe: StableDiffusionPipeline = StableDiffusionPipeline.from_pretrained(
                 model_id
             ).to(device=self.device, dtype=self.dtype)
         stream = StreamDiffusion(
             pipe=pipe,
             t_index_list=t_index_list,
             torch_dtype=self.dtype,
-            is_drawing=True,
-        )
-        stream.load_lcm_lora(lcm_lora_id)
-        stream.fuse_lora()
-        stream.vae = AutoencoderTiny.from_pretrained(vae_id).to(
-            device=pipe.device, dtype=pipe.dtype
         )
         try:
-            from streamdiffusion.acceleration.tensorrt import accelerate_with_tensorrt
-            stream = accelerate_with_tensorrt(
-                stream,
-                "engines",
-                max_batch_size=self.batch_size,
-                engine_build_options={"build_static_batch": False},
-            )
-            print("TensorRT acceleration enabled.")
-        except Exception:
-            print("TensorRT acceleration has failed. Trying to use Stable Fast.")
-            try:
                 from streamdiffusion.acceleration.sfast import (
                     accelerate_with_stable_fast,
                 )
                 stream = accelerate_with_stable_fast(stream)
                 print("StableFast acceleration enabled.")
-            except Exception:
-                print("StableFast acceleration has failed. Using normal mode.")
-                pass
         stream.prepare(
             "",
             num_inference_steps=50,
             generator=torch.manual_seed(2),
         )
-        # warmup
-        for _ in range(warmup):
-            start = torch.cuda.Event(enable_timing=True)
-            end = torch.cuda.Event(enable_timing=True)
-            start.record()
-            stream.txt2img()
-            end.record()
-            torch.cuda.synchronize()
         return stream
-    def __call__(self, prompt: str) -> PIL.Image.Image:
-        if self.prompt != prompt:
-            self.stream.update_prompt(prompt)
-            self.prompt = prompt
-            for i in range(self.batch_size):
-                x_output = self.stream.txt2img()
-        x_output = self.stream.txt2img()
-        image = postprocess_image(x_output, output_type="pil")[0]
-        if self.safety_checker:
-            safety_checker_input = self.feature_extractor(
-                image, return_tensors="pt"
-            ).to(self.device)
-            _, has_nsfw_concept = self.safety_checker(
-                images=x_output,
-                clip_input=safety_checker_input.pixel_values.to(self.dtype),
-            )
-            image = self.nsfw_fallback_img if has_nsfw_concept[0] else image
-        return image
-if __name__ == "__main__":
-    wrapper = StreamDiffusionWrapper(10, 10)
-    wrapper()

+import gc
 import os
+import traceback
+from typing import List, Literal, Optional, Union
+import numpy as np
 import torch
 from diffusers import AutoencoderTiny, StableDiffusionPipeline
+from PIL import Image
+from polygraphy import cuda
 from streamdiffusion import StreamDiffusion
 from streamdiffusion.image_utils import postprocess_image
+torch.set_grad_enabled(False)
+torch.backends.cuda.matmul.allow_tf32 = True
+torch.backends.cudnn.allow_tf32 = True
 class StreamDiffusionWrapper:
     def __init__(
         self,
         model_id: str,
         t_index_list: List[int],
+        mode: Literal["img2img", "txt2img"] = "img2img",
+        output_type: Literal["pil", "pt", "np", "latent"] = "pil",
+        lcm_lora_id: Optional[str] = None,
+        vae_id: Optional[str] = None,
+        device: Literal["cpu", "cuda"] = "cuda",
+        dtype: torch.dtype = torch.float16,
+        frame_buffer_size: int = 1,
+        width: int = 512,
+        height: int = 512,
+        warmup: int = 10,
+        acceleration: Literal["none", "xformers", "sfast", "tensorrt"] = "xformers",
+        is_drawing: bool = True,
+        device_ids: Optional[List[int]] = None,
+        use_lcm_lora: bool = True,
+        use_tiny_vae: bool = True,
+        enable_similar_image_filter: bool = False,
+        similar_image_filter_threshold: float = 0.98,
+        use_denoising_batch: bool = True,
+        cfg_type: Literal["none", "full", "self", "initialize"] = "none",
+        use_safety_checker: bool = False,
     ):
+        if mode == "txt2img":
+            if cfg_type != "none":
+                raise ValueError(
+                    f"txt2img mode accepts only cfg_type = 'none', but got {cfg_type}"
+                )
+            if use_denoising_batch and frame_buffer_size > 1:
+                raise ValueError(
+                    "txt2img mode cannot use denoising batch with frame_buffer_size > 1."
+                )
+        if mode == "img2img":
+            if not use_denoising_batch:
+                raise NotImplementedError(
+                    "img2img mode must use denoising batch for now."
+                )
+        self.sd_turbo = "turbo" in model_id
         self.device = device
         self.dtype = dtype
+        self.width = width
+        self.height = height
+        self.mode = mode
+        self.output_type = output_type
+        self.frame_buffer_size = frame_buffer_size
+        self.batch_size = (
+            len(t_index_list) * frame_buffer_size
+            if use_denoising_batch
+            else frame_buffer_size
+        )
+        self.use_denoising_batch = use_denoising_batch
+        self.use_safety_checker = use_safety_checker
         self.stream = self._load_model(
             model_id=model_id,
             lcm_lora_id=lcm_lora_id,
             vae_id=vae_id,
             t_index_list=t_index_list,
+            acceleration=acceleration,
             warmup=warmup,
+            is_drawing=is_drawing,
+            use_lcm_lora=use_lcm_lora,
+            use_tiny_vae=use_tiny_vae,
+            cfg_type=cfg_type,
         )
+        if device_ids is not None:
+            self.stream.unet = torch.nn.DataParallel(
+                self.stream.unet, device_ids=device_ids
             )
+        if enable_similar_image_filter:
+            self.stream.enable_similar_image_filter(similar_image_filter_threshold)
+    def prepare(
+        self,
+        prompt: str,
+        negative_prompt: str = "",
+        num_inference_steps: int = 50,
+        guidance_scale: float = 1.2,
+        delta: float = 1.0,
+    ) -> None:
+        """
+        Prepares the model for inference.
+        Parameters
+        ----------
+        prompt : str
+            The prompt to generate images from.
+        num_inference_steps : int, optional
+            The number of inference steps to perform, by default 50.
+        """
+        self.stream.prepare(
+            prompt,
+            negative_prompt,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+            delta=delta,
+        )
+    def __call__(
+        self,
+        image: Optional[Union[str, Image.Image, torch.Tensor]] = None,
+        prompt: Optional[str] = None,
+    ) -> Union[Image.Image, List[Image.Image]]:
+        """
+        Performs img2img or txt2img based on the mode.
+        Parameters
+        ----------
+        image : Optional[Union[str, Image.Image, torch.Tensor]]
+            The image to generate from.
+        prompt : Optional[str]
+            The prompt to generate images from.
+        Returns
+        -------
+        Union[Image.Image, List[Image.Image]]
+            The generated image.
+        """
+        if self.mode == "img2img":
+            return self.img2img(image)
+        else:
+            return self.txt2img(prompt)
+    def txt2img(
+        self, prompt: str
+    ) -> Union[Image.Image, List[Image.Image], torch.Tensor, np.ndarray]:
+        """
+        Performs txt2img.
+        Parameters
+        ----------
+        prompt : str
+            The prompt to generate images from.
+        Returns
+        -------
+        Union[Image.Image, List[Image.Image]]
+            The generated image.
+        """
+        self.stream.update_prompt(prompt)
+        if self.sd_turbo:
+            image_tensor = self.stream.txt2img_sd_turbo(self.batch_size)
+        else:
+            image_tensor = self.stream.txt2img(self.frame_buffer_size)
+        image = self.postprocess_image(image_tensor, output_type=self.output_type)
+        if self.use_safety_checker:
+            safety_checker_input = self.feature_extractor(
+                image, return_tensors="pt"
             ).to(self.device)
+            _, has_nsfw_concept = self.safety_checker(
+                images=image_tensor.to(self.dtype),
+                clip_input=safety_checker_input.pixel_values.to(self.dtype),
             )
+            image = self.nsfw_fallback_img if has_nsfw_concept[0] else image
+        return image
+    def img2img(
+        self, image: Union[str, Image.Image, torch.Tensor]
+    ) -> Union[Image.Image, List[Image.Image], torch.Tensor, np.ndarray]:
+        """
+        Performs img2img.
+        Parameters
+        ----------
+        image : Union[str, Image.Image, torch.Tensor]
+            The image to generate from.
+        Returns
+        -------
+        Image.Image
+            The generated image.
+        """
+        if isinstance(image, str) or isinstance(image, Image.Image):
+            image = self.preprocess_image(image)
+        image_tensor = self.stream(image)
+        return self.postprocess_image(image_tensor, output_type=self.output_type)
+    def preprocess_image(self, image: Union[str, Image.Image]) -> torch.Tensor:
+        """
+        Preprocesses the image.
+        Parameters
+        ----------
+        image : Union[str, Image.Image, torch.Tensor]
+            The image to preprocess.
+        Returns
+        -------
+        torch.Tensor
+            The preprocessed image.
+        """
+        if isinstance(image, str):
+            image = Image.open(image).convert("RGB").resize((self.width, self.height))
+        if isinstance(image, Image.Image):
+            image = image.convert("RGB").resize((self.width, self.height))
+        return self.stream.image_processor.preprocess(
+            image, self.height, self.width
+        ).to(device=self.device, dtype=self.dtype)
+    def postprocess_image(
+        self, image_tensor: torch.Tensor, output_type: str = "pil"
+    ) -> Union[Image.Image, List[Image.Image], torch.Tensor, np.ndarray]:
+        """
+        Postprocesses the image.
+        Parameters
+        ----------
+        image_tensor : torch.Tensor
+            The image tensor to postprocess.
+        Returns
+        -------
+        Union[Image.Image, List[Image.Image]]
+            The postprocessed image.
+        """
+        if self.frame_buffer_size > 1:
+            return postprocess_image(image_tensor.cpu(), output_type=output_type)
+        else:
+            return postprocess_image(image_tensor.cpu(), output_type=output_type)[0]
     def _load_model(
         self,
         model_id: str,
         t_index_list: List[int],
+        lcm_lora_id: Optional[str] = None,
+        vae_id: Optional[str] = None,
+        acceleration: Literal["none", "sfast", "tensorrt"] = "tensorrt",
+        is_drawing: bool = True,
+        warmup: int = 10,
+        use_lcm_lora: bool = True,
+        use_tiny_vae: bool = True,
+        cfg_type: Literal["none", "full", "self", "initialize"] = "self",
     ):
+        """
+        Loads the model.
+        This method does the following:
+        1. Loads the model from the model_id.
+        2. Loads and fuses the LCM-LoRA model from the lcm_lora_id if needed.
+        3. Loads the VAE model from the vae_id if needed.
+        4. Enables acceleration if needed.
+        5. Prepares the model for inference.
+        6. Warms up the model.
+        Parameters
+        ----------
+        model_id : str
+            The model id to load.
+        t_index_list : List[int]
+            The t_index_list to use for inference.
+        lcm_lora_id : Optional[str], optional
+            The lcm_lora_id to load, by default None.
+        vae_id : Optional[str], optional
+            The vae_id to load, by default None.
+        acceleration : Literal["none", "xfomers", "sfast", "tensorrt"], optional
+            The acceleration method to use, by default "tensorrt".
+        warmup : int, optional
+            The number of warmup steps to perform, by default 10.
+        is_drawing : bool, optional
+            Whether to draw the image or not, by default True.
+        use_lcm_lora : bool, optional
+            Whether to use LCM-LoRA or not, by default True.
+        use_tiny_vae : bool, optional
+            Whether to use TinyVAE or not, by default True.
+        cfg_type : Literal["none", "full", "self", "initialize"], optional
+            The cfg_type to use, by default "self".
+        """
+        try:  # Load from local directory
             pipe: StableDiffusionPipeline = StableDiffusionPipeline.from_pretrained(
+                model_id,
+            ).to(device=self.device, dtype=self.dtype)
+        except ValueError:  # Load from huggingface
+            pipe: StableDiffusionPipeline = StableDiffusionPipeline.from_single_file(
                 model_id
             ).to(device=self.device, dtype=self.dtype)
+        except Exception:  # No model found
+            traceback.print_exc()
+            print("Model load has failed. Doesn't exist.")
+            exit()
+        if self.use_safety_checker:
+            from transformers import CLIPFeatureExtractor
+            from diffusers.pipelines.stable_diffusion.safety_checker import (
+                StableDiffusionSafetyChecker,
+            )
+            self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
+                "CompVis/stable-diffusion-safety-checker"
+            ).to(pipe.device)
+            self.feature_extractor = CLIPFeatureExtractor.from_pretrained(
+                "openai/clip-vit-base-patch32"
+            )
+            self.nsfw_fallback_img = Image.new("RGB", (512, 512), (0, 0, 0))
         stream = StreamDiffusion(
             pipe=pipe,
             t_index_list=t_index_list,
             torch_dtype=self.dtype,
+            width=self.width,
+            height=self.height,
+            is_drawing=is_drawing,
+            frame_buffer_size=self.frame_buffer_size,
+            use_denoising_batch=self.use_denoising_batch,
+            cfg_type=cfg_type,
         )
+        if not self.sd_turbo:
+            if use_lcm_lora:
+                if lcm_lora_id is not None:
+                    stream.load_lcm_lora(
+                        pretrained_model_name_or_path_or_dict=lcm_lora_id
+                    )
+                else:
+                    stream.load_lcm_lora()
+                stream.fuse_lora()
+        if use_tiny_vae:
+            if vae_id is not None:
+                stream.vae = AutoencoderTiny.from_pretrained(vae_id).to(
+                    device=pipe.device, dtype=pipe.dtype
+                )
+            else:
+                stream.vae = AutoencoderTiny.from_pretrained("madebyollin/taesd").to(
+                    device=pipe.device, dtype=pipe.dtype
+                )
         try:
+            if acceleration == "xformers":
+                stream.pipe.enable_xformers_memory_efficient_attention()
+            if acceleration == "tensorrt":
+                from streamdiffusion.acceleration.tensorrt import (
+                    TorchVAEEncoder,
+                    compile_unet,
+                    compile_vae_decoder,
+                    compile_vae_encoder,
+                )
+                from streamdiffusion.acceleration.tensorrt.engine import (
+                    AutoencoderKLEngine,
+                    UNet2DConditionModelEngine,
+                )
+                from streamdiffusion.acceleration.tensorrt.models import (
+                    VAE,
+                    UNet,
+                    VAEEncoder,
+                )
+                def create_prefix(
+                    max_batch_size: int,
+                    min_batch_size: int,
+                ):
+                    return f"{model_id}--lcm_lora-{use_tiny_vae}--tiny_vae-{use_lcm_lora}--max_batch-{max_batch_size}--min_batch-{min_batch_size}--mode-{self.mode}"
+                engine_dir = os.path.join("engines")
+                unet_path = os.path.join(
+                    engine_dir,
+                    create_prefix(
+                        stream.trt_unet_batch_size, stream.trt_unet_batch_size
+                    ),
+                    "unet.engine",
+                )
+                vae_encoder_path = os.path.join(
+                    engine_dir,
+                    create_prefix(
+                        self.batch_size
+                        if self.mode == "txt2img"
+                        else stream.frame_bff_size,
+                        self.batch_size
+                        if self.mode == "txt2img"
+                        else stream.frame_bff_size,
+                    ),
+                    "vae_encoder.engine",
+                )
+                vae_decoder_path = os.path.join(
+                    engine_dir,
+                    create_prefix(
+                        self.batch_size
+                        if self.mode == "txt2img"
+                        else stream.frame_bff_size,
+                        self.batch_size
+                        if self.mode == "txt2img"
+                        else stream.frame_bff_size,
+                    ),
+                    "vae_decoder.engine",
+                )
+                if not os.path.exists(unet_path):
+                    os.makedirs(os.path.dirname(unet_path), exist_ok=True)
+                    unet_model = UNet(
+                        fp16=True,
+                        device=stream.device,
+                        max_batch_size=stream.trt_unet_batch_size,
+                        min_batch_size=stream.trt_unet_batch_size,
+                        embedding_dim=stream.text_encoder.config.hidden_size,
+                        unet_dim=stream.unet.config.in_channels,
+                    )
+                    compile_unet(
+                        stream.unet,
+                        unet_model,
+                        unet_path + ".onnx",
+                        unet_path + ".opt.onnx",
+                        unet_path,
+                        opt_batch_size=stream.trt_unet_batch_size,
+                    )
+                if not os.path.exists(vae_decoder_path):
+                    os.makedirs(os.path.dirname(vae_decoder_path), exist_ok=True)
+                    stream.vae.forward = stream.vae.decode
+                    vae_decoder_model = VAE(
+                        device=stream.device,
+                        max_batch_size=self.batch_size
+                        if self.mode == "txt2img"
+                        else stream.frame_bff_size,
+                        min_batch_size=self.batch_size
+                        if self.mode == "txt2img"
+                        else stream.frame_bff_size,
+                    )
+                    compile_vae_decoder(
+                        stream.vae,
+                        vae_decoder_model,
+                        vae_decoder_path + ".onnx",
+                        vae_decoder_path + ".opt.onnx",
+                        vae_decoder_path,
+                        opt_batch_size=self.batch_size
+                        if self.mode == "txt2img"
+                        else stream.frame_bff_size,
+                    )
+                    delattr(stream.vae, "forward")
+                if not os.path.exists(vae_encoder_path):
+                    os.makedirs(os.path.dirname(vae_encoder_path), exist_ok=True)
+                    vae_encoder = TorchVAEEncoder(stream.vae).to(torch.device("cuda"))
+                    vae_encoder_model = VAEEncoder(
+                        device=stream.device,
+                        max_batch_size=self.batch_size
+                        if self.mode == "txt2img"
+                        else stream.frame_bff_size,
+                        min_batch_size=self.batch_size
+                        if self.mode == "txt2img"
+                        else stream.frame_bff_size,
+                    )
+                    compile_vae_encoder(
+                        vae_encoder,
+                        vae_encoder_model,
+                        vae_encoder_path + ".onnx",
+                        vae_encoder_path + ".opt.onnx",
+                        vae_encoder_path,
+                        opt_batch_size=self.batch_size
+                        if self.mode == "txt2img"
+                        else stream.frame_bff_size,
+                    )
+                cuda_steram = cuda.Stream()
+                vae_config = stream.vae.config
+                vae_dtype = stream.vae.dtype
+                stream.unet = UNet2DConditionModelEngine(
+                    unet_path, cuda_steram, use_cuda_graph=False
+                )
+                stream.vae = AutoencoderKLEngine(
+                    vae_encoder_path,
+                    vae_decoder_path,
+                    cuda_steram,
+                    stream.pipe.vae_scale_factor,
+                    use_cuda_graph=False,
+                )
+                setattr(stream.vae, "config", vae_config)
+                setattr(stream.vae, "dtype", vae_dtype)
+                gc.collect()
+                torch.cuda.empty_cache()
+                print("TensorRT acceleration enabled.")
+            if acceleration == "sfast":
                 from streamdiffusion.acceleration.sfast import (
                     accelerate_with_stable_fast,
                 )
                 stream = accelerate_with_stable_fast(stream)
                 print("StableFast acceleration enabled.")
+        except Exception:
+            traceback.print_exc()
+            print("Acceleration has failed. Falling back to normal mode.")
         stream.prepare(
+            "",
             "",
             num_inference_steps=50,
+            guidance_scale=1.1
+            if stream.cfg_type in ["full", "self", "initialize"]
+            else 1.0,
             generator=torch.manual_seed(2),
         )
         return stream

start.sh CHANGED Viewed

@@ -1,2 +1,4 @@
 cd view && npm run build && cd ..
-cd server && python3 main.py

+#!/bin/bash
+pip install -r requirements.txt
 cd view && npm run build && cd ..
+cd server && python3 main.py

view/.DS_Store CHANGED Viewed

Binary files a/view/.DS_Store and b/view/.DS_Store differ

view/src/App.tsx CHANGED Viewed

@@ -38,7 +38,7 @@ function App() {
   const fetchImage = useCallback(
     async (index: number) => {
       try {
-        const response = await fetch("/api/predict", {
           method: "POST",
           headers: { "Content-Type": "application/json" },
           body: JSON.stringify({ prompt: inputPrompt }),
@@ -63,7 +63,7 @@ function App() {
     const newPrompt = event.target.value;
     const editDistance = calculateEditDistance(lastPrompt, newPrompt);
-    if (editDistance >= 2) {
       setInputPrompt(newPrompt);
       setLastPrompt(newPrompt);
       for (let i = 0; i < 16; i++) {
@@ -98,7 +98,7 @@ function App() {
         <Grid
           container
           spacing={1}
-          style={{ maxWidth: "50%", maxHeight: "70%" }}
         >
           {images.map((image, index) => (
             <Grid item xs={3} key={index}>
@@ -106,6 +106,8 @@ function App() {
                 src={image}
                 alt={`Generated ${index}`}
                 style={{
                   maxWidth: "100%",
                   maxHeight: "150px",
                   borderRadius: "10px",
@@ -121,7 +123,8 @@ function App() {
           style={{
             marginBottom: "20px",
             marginTop: "20px",
-            width: "640px",
             color: "#ffffff",
             borderColor: "#ffffff",
             borderRadius: "10px",

   const fetchImage = useCallback(
     async (index: number) => {
       try {
+        const response = await fetch("api/predict", {
           method: "POST",
           headers: { "Content-Type": "application/json" },
           body: JSON.stringify({ prompt: inputPrompt }),
     const newPrompt = event.target.value;
     const editDistance = calculateEditDistance(lastPrompt, newPrompt);
+    if (editDistance >= 4) {
       setInputPrompt(newPrompt);
       setLastPrompt(newPrompt);
       for (let i = 0; i < 16; i++) {
         <Grid
           container
           spacing={1}
+          style={{ maxWidth: "60rem", maxHeight: "70%" }}
         >
           {images.map((image, index) => (
             <Grid item xs={3} key={index}>
                 src={image}
                 alt={`Generated ${index}`}
                 style={{
+                  display: "block",
+                  margin: "0 auto",
                   maxWidth: "100%",
                   maxHeight: "150px",
                   borderRadius: "10px",
           style={{
             marginBottom: "20px",
             marginTop: "20px",
+            width: "100%",
+            maxWidth: "50rem",
             color: "#ffffff",
             borderColor: "#ffffff",
             borderRadius: "10px",