Upload 2 files
- handler.py +10 -10
- requirements.txt +3 -3
handler.py CHANGED
@@ -24,17 +24,17 @@ class EndpointHandler:
         ).to("cuda")
         self.pipe.enable_vae_slicing()
         self.pipe.enable_vae_tiling()
-
-
-
-
+        self.pipe.transformer.fuse_qkv_projections()
+        self.pipe.vae.fuse_qkv_projections()
+        self.pipe.transformer.to(memory_format=torch.channels_last)
+        self.pipe.vae.to(memory_format=torch.channels_last)
         apply_cache_on_pipe(self.pipe, residual_diff_threshold=0.12)
-
-
-
-
-
-
+        self.pipe.transformer = torch.compile(
+            self.pipe.transformer, mode="max-autotune-no-cudagraphs",
+        )
+        self.pipe.vae = torch.compile(
+            self.pipe.vae, mode="max-autotune-no-cudagraphs",
+        )
         self.pipe.transformer = autoquant(self.pipe.transformer, error_on_unseen=False)
         self.pipe.vae = autoquant(self.pipe.vae, error_on_unseen=False)
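For context, a minimal sketch of how the optimized setup section of EndpointHandler reads after this change. The imports, the __init__ signature, the DiffusionPipeline.from_pretrained call, the "MODEL_ID" placeholder, and the dtype are assumptions added for illustration; only the statements that appear in the diff above come from the actual handler, and the import paths for apply_cache_on_pipe (para-attn) and autoquant (torchao) are the ones those libraries typically expose, not confirmed by this commit.

import torch
from diffusers import DiffusionPipeline
from para_attn.first_block_cache.diffusers_adapters import apply_cache_on_pipe
from torchao.quantization import autoquant


class EndpointHandler:
    def __init__(self, path=""):
        # Placeholder load call; the real handler's model id and dtype are not
        # shown in the diff.
        self.pipe = DiffusionPipeline.from_pretrained(
            "MODEL_ID", torch_dtype=torch.bfloat16
        ).to("cuda")

        # Decode the VAE in slices/tiles to cap peak memory.
        self.pipe.enable_vae_slicing()
        self.pipe.enable_vae_tiling()

        # New in this commit: fuse the Q/K/V projections into single matmuls and
        # switch to channels_last memory layout for faster kernels.
        self.pipe.transformer.fuse_qkv_projections()
        self.pipe.vae.fuse_qkv_projections()
        self.pipe.transformer.to(memory_format=torch.channels_last)
        self.pipe.vae.to(memory_format=torch.channels_last)

        # para-attn first-block cache: when the first transformer block's residual
        # changes by less than the threshold between denoising steps, skip the
        # remaining blocks and reuse the cached result.
        apply_cache_on_pipe(self.pipe, residual_diff_threshold=0.12)

        # New in this commit: compile the heavy modules. The
        # "max-autotune-no-cudagraphs" mode autotunes kernels but skips CUDA
        # graph capture.
        self.pipe.transformer = torch.compile(
            self.pipe.transformer, mode="max-autotune-no-cudagraphs",
        )
        self.pipe.vae = torch.compile(
            self.pipe.vae, mode="max-autotune-no-cudagraphs",
        )

        # torchao autoquant: pick per-layer quantization kernels at first run.
        self.pipe.transformer = autoquant(self.pipe.transformer, error_on_unseen=False)
        self.pipe.vae = autoquant(self.pipe.vae, error_on_unseen=False)

Both torch.compile and autoquant defer their work to the first forward pass, so the first request after deployment is expected to be slow while autotuning and quantization selection run; later requests hit the compiled, quantized modules.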
requirements.txt CHANGED
@@ -3,8 +3,8 @@ torch==2.6.0+cu126
 torchvision
 torchaudio
 huggingface_hub
-torchao
-diffusers
+torchao==0.9.0
+diffusers==0.32.2
 peft
 transformers<=4.48.3
 numpy<2
@@ -13,4 +13,4 @@ Pillow
 sentencepiece
 protobuf
 triton
-para-attn
+para-attn==0.3.23