English
John6666 committed on
Commit
8b61e71
·
verified ·
1 Parent(s): a5cb0bf

Upload 2 files

Browse files
Files changed (2) hide show
  1. handler.py +10 -10
  2. requirements.txt +3 -3
handler.py CHANGED
@@ -24,17 +24,17 @@ class EndpointHandler:
24
  ).to("cuda")
25
  self.pipe.enable_vae_slicing()
26
  self.pipe.enable_vae_tiling()
27
- #self.pipe.transformer.fuse_qkv_projections()
28
- #self.pipe.vae.fuse_qkv_projections()
29
- #self.pipe.transformer.to(memory_format=torch.channels_last)
30
- #self.pipe.vae.to(memory_format=torch.channels_last)
31
  apply_cache_on_pipe(self.pipe, residual_diff_threshold=0.12)
32
- #self.pipe.transformer = torch.compile(
33
- # self.pipe.transformer, mode="max-autotune-no-cudagraphs",
34
- #)
35
- #self.pipe.vae = torch.compile(
36
- # self.pipe.vae, mode="max-autotune-no-cudagraphs",
37
- #)
38
  self.pipe.transformer = autoquant(self.pipe.transformer, error_on_unseen=False)
39
  self.pipe.vae = autoquant(self.pipe.vae, error_on_unseen=False)
40
 
 
24
  ).to("cuda")
25
  self.pipe.enable_vae_slicing()
26
  self.pipe.enable_vae_tiling()
27
+ self.pipe.transformer.fuse_qkv_projections()
28
+ self.pipe.vae.fuse_qkv_projections()
29
+ self.pipe.transformer.to(memory_format=torch.channels_last)
30
+ self.pipe.vae.to(memory_format=torch.channels_last)
31
  apply_cache_on_pipe(self.pipe, residual_diff_threshold=0.12)
32
+ self.pipe.transformer = torch.compile(
33
+ self.pipe.transformer, mode="max-autotune-no-cudagraphs",
34
+ )
35
+ self.pipe.vae = torch.compile(
36
+ self.pipe.vae, mode="max-autotune-no-cudagraphs",
37
+ )
38
  self.pipe.transformer = autoquant(self.pipe.transformer, error_on_unseen=False)
39
  self.pipe.vae = autoquant(self.pipe.vae, error_on_unseen=False)
40
 
requirements.txt CHANGED
@@ -3,8 +3,8 @@ torch==2.6.0+cu126
3
  torchvision
4
  torchaudio
5
  huggingface_hub
6
- torchao>=0.9.0
7
- diffusers>=0.32.2
8
  peft
9
  transformers<=4.48.3
10
  numpy<2
@@ -13,4 +13,4 @@ Pillow
13
  sentencepiece
14
  protobuf
15
  triton
16
- para-attn
 
3
  torchvision
4
  torchaudio
5
  huggingface_hub
6
+ torchao==0.9.0
7
+ diffusers==0.32.2
8
  peft
9
  transformers<=4.48.3
10
  numpy<2
 
13
  sentencepiece
14
  protobuf
15
  triton
16
+ para-attn==0.3.23