Spaces:
Sleeping
Sleeping
Further optimize + quantize
Browse files
- app.py +8 -0
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -15,6 +15,8 @@ import gradio as gr
|
|
| 15 |
import spaces
|
| 16 |
import torch
|
| 17 |
from diffusers import FluxPipeline
|
|
|
|
|
|
|
| 18 |
|
| 19 |
from zerogpu import aoti_compile
|
| 20 |
|
|
@@ -26,6 +28,12 @@ print('FluxPipeline.from_pretrained', -(t0 - (t0 := datetime.now())))
|
|
| 26 |
@spaces.GPU(duration=1500)
|
| 27 |
def compile_transformer():
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
def _example_tensor(*shape):
|
| 30 |
return torch.randn(*shape, device='cuda', dtype=torch.bfloat16)
|
| 31 |
|
|
|
|
| 15 |
import spaces
|
| 16 |
import torch
|
| 17 |
from diffusers import FluxPipeline
|
| 18 |
+
from torchao.quantization import quantize_
|
| 19 |
+
from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
|
| 20 |
|
| 21 |
from zerogpu import aoti_compile
|
| 22 |
|
|
|
|
| 28 |
@spaces.GPU(duration=1500)
|
| 29 |
def compile_transformer():
|
| 30 |
|
| 31 |
+
pipeline.transformer.fuse_qkv_projections()
|
| 32 |
+
pipeline.vae.fuse_qkv_projections()
|
| 33 |
+
pipeline.vae.to(memory_format=torch.channels_last)
|
| 34 |
+
|
| 35 |
+
quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
|
| 36 |
+
|
| 37 |
def _example_tensor(*shape):
|
| 38 |
return torch.randn(*shape, device='cuda', dtype=torch.bfloat16)
|
| 39 |
|
requirements.txt
CHANGED
|
@@ -3,3 +3,4 @@ diffusers
|
|
| 3 |
transformers
|
| 4 |
sentencepiece
|
| 5 |
protobuf
|
|
|
|
|
|
| 3 |
transformers
|
| 4 |
sentencepiece
|
| 5 |
protobuf
|
| 6 |
+
torchao
|