Spaces:

zerogpu-aoti
/

FLUX.1-dev-fa3-aoti

Running on Zero

App Files Files Community

cbensimon HF Staff committed on Jul 3

Commit

674e245

1 Parent(s): a3d55a6

ZeroGPUCompiledModel

Browse files

Files changed (2) hide show

app.py +5 -33
utils/zerogpu.py +60 -0

app.py CHANGED Viewed

@@ -19,9 +19,8 @@ import spaces
 import torch
 import torch._inductor
 from diffusers import FluxPipeline
-from torch._inductor.package import package_aoti
-from torch.export.pt2_archive._package import AOTICompiledModel
-from torch.export.pt2_archive._package_weights import Weights
 pipeline = FluxPipeline.from_pretrained('black-forest-labs/FLUX.1-schnell', torch_dtype=torch.bfloat16).to('cuda')
@@ -61,43 +60,16 @@ def compile_transformer():
     exported = torch.export.export(pipeline.transformer, args=(), kwargs=transformer_kwargs)
-    artifacts = torch._inductor.aot_compile(exported.module(), *exported.example_inputs, options=inductor_configs | {
-        'aot_inductor.package_constants_in_so': False,
-        'aot_inductor.package_constants_on_disk': True,
-        'aot_inductor.package': True,
-    })
-    files = [file for file in artifacts if isinstance(file, str)]
-    package_aoti(package_path, files)
-    weights, = (artifact for artifact in artifacts if isinstance(artifact, Weights))
-    weights_: dict[str, torch.Tensor] = {}
-    for name in weights:
-        tensor, _properties = weights.get_weight(name)
-        tensor_ = torch.empty_like(tensor, device='cpu').pin_memory()
-        weights_[name] = tensor_.copy_(tensor).detach().share_memory_()
-    return weights_
-weights = compile_transformer()
-weights = {name: tensor.to('cuda') for name, tensor in weights.items()}
-print('compile_transformer', -(t0 - (t0 := datetime.now())))
 transformer_config = pipeline.transformer.config
-pipeline.transformer = None
 @spaces.GPU
 def _generate_image(prompt: str, t0: datetime):
     print('@spaces.GPU', -(t0 - (t0 := datetime.now())))
-    compiled_transformer: AOTICompiledModel = torch._inductor.aoti_load_package(package_path)
-    print('aoti_load_package', -(t0 - (t0 := datetime.now())))
-    compiled_transformer.load_constants(weights, check_full_update=True, user_managed=True)
-    print('load_constants', -(t0 - (t0 := datetime.now())))
-    pipeline.transformer = compiled_transformer
-    pipeline.transformer.config = transformer_config
     images = []
     for _ in range(4):
         images += pipeline(prompt, num_inference_steps=4).images

 import torch
 import torch._inductor
 from diffusers import FluxPipeline
+from .utils.zerogpu import aoti_compile
 pipeline = FluxPipeline.from_pretrained('black-forest-labs/FLUX.1-schnell', torch_dtype=torch.bfloat16).to('cuda')
     exported = torch.export.export(pipeline.transformer, args=(), kwargs=transformer_kwargs)
+    return aoti_compile(exported, inductor_configs)
 transformer_config = pipeline.transformer.config
+pipeline.transformer = compile_transformer()
+pipeline.transformer.config = transformer_config
 @spaces.GPU
 def _generate_image(prompt: str, t0: datetime):
     print('@spaces.GPU', -(t0 - (t0 := datetime.now())))
     images = []
     for _ in range(4):
         images += pipeline(prompt, num_inference_steps=4).images

utils/zerogpu.py ADDED Viewed

	@@ -0,0 +1,60 @@

+"""
+"""
+from io import BytesIO
+from typing import Any
+import torch
+from torch._inductor.package.package import package_aoti
+from torch.export.pt2_archive._package import AOTICompiledModel
+from torch.export.pt2_archive._package_weights import TensorProperties
+from torch.export.pt2_archive._package_weights import Weights
+# Inductor options referenced by aoti_compile when it builds the `options`
+# mapping passed to torch._inductor.aot_compile.
+# NOTE(review): presumably these keep the constants (weights) out of the compiled
+# shared object so that they come back as a separate Weights artifact — confirm
+# against the torch._inductor config documentation.
+INDUCTOR_CONFIGS_OVERRIDES = {
+    'aot_inductor.package_constants_in_so': False,
+    'aot_inductor.package_constants_on_disk': True,
+    'aot_inductor.package': True,
+}
+class ZeroGPUCompiledModel:
+    """Callable wrapper around an AOTInductor package archive and its weights.
+
+    The underlying ``AOTICompiledModel`` is loaded lazily on the first call and
+    then reused. Pickling goes through ``__reduce__``, which copies every weight
+    into pinned, shared CPU memory and rebuilds the instance with ``cuda=True``
+    so that the weights are moved to CUDA when the object is reconstructed.
+    """
+
+    def __init__(self, archive_file: BytesIO, weights: Weights, cuda: bool = False):
+        """Store the package archive and weights.
+
+        Args:
+            archive_file: In-memory archive produced by ``package_aoti``.
+            weights: Mapping of weight name to ``(tensor, properties)``.
+            cuda: When true, move every weight tensor to CUDA immediately.
+        """
+        self.archive_file = archive_file
+        self.weights = weights
+        if cuda:
+            self.weights_to_cuda_()
+        # Populated on the first __call__.
+        self.compiled_model: AOTICompiledModel | None = None
+
+    def weights_to_cuda_(self):
+        """Replace each stored weight tensor with its CUDA copy, in place."""
+        # Only existing keys are reassigned, so the mapping's size does not
+        # change while it is being iterated.
+        for name in self.weights:
+            tensor, properties = self.weights.get_weight(name)
+            self.weights[name] = (tensor.to('cuda'), properties)
+
+    def __call__(self, *args, **kwargs):
+        """Run the compiled model, loading it and binding its constants on first use."""
+        if self.compiled_model is None:
+            # Each entry is a (tensor, properties) pair; load_constants must be
+            # given the tensor (index 0), not the TensorProperties (index 1).
+            constants_map = {name: value[0] for name, value in self.weights.items()}
+            compiled_model: AOTICompiledModel = torch._inductor.aoti_load_package(self.archive_file)
+            compiled_model.load_constants(constants_map, check_full_update=True, user_managed=True)
+            self.compiled_model = compiled_model
+        return self.compiled_model(*args, **kwargs)
+
+    def __reduce__(self):
+        """Pickle as ``(archive_file, CPU weights, cuda=True)``.
+
+        Every weight is copied into a pinned CPU tensor that is then placed in
+        shared memory; the lazily loaded compiled model itself is not pickled.
+        """
+        weight_dict: dict[str, tuple[torch.Tensor, TensorProperties]] = {}
+        for name in self.weights:
+            tensor, properties = self.weights.get_weight(name)
+            tensor_ = torch.empty_like(tensor, device='cpu').pin_memory()
+            weight_dict[name] = (tensor_.copy_(tensor).detach().share_memory_(), properties)
+        return ZeroGPUCompiledModel, (self.archive_file, Weights(weight_dict), True)
+def aoti_compile(
+    exported_program: torch.export.ExportedProgram,
+    inductor_configs: dict[str, Any] | None = None,
+):
+    """Compile an exported program with AOTInductor into a ZeroGPUCompiledModel.
+
+    Args:
+        exported_program: Program to compile; its ``example_inputs`` must be set.
+        inductor_configs: Optional Inductor options. Entries of
+            ``INDUCTOR_CONFIGS_OVERRIDES`` always take precedence over them.
+
+    Returns:
+        A ``ZeroGPUCompiledModel`` holding the in-memory package archive and the
+        ``Weights`` artifact returned by the compiler.
+
+    Raises:
+        AssertionError: If ``exported_program.example_inputs`` is ``None``.
+        ValueError: If the compile artifacts do not contain exactly one
+            ``Weights`` object (raised by the single-element unpacking).
+    """
+    # Parentheses are required: `|` binds tighter than `or`, so without them the
+    # overrides were only applied when the caller passed no configs at all.
+    inductor_configs = (inductor_configs or {}) | INDUCTOR_CONFIGS_OVERRIDES
+    gm = exported_program.module()
+    assert exported_program.example_inputs is not None
+    args, kwargs = exported_program.example_inputs
+    artifacts = torch._inductor.aot_compile(gm, args, kwargs, options=inductor_configs)
+    archive_file = BytesIO()
+    # The artifact list mixes file paths (str) with a Weights object: the paths
+    # go into the archive, the Weights object is kept separately.
+    files = [file for file in artifacts if isinstance(file, str)]
+    package_aoti(archive_file, files)
+    weights, = (artifact for artifact in artifacts if isinstance(artifact, Weights))
+    return ZeroGPUCompiledModel(archive_file, weights)