fp8e4m3 (disable aoti)
- fa3.py +2 -1
- optimization.py +1 -1

fa3.py
CHANGED

@@ -10,7 +10,8 @@ _flash_attn_func = get_kernel("kernels-community/vllm-flash-attn3").flash_attn_func
 
 @torch.library.custom_op("flash::flash_attn_func", mutates_args=())
 def flash_attn_func(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
-    outputs, lse = _flash_attn_func(q, k, v)
+    dtype = torch.float8_e4m3fn
+    outputs, lse = _flash_attn_func(q.to(dtype), k.to(dtype), v.to(dtype))
     return outputs
 
 @flash_attn_func.register_fake
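
For reference, a minimal self-contained sketch of the custom op as it stands after this change. The `register_fake` body below is an assumption (the diff only shows the decorator line); its job is just to give torch.compile a meta implementation with a plausible output shape and dtype, assumed here to match `q`.

import torch
from kernels import get_kernel  # Hugging Face `kernels` package

_flash_attn_func = get_kernel("kernels-community/vllm-flash-attn3").flash_attn_func

@torch.library.custom_op("flash::flash_attn_func", mutates_args=())
def flash_attn_func(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
    # Cast q/k/v to fp8 e4m3 so the FlashAttention-3 kernel takes its fp8 path.
    dtype = torch.float8_e4m3fn
    outputs, lse = _flash_attn_func(q.to(dtype), k.to(dtype), v.to(dtype))
    return outputs

@flash_attn_func.register_fake
def _(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
    # Assumed fake (meta) implementation: an empty tensor shaped like q,
    # so the compiler can trace shapes without running the kernel.
    return torch.empty_like(q)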

optimization.py
CHANGED

@@ -41,4 +41,4 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
 
     pipeline.transformer.fuse_qkv_projections()
     pipeline.transformer.set_attn_processor(FlashFusedFluxAttnProcessor3_0())
-    spaces.aoti_apply(compile_transformer(), pipeline.transformer)
+    # spaces.aoti_apply(compile_transformer(), pipeline.transformer)
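
Commenting out `spaces.aoti_apply` matches the "(disable aoti)" part of the commit title: the transformer now runs without ZeroGPU's ahead-of-time Inductor compilation, leaving only the QKV fusion and the FA3 fp8 attention processor active. A sketch of a switchable alternative, assuming a hypothetical `DISABLE_AOTI` environment flag that is not part of this Space:

import os
import spaces  # Hugging Face `spaces` package (ZeroGPU helpers)

def optimize_pipeline_(pipeline, *args, **kwargs):
    # FlashFusedFluxAttnProcessor3_0 and compile_transformer come from this
    # Space's own modules (fa3.py / optimization.py).
    pipeline.transformer.fuse_qkv_projections()
    pipeline.transformer.set_attn_processor(FlashFusedFluxAttnProcessor3_0())
    if not os.environ.get("DISABLE_AOTI"):  # hypothetical flag
        spaces.aoti_apply(compile_transformer(), pipeline.transformer)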