multimodalart (HF Staff) committed
Commit 04ca81f · verified · 1 Parent(s): d78ad44

Update app.py

Files changed (1)
  1. app.py +15 -7
app.py CHANGED
@@ -33,7 +33,7 @@ try:
     with open(attention_file_path, "r") as f:
         content = f.read()
 
-    # Define the original problematic code block
+    # Original code block that we need to replace
     original_code = """        x, *_ = flash_attn_func(
             q,
             k,
@@ -42,17 +42,25 @@ try:
         )
         x = rearrange(x, "B S H D -> B H S D")"""
 
-    # Define the corrected code block that handles the 3D output of FA3
-    corrected_code = """        # The output of flash_attn_func is 3D (total_tokens, H, D), but the code expects 4D.
-        # We get B and S from the rearranged q's shape and reshape the output tensor x.
-        B, S, H, D = q.shape
-        x, *_ = flash_attn_func(
+    # Corrected code block to handle FA3's 3D output shape
+    corrected_code = """        x, *_ = flash_attn_func(
             q,
             k,
             v,
             softmax_scale=self.scale,
         )
-        x = x.view(B, S, H, D)  # Reshape from 3D to 4D
+        # The output of FA3's flash_attn_func can be 3D (total_tokens, H, D).
+        # We need to robustly reshape it back to the 4D format (B, S, H, D) that the
+        # subsequent rearrange operation expects.
+        if x.ndim == 3:
+            # B is the original batch size from the input q tensor
+            B = q.shape[0]
+            # S_total is the flattened batch and sequence length
+            S_total, H, D = x.shape
+            # Calculate the sequence length per batch item
+            S = S_total // B
+            x = x.view(B, S, H, D)
+
         x = rearrange(x, "B S H D -> B H S D")"""
 
     if original_code in content:
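
The core of the fix: FA3's flash_attn_func can return a packed 3D tensor (total_tokens, H, D), while the downstream rearrange expects (B, S, H, D). A minimal standalone sketch of the injected reshape, using dummy tensors; the shapes and torch.randn inputs are illustrative stand-ins, not values from the patched file:

    import torch

    B, S, H, D = 2, 16, 8, 64           # batch, sequence, heads, head_dim
    q = torch.randn(B, S, H, D)         # query as the attention module sees it

    # Pretend flash_attn_func returned a packed 3D tensor (total_tokens, H, D):
    x = torch.randn(B * S, H, D)

    # The same logic the patch injects:
    if x.ndim == 3:
        S_total, H, D = x.shape
        S = S_total // q.shape[0]       # tokens per batch item
        x = x.view(q.shape[0], S, H, D)

    assert x.shape == (B, S, H, D)      # back to the 4D layout rearrange expects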
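
Judging from the visible context (with open(...), content = f.read(), if original_code in content:), the surrounding app.py applies the fix as a plain string replacement at startup. A sketch of that pattern, assuming the file path, the placeholder strings, and the fallback behavior, none of which are confirmed by the diff:

    attention_file_path = "path/to/attention.py"   # hypothetical path
    original_code = "...old block..."              # the triple-quoted block above
    corrected_code = "...new block..."             # the corrected block above

    with open(attention_file_path, "r") as f:
        content = f.read()

    if original_code in content:
        content = content.replace(original_code, corrected_code)
        with open(attention_file_path, "w") as f:
            f.write(content)
    else:
        # Target not found: the file may already be patched or has changed upstream.
        print("Patch target not found; skipping.")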