diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/__init__.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/__init__.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/__init__.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/__init__.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/_ops.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/_ops.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/_ops.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/bert_padding.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/bert_padding.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/bert_padding.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/bert_padding.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/flash_attn_interface.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/flash_attn_interface.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/flash_attn_interface.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/flash_attn_interface.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/layers/__init__.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/layers/__init__.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/layers/__init__.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/layers/__init__.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/layers/patch_embed.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/layers/patch_embed.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/layers/patch_embed.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/layers/patch_embed.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/layers/rotary.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/layers/rotary.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/layers/rotary.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/layers/rotary.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/__init__.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/__init__.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/__init__.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/__init__.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/activations.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/activations.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/activations.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/activations.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/fused_dense.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/fused_dense.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/fused_dense.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/fused_dense.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/layer_norm.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/layer_norm.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/layer_norm.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/layer_norm.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/rms_norm.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/rms_norm.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/rms_norm.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/rms_norm.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/__init__.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/__init__.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/__init__.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/__init__.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/cross_entropy.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/cross_entropy.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/k_activations.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/k_activations.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/k_activations.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/k_activations.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/layer_norm.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/layer_norm.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/layer_norm.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/layer_norm.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/linear.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/linear.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/linear.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/linear.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/mlp.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/mlp.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/mlp.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/mlp.py diff --git a/build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/rotary.py b/build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/rotary.py similarity index 100% rename from build/torch27-cxx11-cu118-x86_64-linux/flash_attn/ops/triton/rotary.py rename to build/torch27-cxx11-cu118-x86_64-linux/flash_attn2/ops/triton/rotary.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/__init__.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/__init__.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/__init__.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/__init__.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/_ops.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/_ops.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/_ops.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/bert_padding.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/bert_padding.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/bert_padding.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/bert_padding.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/flash_attn_interface.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/flash_attn_interface.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/flash_attn_interface.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/flash_attn_interface.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/layers/__init__.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/layers/__init__.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/layers/__init__.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/layers/__init__.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/layers/patch_embed.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/layers/patch_embed.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/layers/patch_embed.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/layers/patch_embed.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/layers/rotary.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/layers/rotary.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/layers/rotary.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/layers/rotary.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/__init__.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/__init__.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/__init__.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/__init__.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/activations.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/activations.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/activations.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/activations.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/fused_dense.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/fused_dense.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/fused_dense.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/fused_dense.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/layer_norm.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/layer_norm.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/layer_norm.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/layer_norm.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/rms_norm.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/rms_norm.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/rms_norm.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/rms_norm.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/__init__.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/__init__.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/__init__.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/__init__.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/cross_entropy.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/cross_entropy.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/k_activations.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/k_activations.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/k_activations.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/k_activations.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/layer_norm.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/layer_norm.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/layer_norm.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/layer_norm.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/linear.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/linear.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/linear.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/linear.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/mlp.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/mlp.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/mlp.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/mlp.py diff --git a/build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/rotary.py b/build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/rotary.py similarity index 100% rename from build/torch27-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/rotary.py rename to build/torch27-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/rotary.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/__init__.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/__init__.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/__init__.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/__init__.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/_ops.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/_ops.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/_ops.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/bert_padding.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/bert_padding.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/bert_padding.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/bert_padding.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/flash_attn_interface.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/flash_attn_interface.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/flash_attn_interface.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/flash_attn_interface.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/layers/__init__.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/layers/__init__.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/layers/__init__.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/layers/__init__.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/layers/patch_embed.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/layers/patch_embed.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/layers/patch_embed.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/layers/patch_embed.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/layers/rotary.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/layers/rotary.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/layers/rotary.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/layers/rotary.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/__init__.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/__init__.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/__init__.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/__init__.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/activations.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/activations.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/activations.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/activations.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/fused_dense.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/fused_dense.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/fused_dense.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/fused_dense.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/layer_norm.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/layer_norm.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/layer_norm.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/layer_norm.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/rms_norm.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/rms_norm.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/rms_norm.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/rms_norm.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/__init__.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/__init__.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/__init__.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/__init__.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/cross_entropy.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/cross_entropy.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/k_activations.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/k_activations.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/k_activations.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/k_activations.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/layer_norm.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/layer_norm.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/layer_norm.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/layer_norm.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/linear.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/linear.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/linear.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/linear.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/mlp.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/mlp.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/mlp.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/mlp.py diff --git a/build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/rotary.py b/build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/rotary.py similarity index 100% rename from build/torch27-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/rotary.py rename to build/torch27-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/rotary.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/__init__.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/__init__.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/__init__.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/_ops.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/_ops.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/_ops.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/_ops.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/bert_padding.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/bert_padding.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/bert_padding.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/bert_padding.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/flash_attn_interface.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/flash_attn_interface.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/flash_attn_interface.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/flash_attn_interface.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/layers/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/layers/__init__.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/layers/__init__.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/layers/__init__.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/layers/patch_embed.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/layers/patch_embed.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/layers/patch_embed.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/layers/patch_embed.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/layers/rotary.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/layers/rotary.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/layers/rotary.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/layers/rotary.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/__init__.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/__init__.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/__init__.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/activations.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/activations.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/activations.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/activations.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/fused_dense.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/fused_dense.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/fused_dense.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/fused_dense.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/layer_norm.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/layer_norm.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/layer_norm.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/layer_norm.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/rms_norm.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/rms_norm.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/rms_norm.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/rms_norm.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/__init__.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/__init__.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/__init__.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/cross_entropy.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/cross_entropy.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/k_activations.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/k_activations.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/k_activations.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/k_activations.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/layer_norm.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/layer_norm.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/layer_norm.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/layer_norm.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/linear.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/linear.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/linear.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/linear.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/mlp.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/mlp.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/mlp.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/mlp.py diff --git a/build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/rotary.py b/build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/rotary.py similarity index 100% rename from build/torch28-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/rotary.py rename to build/torch28-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/rotary.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/__init__.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/__init__.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/__init__.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/_ops.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/_ops.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/_ops.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/_ops.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/bert_padding.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/bert_padding.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/bert_padding.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/bert_padding.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/flash_attn_interface.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/flash_attn_interface.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/flash_attn_interface.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/flash_attn_interface.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/layers/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/layers/__init__.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/layers/__init__.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/layers/__init__.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/layers/patch_embed.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/layers/patch_embed.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/layers/patch_embed.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/layers/patch_embed.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/layers/rotary.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/layers/rotary.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/layers/rotary.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/layers/rotary.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/__init__.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/__init__.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/__init__.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/activations.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/activations.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/activations.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/activations.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/fused_dense.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/fused_dense.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/fused_dense.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/fused_dense.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/layer_norm.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/layer_norm.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/layer_norm.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/layer_norm.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/rms_norm.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/rms_norm.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/rms_norm.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/rms_norm.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/__init__.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/__init__.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/__init__.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/cross_entropy.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/cross_entropy.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/k_activations.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/k_activations.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/k_activations.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/k_activations.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/layer_norm.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/layer_norm.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/layer_norm.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/layer_norm.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/linear.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/linear.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/linear.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/linear.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/mlp.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/mlp.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/mlp.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/mlp.py diff --git a/build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/rotary.py b/build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/rotary.py similarity index 100% rename from build/torch28-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/rotary.py rename to build/torch28-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/rotary.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/__init__.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/__init__.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/__init__.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/_ops.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/_ops.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/_ops.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/_ops.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/bert_padding.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/bert_padding.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/bert_padding.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/bert_padding.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/flash_attn_interface.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/flash_attn_interface.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/flash_attn_interface.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/flash_attn_interface.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/layers/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/layers/__init__.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/layers/__init__.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/layers/__init__.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/layers/patch_embed.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/layers/patch_embed.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/layers/patch_embed.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/layers/patch_embed.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/layers/rotary.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/layers/rotary.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/layers/rotary.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/layers/rotary.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/__init__.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/__init__.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/__init__.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/activations.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/activations.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/activations.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/activations.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/fused_dense.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/fused_dense.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/fused_dense.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/fused_dense.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/layer_norm.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/layer_norm.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/layer_norm.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/layer_norm.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/rms_norm.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/rms_norm.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/rms_norm.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/rms_norm.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/__init__.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/__init__.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/__init__.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/cross_entropy.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/cross_entropy.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/k_activations.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/k_activations.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/k_activations.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/k_activations.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/layer_norm.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/layer_norm.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/layer_norm.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/layer_norm.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/linear.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/linear.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/linear.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/linear.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/mlp.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/mlp.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/mlp.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/mlp.py diff --git a/build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/rotary.py b/build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/rotary.py similarity index 100% rename from build/torch28-cxx11-cu129-x86_64-linux/flash_attn/ops/triton/rotary.py rename to build/torch28-cxx11-cu129-x86_64-linux/flash_attn2/ops/triton/rotary.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/__init__.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/__init__.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/__init__.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/__init__.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/_flash_attn_c984dd4_dirty.abi3.so b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/_flash_attn_c984dd4_dirty.abi3.so similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/_flash_attn_c984dd4_dirty.abi3.so rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/_flash_attn_c984dd4_dirty.abi3.so diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/_ops.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/_ops.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/_ops.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/_ops.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/bert_padding.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/bert_padding.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/bert_padding.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/bert_padding.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/flash_attn_interface.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/flash_attn_interface.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/flash_attn_interface.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/flash_attn_interface.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/layers/__init__.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/layers/__init__.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/layers/__init__.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/layers/__init__.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/layers/patch_embed.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/layers/patch_embed.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/layers/patch_embed.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/layers/patch_embed.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/layers/rotary.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/layers/rotary.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/layers/rotary.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/layers/rotary.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/__init__.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/__init__.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/__init__.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/__init__.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/activations.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/activations.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/activations.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/activations.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/fused_dense.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/fused_dense.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/fused_dense.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/fused_dense.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/layer_norm.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/layer_norm.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/layer_norm.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/layer_norm.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/rms_norm.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/rms_norm.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/rms_norm.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/rms_norm.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/__init__.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/__init__.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/__init__.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/__init__.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/cross_entropy.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/cross_entropy.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/k_activations.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/k_activations.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/k_activations.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/k_activations.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/layer_norm.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/layer_norm.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/layer_norm.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/layer_norm.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/linear.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/linear.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/linear.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/linear.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/mlp.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/mlp.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/mlp.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/mlp.py diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/rotary.py b/build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/rotary.py similarity index 100% rename from build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn/ops/triton/rotary.py rename to build/torch28-cxx11-xpu20251-x86_64-linux/flash_attn2/ops/triton/rotary.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/__init__.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/__init__.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/__init__.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/__init__.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/_ops.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/_ops.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/_ops.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/_ops.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/bert_padding.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/bert_padding.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/bert_padding.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/bert_padding.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/flash_attn_interface.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/flash_attn_interface.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/flash_attn_interface.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/flash_attn_interface.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/layers/__init__.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/layers/__init__.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/layers/__init__.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/layers/__init__.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/layers/patch_embed.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/layers/patch_embed.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/layers/patch_embed.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/layers/patch_embed.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/layers/rotary.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/layers/rotary.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/layers/rotary.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/layers/rotary.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/__init__.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/__init__.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/__init__.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/__init__.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/activations.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/activations.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/activations.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/activations.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/fused_dense.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/fused_dense.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/fused_dense.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/fused_dense.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/layer_norm.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/layer_norm.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/layer_norm.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/layer_norm.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/rms_norm.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/rms_norm.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/rms_norm.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/rms_norm.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/__init__.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/__init__.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/__init__.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/__init__.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/cross_entropy.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/cross_entropy.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/k_activations.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/k_activations.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/k_activations.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/k_activations.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/layer_norm.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/layer_norm.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/layer_norm.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/layer_norm.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/linear.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/linear.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/linear.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/linear.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/mlp.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/mlp.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/mlp.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/mlp.py diff --git a/build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/rotary.py b/build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/rotary.py similarity index 100% rename from build/torch29-cxx11-cu126-x86_64-linux/flash_attn/ops/triton/rotary.py rename to build/torch29-cxx11-cu126-x86_64-linux/flash_attn2/ops/triton/rotary.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/__init__.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/__init__.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/__init__.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/__init__.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/_ops.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/_ops.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/_ops.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/_ops.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/bert_padding.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/bert_padding.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/bert_padding.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/bert_padding.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/flash_attn_interface.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/flash_attn_interface.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/flash_attn_interface.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/flash_attn_interface.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/layers/__init__.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/layers/__init__.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/layers/__init__.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/layers/__init__.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/layers/patch_embed.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/layers/patch_embed.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/layers/patch_embed.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/layers/patch_embed.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/layers/rotary.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/layers/rotary.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/layers/rotary.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/layers/rotary.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/__init__.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/__init__.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/__init__.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/__init__.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/activations.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/activations.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/activations.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/activations.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/fused_dense.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/fused_dense.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/fused_dense.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/fused_dense.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/layer_norm.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/layer_norm.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/layer_norm.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/layer_norm.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/rms_norm.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/rms_norm.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/rms_norm.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/rms_norm.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/__init__.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/__init__.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/__init__.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/__init__.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/cross_entropy.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/cross_entropy.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/k_activations.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/k_activations.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/k_activations.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/k_activations.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/layer_norm.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/layer_norm.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/layer_norm.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/layer_norm.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/linear.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/linear.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/linear.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/linear.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/mlp.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/mlp.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/mlp.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/mlp.py diff --git a/build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/rotary.py b/build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/rotary.py similarity index 100% rename from build/torch29-cxx11-cu128-x86_64-linux/flash_attn/ops/triton/rotary.py rename to build/torch29-cxx11-cu128-x86_64-linux/flash_attn2/ops/triton/rotary.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/__init__.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/__init__.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/__init__.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/__init__.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/_flash_attn_9e27194.abi3.so rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/_flash_attn_9e27194.abi3.so diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/_ops.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/_ops.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/_ops.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/_ops.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/bert_padding.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/bert_padding.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/bert_padding.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/bert_padding.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/flash_attn_interface.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/flash_attn_interface.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/flash_attn_interface.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/flash_attn_interface.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/layers/__init__.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/layers/__init__.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/layers/__init__.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/layers/__init__.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/layers/patch_embed.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/layers/patch_embed.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/layers/patch_embed.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/layers/patch_embed.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/layers/rotary.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/layers/rotary.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/layers/rotary.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/layers/rotary.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/__init__.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/__init__.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/__init__.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/__init__.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/activations.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/activations.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/activations.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/activations.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/fused_dense.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/fused_dense.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/fused_dense.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/fused_dense.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/layer_norm.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/layer_norm.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/layer_norm.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/layer_norm.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/rms_norm.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/rms_norm.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/rms_norm.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/rms_norm.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/__init__.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/__init__.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/__init__.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/__init__.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/cross_entropy.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/cross_entropy.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/k_activations.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/k_activations.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/k_activations.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/k_activations.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/layer_norm.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/layer_norm.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/layer_norm.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/layer_norm.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/linear.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/linear.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/linear.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/linear.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/mlp.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/mlp.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/mlp.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/mlp.py diff --git a/build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/rotary.py b/build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/rotary.py similarity index 100% rename from build/torch29-cxx11-cu130-x86_64-linux/flash_attn/ops/triton/rotary.py rename to build/torch29-cxx11-cu130-x86_64-linux/flash_attn2/ops/triton/rotary.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/__init__.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/__init__.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/__init__.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/__init__.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/_flash_attn_c984dd4_dirty.abi3.so b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/_flash_attn_c984dd4_dirty.abi3.so similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/_flash_attn_c984dd4_dirty.abi3.so rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/_flash_attn_c984dd4_dirty.abi3.so diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/_ops.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/_ops.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/_ops.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/_ops.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/bert_padding.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/bert_padding.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/bert_padding.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/bert_padding.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/flash_attn_interface.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/flash_attn_interface.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/flash_attn_interface.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/flash_attn_interface.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/layers/__init__.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/layers/__init__.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/layers/__init__.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/layers/__init__.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/layers/patch_embed.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/layers/patch_embed.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/layers/patch_embed.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/layers/patch_embed.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/layers/rotary.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/layers/rotary.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/layers/rotary.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/layers/rotary.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/__init__.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/__init__.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/__init__.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/__init__.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/activations.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/activations.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/activations.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/activations.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/fused_dense.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/fused_dense.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/fused_dense.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/fused_dense.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/layer_norm.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/layer_norm.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/layer_norm.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/layer_norm.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/rms_norm.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/rms_norm.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/rms_norm.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/rms_norm.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/__init__.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/__init__.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/__init__.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/__init__.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/cross_entropy.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/cross_entropy.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/cross_entropy.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/k_activations.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/k_activations.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/k_activations.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/k_activations.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/layer_norm.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/layer_norm.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/layer_norm.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/layer_norm.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/linear.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/linear.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/linear.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/linear.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/mlp.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/mlp.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/mlp.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/mlp.py diff --git a/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/rotary.py b/build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/rotary.py similarity index 100% rename from build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn/ops/triton/rotary.py rename to build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn2/ops/triton/rotary.py