Update optimization_utils.py
optimization_utils.py  CHANGED  (+28 -3)
@@ -98,10 +98,35 @@ def capture_component_call(
         captured_call.kwargs = e.kwargs
 
 
+# def drain_module_parameters(module: torch.nn.Module):
+#     state_dict_meta = {name: {'device': tensor.device, 'dtype': tensor.dtype} for name, tensor in module.state_dict().items()}
+#     state_dict = {name: torch.nn.Parameter(torch.empty_like(tensor, device='cpu')) for name, tensor in module.state_dict().items()}
+#     module.load_state_dict(state_dict, assign=True)
+#     for name, param in state_dict.items():
+#         meta = state_dict_meta[name]
+#         param.data = torch.Tensor([]).to(**meta)
+
 def drain_module_parameters(module: torch.nn.Module):
-    state_dict_meta = {name: {'device': tensor.device, 'dtype': tensor.dtype} for name, tensor in module.state_dict().items()}
-    state_dict = {name: torch.nn.Parameter(torch.empty_like(tensor, device='cpu')) for name, tensor in module.state_dict().items()}
+    state_dict_meta = {
+        name: {'device': tensor.device, 'dtype': tensor.dtype}
+        for name, tensor in module.state_dict().items()
+    }
+
+    state_dict = {}
+    for name, tensor in module.state_dict().items():
+        try:
+            param = torch.nn.Parameter(torch.empty_like(tensor, device='cpu'))
+        except NotImplementedError:
+            # Fallback: dequantize (or convert) if empty_like isn't implemented
+            param = torch.nn.Parameter(tensor.dequantize().to('cpu') if hasattr(tensor, 'dequantize') else tensor.to('cpu'))
+        state_dict[name] = param
+
     module.load_state_dict(state_dict, assign=True)
+
     for name, param in state_dict.items():
         meta = state_dict_meta[name]
-        param.data = torch.Tensor([]).to(**meta)
+        try:
+            param.data = torch.Tensor([]).to(**meta)
+        except NotImplementedError:
+            # Fallback for quantized tensors
+            param.data = (param.dequantize().to(**meta) if hasattr(param, 'dequantize') else torch.Tensor([]).to(**meta))
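The new version differs from the (now commented-out) original only in the two try/except NotImplementedError blocks: when torch.empty_like or the final .to(device=..., dtype=...) call is not implemented for a parameter's tensor subclass (for example quantized weights), it falls back to dequantize() where available. Below is a minimal usage sketch of drain_module_parameters; the torch.nn.Linear stand-in module and the assertions are illustrative assumptions, not part of this Space's code.

# Minimal usage sketch (illustrative; the Linear module is a hypothetical stand-in).
# Note: load_state_dict(..., assign=True) used inside drain_module_parameters
# requires a recent PyTorch (2.1+).
import torch
from optimization_utils import drain_module_parameters

module = torch.nn.Linear(16, 16)
meta_before = {n: (p.dtype, p.device) for n, p in module.named_parameters()}

drain_module_parameters(module)

for name, param in module.named_parameters():
    # Storage is released: each parameter is now a 0-element tensor...
    assert param.numel() == 0
    # ...while the dtype/device recorded in state_dict_meta are preserved.
    assert (param.dtype, param.device) == meta_before[name]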