Update optimization_utils.py
optimization_utils.py  CHANGED  (+28 -3)
@@ -98,10 +98,35 @@ def capture_component_call(
         captured_call.kwargs = e.kwargs
 
 
+# def drain_module_parameters(module: torch.nn.Module):
+#     state_dict_meta = {name: {'device': tensor.device, 'dtype': tensor.dtype} for name, tensor in module.state_dict().items()}
+#     state_dict = {name: torch.nn.Parameter(torch.empty_like(tensor, device='cpu')) for name, tensor in module.state_dict().items()}
+#     module.load_state_dict(state_dict, assign=True)
+#     for name, param in state_dict.items():
+#         meta = state_dict_meta[name]
+#         param.data = torch.Tensor([]).to(**meta)
+
 def drain_module_parameters(module: torch.nn.Module):
-    state_dict_meta = {name: {'device': tensor.device, 'dtype': tensor.dtype} for name, tensor in module.state_dict().items()}
-    state_dict = {name: torch.nn.Parameter(torch.empty_like(tensor, device='cpu')) for name, tensor in module.state_dict().items()}
+    state_dict_meta = {
+        name: {'device': tensor.device, 'dtype': tensor.dtype}
+        for name, tensor in module.state_dict().items()
+    }
+
+    state_dict = {}
+    for name, tensor in module.state_dict().items():
+        try:
+            param = torch.nn.Parameter(torch.empty_like(tensor, device='cpu'))
+        except NotImplementedError:
+            # Fallback: dequantize (or convert) if empty_like isn't implemented
+            param = torch.nn.Parameter(tensor.dequantize().to('cpu') if hasattr(tensor, 'dequantize') else tensor.to('cpu'))
+        state_dict[name] = param
+
     module.load_state_dict(state_dict, assign=True)
+
     for name, param in state_dict.items():
         meta = state_dict_meta[name]
-        param.data = torch.Tensor([]).to(**meta)
+        try:
+            param.data = torch.Tensor([]).to(**meta)
+        except NotImplementedError:
+            # Fallback for quantized tensors
+            param.data = (param.dequantize().to(**meta) if hasattr(param, 'dequantize') else torch.Tensor([]).to(**meta))
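The new version differs from the (now commented-out) original only in the two try/except NotImplementedError blocks: when torch.empty_like or the final .to(device=..., dtype=...) call is not implemented for a parameter's tensor subclass (for example quantized weights), it falls back to dequantize() where available. Below is a minimal usage sketch of drain_module_parameters; the torch.nn.Linear stand-in module and the assertions are illustrative assumptions, not part of this Space's code.

# Minimal usage sketch (illustrative; the Linear module is a hypothetical stand-in).
# Note: load_state_dict(..., assign=True) used inside drain_module_parameters
# requires a recent PyTorch (2.1+).
import torch
from optimization_utils import drain_module_parameters

module = torch.nn.Linear(16, 16)
meta_before = {n: (p.dtype, p.device) for n, p in module.named_parameters()}

drain_module_parameters(module)

for name, param in module.named_parameters():
    # Storage is released: each parameter is now a 0-element tensor...
    assert param.numel() == 0
    # ...while the dtype/device recorded in state_dict_meta are preserved.
    assert (param.dtype, param.device) == meta_before[name]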