	Update diffusers_helper/memory.py
diffusers_helper/memory.py  (+88 -15)

diffusers_helper/memory.py  CHANGED
@@ -2,10 +2,35 @@
 
 
 import torch
+import os
 
+# Check whether we are running inside a Hugging Face Space
+IN_HF_SPACE = os.environ.get('SPACE_ID') is not None
 
+# Set up the CPU device
 cpu = torch.device('cpu')
-gpu = torch.device(f'cuda:{torch.cuda.current_device()}')
+
+# In a Stateless GPU environment, do not initialize CUDA in the main process
+def get_gpu_device():
+    if IN_HF_SPACE:
+        # Inside Spaces, GPU device initialization is deferred
+        return 'cuda'  # return a string instead of actually initializing the device
+
+    # Outside Spaces, initialize normally
+    try:
+        if torch.cuda.is_available():
+            return torch.device(f'cuda:{torch.cuda.current_device()}')
+        else:
+            print("CUDA is not available, using the CPU as the default device")
+            return torch.device('cpu')
+    except Exception as e:
+        print(f"Error while initializing the CUDA device: {e}")
+        print("Falling back to the CPU device")
+        return torch.device('cpu')
+
+# Keep a string representation rather than an actual device object
+gpu = get_gpu_device()
+
 gpu_complete_modules = []
 
 
@@ -57,7 +82,11 @@ class DynamicSwapInstaller:
         return
 
 
-def fake_diffusers_current_device(model: torch.nn.Module, target_device: torch.device):
+def fake_diffusers_current_device(model: torch.nn.Module, target_device):
+    # Convert a string device into a torch.device
+    if isinstance(target_device, str):
+        target_device = torch.device(target_device)
+
     if hasattr(model, 'scale_shift_table'):
         model.scale_shift_table.data = model.scale_shift_table.data.to(target_device)
         return
@@ -71,19 +100,47 @@ def fake_diffusers_current_device(model: torch.nn.Module, target_device: torch.device):
 def get_cuda_free_memory_gb(device=None):
     if device is None:
         device = gpu
-
-    memory_stats = torch.cuda.memory_stats(device)
-    bytes_active = memory_stats['active_bytes.all.current']
-    bytes_reserved = memory_stats['reserved_bytes.all.current']
-    bytes_free_cuda, _ = torch.cuda.mem_get_info(device)
-    bytes_inactive_reserved = bytes_reserved - bytes_active
-    bytes_total_available = bytes_free_cuda + bytes_inactive_reserved
-    return bytes_total_available / (1024 ** 3)
+
+    # If the device is a string, convert it to a torch.device
+    if isinstance(device, str):
+        device = torch.device(device)
+
+    # For non-CUDA devices, return a default value
+    if device.type != 'cuda':
+        print("Cannot query memory info for a non-CUDA device, returning a default value")
+        return 6.0  # default value
+
+    try:
+        memory_stats = torch.cuda.memory_stats(device)
+        bytes_active = memory_stats['active_bytes.all.current']
+        bytes_reserved = memory_stats['reserved_bytes.all.current']
+        bytes_free_cuda, _ = torch.cuda.mem_get_info(device)
+        bytes_inactive_reserved = bytes_reserved - bytes_active
+        bytes_total_available = bytes_free_cuda + bytes_inactive_reserved
+        return bytes_total_available / (1024 ** 3)
+    except Exception as e:
+        print(f"Error while querying CUDA memory info: {e}")
+        return 6.0  # default value
 
 
 def move_model_to_device_with_memory_preservation(model, target_device, preserved_memory_gb=0):
     print(f'Moving {model.__class__.__name__} to {target_device} with preserved memory: {preserved_memory_gb} GB')
 
+    # If the target device is a string, convert it to a torch.device
+    if isinstance(target_device, str):
+        target_device = torch.device(target_device)
+
+    # If gpu is a string, convert it to a device
+    gpu_device = gpu
+    if isinstance(gpu_device, str):
+        gpu_device = torch.device(gpu_device)
+
+    # If the target device is the CPU, or we are currently on the CPU, move directly
+    if target_device.type == 'cpu' or gpu_device.type == 'cpu':
+        model.to(device=target_device)
+        torch.cuda.empty_cache() if torch.cuda.is_available() else None
+        return
+
     for m in model.modules():
         if get_cuda_free_memory_gb(target_device) <= preserved_memory_gb:
             torch.cuda.empty_cache()
@@ -100,6 +157,21 @@ def move_model_to_device_with_memory_preservation(model, target_device, preserved_memory_gb=0):
 def offload_model_from_device_for_memory_preservation(model, target_device, preserved_memory_gb=0):
     print(f'Offloading {model.__class__.__name__} from {target_device} to preserve memory: {preserved_memory_gb} GB')
 
+    # If the target device is a string, convert it to a torch.device
+    if isinstance(target_device, str):
+        target_device = torch.device(target_device)
+
+    # If gpu is a string, convert it to a device
+    gpu_device = gpu
+    if isinstance(gpu_device, str):
+        gpu_device = torch.device(gpu_device)
+
+    # If the target device is the CPU, or we are currently on the CPU, offload directly
+    if target_device.type == 'cpu' or gpu_device.type == 'cpu':
+        model.to(device=cpu)
+        torch.cuda.empty_cache() if torch.cuda.is_available() else None
+        return
+
     for m in model.modules():
         if get_cuda_free_memory_gb(target_device) >= preserved_memory_gb:
             torch.cuda.empty_cache()
@@ -115,22 +187,23 @@ def offload_model_from_device_for_memory_preservation(model, target_device, preserved_memory_gb=0):
 
 def unload_complete_models(*args):
     for m in gpu_complete_modules + list(args):
-        if m is None:
-            continue
         m.to(device=cpu)
         print(f'Unloaded {m.__class__.__name__} as complete.')
 
     gpu_complete_modules.clear()
-    torch.cuda.empty_cache()
+    torch.cuda.empty_cache() if torch.cuda.is_available() else None
     return
 
 
 def load_model_as_complete(model, target_device, unload=True):
+    # If the target device is a string, convert it to a torch.device
+    if isinstance(target_device, str):
+        target_device = torch.device(target_device)
+
     if unload:
         unload_complete_models()
 
     model.to(device=target_device)
     print(f'Loaded {model.__class__.__name__} to {target_device} as complete.')
 
-    gpu_complete_modules.append(model)
-    return
+    gpu_complete_modules.append(model)
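For context, here is a minimal usage sketch (not part of the commit) of the patched helpers. It assumes the repo's own diffusers_helper.memory module; the tiny torch.nn.Linear stand-in model is purely illustrative. On a ZeroGPU Space, code like this would run inside a @spaces.GPU-decorated function, which is the first point where CUDA may legitimately be initialized.

import torch
from diffusers_helper.memory import (
    gpu,
    get_cuda_free_memory_gb,
    load_model_as_complete,
    unload_complete_models,
)

# Illustrative stand-in; in FramePack this would be a real diffusion module.
model = torch.nn.Linear(8, 8)

# On a Space, `gpu` is just the string 'cuda' at this point; the helpers
# convert it to a torch.device themselves, so no CUDA context is created
# in the main process before a GPU worker runs.
print(f'Free VRAM: {get_cuda_free_memory_gb(gpu):.2f} GB')

load_model_as_complete(model, target_device=gpu)  # accepts str or torch.device
unload_complete_models()                          # back to CPU, cache cleared

Note that the 6.0 GB fallback in get_cuda_free_memory_gb keeps the memory-preservation loops functional when no CUDA device can be queried, at the cost of treating every non-CUDA environment as if it had roughly 6 GB free.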