Spaces:

DawnC
/

PawMatchAI

Running on Zero

App Files Files Community

DawnC committed on Nov 28, 2024

Commit

bf1a76e

1 Parent(s): 4331937

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -40

app.py CHANGED Viewed

@@ -538,46 +538,34 @@ from urllib.parse import quote
 from ultralytics import YOLO
 import asyncio
 import traceback
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-os.environ['HF_ZERO_GPU'] = '1'  # 明確告訴系統我們要使用 ZeroGPU
-os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
 def get_device():
     print("Initializing device configuration...")
-    # 特別針對 ZeroGPU 的檢測邏輯
-    if 'HF_ZERO_GPU' in os.environ and torch.cuda.is_available():
-        try:
-            # 強制進行 CUDA 初始化
-            torch.cuda.init()
-            # 等待一小段時間讓系統完成初始化
-            import time
-            time.sleep(2)
             device = torch.device('cuda')
-            # 執行一個小的測試來確認 GPU 功能
-            test_tensor = torch.rand(1).to(device)
-            _ = test_tensor * test_tensor
-            print("ZeroGPU initialization successful")
-            print(f"Using device: {device}")
-            if torch.cuda.is_available():
-                print(f"GPU: {torch.cuda.get_device_name(0)}")
             return device
-        except Exception as e:
-            print(f"ZeroGPU initialization failed: {str(e)}")
-            print("Falling back to CPU")
-            return torch.device('cpu')
-    else:
-        if not torch.cuda.is_available():
-            print("CUDA not available, using CPU")
-        elif 'HF_ZERO_GPU' not in os.environ:
-            print("HF_ZERO_GPU not set, using CPU")
-        return torch.device('cpu')
 device = get_device()
@@ -670,18 +658,60 @@ class BaseModel(nn.Module):
         logits = self.classifier(attended_features)
         return logits, attended_features
 # Initialize model
 num_classes = len(dog_breeds)
-# Initialize base model
 model = BaseModel(num_classes=num_classes, device=device)
-# Load model path
-model_path = "124_best_model_dog.pth"
-checkpoint = torch.load(model_path, map_location=device)
-# Load model state
-model.load_state_dict(checkpoint['base_model'], strict=False)
 model.eval()
 # Image preprocessing function

 from ultralytics import YOLO
 import asyncio
 import traceback
+import spaces
+import torch.cuda.amp
+# os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+# os.environ['HF_ZERO_GPU'] = '1'  # 明確告訴系統我們要使用 ZeroGPU
+# os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
@spaces.GPU
def get_device():
    """Pick the torch device used by the rest of the module.

    Returns ``torch.device('cuda')`` when CUDA is available and can be
    initialized; otherwise returns ``torch.device('cpu')``. Any exception
    raised while setting up CUDA is printed and treated as "no GPU", so this
    function itself never raises for a GPU setup failure.

    Returns:
        torch.device: ``cuda`` on success, ``cpu`` as the fallback.
    """
    print("Initializing device configuration...")
    try:
        # Check availability first so a CPU-only host goes straight to the
        # fallback instead of reporting a spurious initialization error.
        if torch.cuda.is_available():
            # Force CUDA initialization now rather than lazily on first use.
            torch.cuda.init()
            # Relax float32 matmul precision (this is a matmul precision
            # setting, not autocast-style mixed precision).
            torch.set_float32_matmul_precision('medium')
            device = torch.device('cuda')
            # torch.cuda.set_device() needs an explicit index: passing the
            # index-less torch.device('cuda') raises ValueError, which the
            # handler below would turn into a silent CPU fallback.
            device_index = 0 if device.index is None else device.index
            torch.cuda.set_device(device_index)
            print("Successfully initialized CUDA device")
            return device
    except Exception as e:
        # Deliberate best-effort boundary: any GPU setup failure means CPU.
        print(f"GPU initialization error: {str(e)}")
    print("Using CPU fallback")
    return torch.device('cpu')
 device = get_device()
         logits = self.classifier(attended_features)
         return logits, attended_features
def load_model(model_path, model_instance, device):
    """Load the ``'base_model'`` weights from a checkpoint into a model.

    The checkpoint is first read with ``weights_only=True``. If that read
    fails, the error is printed and the checkpoint is read again with
    ``torch.load``'s default settings. After either read the weights are
    applied non-strictly, the model is moved to ``device`` and switched to
    evaluation mode, so both paths return the model in the same state.

    Args:
        model_path: Path to the checkpoint file. The loaded object must
            expose the state dict under the ``'base_model'`` key.
        model_instance: The module instance that receives the weights.
        device: ``torch.device`` used as ``map_location`` for the read and as
            the final location of the model.

    Returns:
        ``model_instance`` with weights loaded, on ``device``, in eval mode.

    Raises:
        Exception: Whatever the fallback ``torch.load`` raises when the
            checkpoint cannot be read at all.
        KeyError: If the loaded checkpoint has no ``'base_model'`` entry.
    """
    print(f"正在將模型載入到設備: {device}")

    # No autocast context is used here: this function only deserializes and
    # copies weights; it runs no forward computation.
    try:
        # Only the read itself is guarded, so errors from applying the
        # weights surface directly instead of triggering a pointless retry.
        checkpoint = torch.load(
            model_path,
            map_location=device,
            weights_only=True,
        )
    except Exception as e:
        print(f"模型載入出錯: {str(e)}")
        print("嘗試使用基本載入方式...")
        # SECURITY NOTE(review): this retry uses torch.load's default
        # settings, which may perform unrestricted unpickling depending on
        # the installed PyTorch version. Only use with trusted checkpoints.
        checkpoint = torch.load(model_path, map_location=device)

    # strict=False tolerates missing/unexpected keys in the checkpoint.
    model_instance.load_state_dict(checkpoint['base_model'], strict=False)
    # Always place the model on the requested device, including after the
    # fallback read, so model and inputs never end up on different devices.
    model_instance = model_instance.to(device)
    model_instance.eval()
    print("模型載入成功")
    return model_instance
 # Initialize model
 num_classes = len(dog_breeds)
 model = BaseModel(num_classes=num_classes, device=device)
+# 使用優化後的載入函數
+model = load_model("124_best_model_dog.pth", model, device)
 model.eval()
 # Image preprocessing function