Spaces: Running on Zero
Update app.py
Browse files
app.py CHANGED
@@ -6,7 +6,7 @@ import gradio as gr
 import time
 import traceback
 import spaces
-from torchvision.models import efficientnet_v2_m, EfficientNet_V2_M_Weights
+from torchvision.models import convnext_base, ConvNeXt_Base_Weights
 from torchvision.ops import nms, box_iou
 import torch.nn.functional as F
 from torchvision import transforms
@@ -98,29 +98,61 @@ class MultiHeadAttention(nn.Module):
         return out
 
 class BaseModel(nn.Module):
+
     def __init__(self, num_classes, device='cuda' if torch.cuda.is_available() else 'cpu'):
         super().__init__()
         self.device = device
-        self.backbone = efficientnet_v2_m(weights=EfficientNet_V2_M_Weights.IMAGENET1K_V1)
-        self.feature_dim = self.backbone.classifier[1].in_features
-        self.backbone.classifier = nn.Identity()
 
+        # 1. Initialize the backbone
+        self.backbone = convnext_base(weights=ConvNeXt_Base_Weights.IMAGENET1K_V1)
+        self.backbone.classifier = nn.Identity()  # remove the original classifier
+
+        # 2. Determine the actual feature dimension with a test input
+        with torch.no_grad():  # no gradient computation needed
+            dummy_input = torch.randn(1, 3, 224, 224)  # create a sample input
+            features = self.backbone(dummy_input)
+            if len(features.shape) > 2:  # if the features are multi-dimensional
+                features = features.mean([-2, -1])  # apply global average pooling
+            self.feature_dim = features.shape[1]  # record the correct feature dimension
+
+        print(f"Feature Dim: {self.feature_dim}")  # helps with debugging
+
+        # 3. Set up the multi-head attention layer
         self.num_heads = max(1, min(8, self.feature_dim // 64))
         self.attention = MultiHeadAttention(self.feature_dim, num_heads=self.num_heads)
 
+        # 4. Set up the classifier
         self.classifier = nn.Sequential(
             nn.LayerNorm(self.feature_dim),
             nn.Dropout(0.3),
             nn.Linear(self.feature_dim, num_classes)
         )
 
-        self.to(device)
-
     def forward(self, x):
+        """
+        Forward pass of the model.
+        Args:
+            x (Tensor): input image tensor of shape [batch_size, channels, height, width]
+        Returns:
+            Tuple[Tensor, Tensor]: classification logits and attended features
+        """
         x = x.to(self.device)
+
+        # 1. Extract base features
         features = self.backbone(x)
+
+        # 2. Handle the feature dimensions
+        if len(features.shape) > 2:
+            # if the features have shape [batch_size, channels, height, width],
+            # convert them to [batch_size, channels]
+            features = features.mean([-2, -1])  # global average pooling
+
+        # 3. Apply the attention mechanism
         attended_features = self.attention(features)
+
+        # 4. Final classification
         logits = self.classifier(attended_features)
+
         return logits, attended_features
 
 
@@ -179,7 +211,7 @@ class ModelManager:
         ).to(self.device)
 
         checkpoint = torch.load(
-            '
+            'ConvNextBase_best_model_dog.pth',
             map_location=self.device  # ensure the checkpoint is loaded onto the correct device
         )
         self._breed_model.load_state_dict(checkpoint['base_model'], strict=False)
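A note on the dummy-input probe added in __init__: with the whole classifier replaced by nn.Identity, torchvision's ConvNeXt still applies its internal average pool, so the probe comes back as [1, 1024, 1, 1] and the dimension check collapses it to [1, 1024]. A minimal standalone sketch of the same technique (variable names here are illustrative, not from app.py):

import torch
import torch.nn as nn
from torchvision.models import convnext_base, ConvNeXt_Base_Weights

backbone = convnext_base(weights=ConvNeXt_Base_Weights.IMAGENET1K_V1)
backbone.classifier = nn.Identity()  # strip the pretrained classification head

with torch.no_grad():
    feats = backbone(torch.randn(1, 3, 224, 224))  # sample-input probe
    if feats.dim() > 2:               # ConvNeXt returns [1, 1024, 1, 1] here
        feats = feats.mean([-2, -1])  # global average pooling -> [1, 1024]

feature_dim = feats.shape[1]
print(feature_dim)  # 1024 for ConvNeXt-Base

Probing the output shape instead of reading classifier[1].in_features is what makes the class backbone-agnostic; the old EfficientNet-specific line was the part that broke when the backbone changed.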
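With feature_dim = 1024, the head heuristic resolves to max(1, min(8, 1024 // 64)) = 8, i.e. 128 dimensions per head, and 1024 divides evenly by 8, which any multi-head split requires. A quick check of the arithmetic, using PyTorch's built-in nn.MultiheadAttention as a stand-in for the custom MultiHeadAttention class (an assumption; the real layer is defined in app.py):

import torch
import torch.nn as nn

feature_dim = 1024
num_heads = max(1, min(8, feature_dim // 64))  # min(8, 16) -> 8
assert feature_dim % num_heads == 0            # 128 dims per head

# nn.MultiheadAttention stands in for the custom MultiHeadAttention
attn = nn.MultiheadAttention(feature_dim, num_heads=num_heads, batch_first=True)
x = torch.randn(2, 1, feature_dim)             # [batch, seq_len=1, dim]
out, _ = attn(x, x, x)                         # self-attention
print(out.shape)                               # torch.Size([2, 1, 1024])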
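On the loading side, map_location keeps the checkpoint on whatever device the manager selected, and strict=False lets the state dict apply even though backbone keys changed from EfficientNet to ConvNeXt. A hedged sketch of inspecting what actually loaded (the import path and num_classes=120 are assumed placeholders, not taken from app.py):

import torch
from app import BaseModel  # app.py in this Space (assumed import path)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = BaseModel(num_classes=120, device=device)  # 120 is a placeholder count

checkpoint = torch.load('ConvNextBase_best_model_dog.pth', map_location=device)
missing, unexpected = model.load_state_dict(checkpoint['base_model'], strict=False)
print(missing)     # keys the checkpoint did not provide
print(unexpected)  # checkpoint keys the model does not use

Worth noting that strict=False silently skips mismatched keys, so printing missing_keys/unexpected_keys is a cheap sanity check that the ConvNeXt checkpoint really matches the rebuilt model.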