Spaces:

telecomadm1145
/

AIDetectV2

Running

App Files Files Community

telecomadm1145 committed on Nov 7

Commit

b6adf0f

verified ·

1 Parent(s): 94778bf

Update app.py

Browse files

Files changed (1) hide show

app.py +189 -10

app.py CHANGED Viewed

@@ -5,6 +5,7 @@
 • Swin-V7 / V8 / V9                     : 4-class  (photo / anime × AI / Non-AI)
 • CAFormer-V2.5                         : 4-class  (photo / anime × AI / Non-AI)
 • V3-Emb                                : 2-class  (AI vs. Non-AI)
 -------------------------------------------------------------------
 """
 import os, torch, timm, numpy as np
@@ -24,7 +25,8 @@ HF_FILENAMES = {
     "V2":                 "swin_classifier_stage1_v2_epoch_3.pth",
     "V4":                 "swin_classifier_stage1_v4.pth",
     "V9":                 "swin_classifier_4class_fp16_v9_acc9861.pth",
-    "V3-Emb": "swinv2_v3_v3.pth"
 }
 CKPT_META = {
     "V2": { "n_cls": 2, "head": "v4", "backbone": "swin_large_patch4_window12_384",
@@ -42,9 +44,21 @@ CKPT_META = {
         "backbone_repo_id": "SmilingWolf/wd-swinv2-tagger-v3",
         "backbone_filename": "model.safetensors",
         "names": ["Non-AI Generated", "AI Generated"]
     }
 }
-DEFAULT_CKPT = "V3-Emb"
 LOCAL_CKPT_DIR = "./checkpoints"
 SEED = 4421
 DROP_RATE = 0.1
@@ -55,6 +69,7 @@ print(f"Using device: {device}")
 model, current_ckpt = None, None
 current_meta = None
 class EmbeddingClassifier(nn.Module):
     def __init__(self, input_dim=1024, hidden_dim1=4096, hidden_dim2=256, output_dim=1):
         super().__init__()
@@ -67,12 +82,13 @@ class EmbeddingClassifier(nn.Module):
             nn.LayerNorm(hidden_dim2),
             nn.GELU(),
             nn.Dropout(0.4),
-            nn.Linear(hidden_dim2, output_dim),
-            nn.Sigmoid(),
         )
     def forward(self, x):
-        return self.net(x)
 class EmbeddingClassifierModel(nn.Module):
     def __init__(self, timm_model_name, num_classes):
         super().__init__()
@@ -82,10 +98,134 @@ class EmbeddingClassifierModel(nn.Module):
     def forward(self, x):
         features = self.backbone(x)
-        prob_class0 = self.classifier(features)
         prob_class1 = 1 - prob_class0
         return torch.cat([prob_class0, prob_class1], dim=1)
 class SwinClassifier(nn.Module):
     def __init__(self, model_name, num_classes, pretrained=True,
                  head_version="v4"):
@@ -141,6 +281,8 @@ def load_model(ckpt_name: str):
     ckpt_filename = HF_FILENAMES[ckpt_name]
     head_version = meta.get("head", "v4")
     if head_version == "embedding_classifier":
         print(f"Creating backbone: {meta['timm_model_name']}")
         model = EmbeddingClassifierModel(
@@ -167,7 +309,40 @@ def load_model(ckpt_name: str):
         classifier_state = torch.load(classifier_ckpt_file, map_location=device, weights_only=False)
         model.classifier.load_state_dict(classifier_state)
         print("✅ Classifier head weights loaded.")
     else:
         ckpt_file = hf_hub_download(
             repo_id=REPO_ID,
@@ -235,9 +410,12 @@ def predict(image: Image.Image,
         tfm = build_transform(False, interpolation)
         inp = tfm(image).unsqueeze(0).to(device)
-    if current_meta["head"] == "embedding_classifier":
         probs = model(inp)[0].cpu()
     else:
         probs = F.softmax(model(inp), dim=1)[0].cpu()
     class_names = current_meta["names"]
@@ -251,13 +429,13 @@ def launch():
         gr.Markdown("# AI Detector")
         gr.Markdown(
             "Choose a model checkpoint on the left, upload an image, "
-            "and click **Run** to see predictions. V3-Emb produces the best results."
         )
         with gr.Row():
             with gr.Column(scale=1):
                 run_btn = gr.Button("🚀 Run", variant="primary")
                 sel_ckpt = gr.Dropdown(
-                    list(HF_FILENAMES.keys()),
                     value=DEFAULT_CKPT, label="Checkpoint"
                 )
                 sel_interp = gr.Radio(
@@ -289,4 +467,5 @@ def launch():
     demo.launch()
 if __name__ == "__main__":
-    launch()

 • Swin-V7 / V8 / V9                     : 4-class  (photo / anime × AI / Non-AI)
 • CAFormer-V2.5                         : 4-class  (photo / anime × AI / Non-AI)
 • V3-Emb                                : 2-class  (AI vs. Non-AI)
+• V3-Emb-MoE (新)                       : 2-class  (AI vs. Non-AI, MoE Head)
 -------------------------------------------------------------------
 """
 import os, torch, timm, numpy as np
     "V2":                 "swin_classifier_stage1_v2_epoch_3.pth",
     "V4":                 "swin_classifier_stage1_v4.pth",
     "V9":                 "swin_classifier_4class_fp16_v9_acc9861.pth",
+    "V3-Emb":             "swinv2_v3_v3.pth",
+    "V3-Emb-MoE":         "smoe_emb.pth" # <-- 新增 MoE 模型文件
 }
 CKPT_META = {
     "V2": { "n_cls": 2, "head": "v4", "backbone": "swin_large_patch4_window12_384",
         "backbone_repo_id": "SmilingWolf/wd-swinv2-tagger-v3",
         "backbone_filename": "model.safetensors",
         "names": ["Non-AI Generated", "AI Generated"]
+    },
+    # <-- 新增 MoE 模型元数据 -->
+    "V3-Emb-MoE": {
+        "n_cls": 2,
+        "head": "moe_embedding_classifier", # 新的 head 类型
+        "timm_model_name": "hf_hub:SmilingWolf/wd-swinv2-tagger-v3",
+        "backbone_repo_id": "SmilingWolf/wd-swinv2-tagger-v3",
+        "backbone_filename": "model.safetensors",
+        "names": ["Non-AI Generated", "AI Generated"],
+        "num_experts": 16, # <-- MoE 特定参数
+        "moe_hidden_dim": 1024, # <-- MoE 特定参数
+        "top_k": 2 # 假设 top_k=2，与训练脚本一致
     }
 }
+# Checkpoint key used as the initial value of the "Checkpoint" dropdown.
+DEFAULT_CKPT = "V3-Emb-MoE" # <-- defaults to the new MoE model
+# Passed as local_dir to hf_hub_download when fetching checkpoint files.
 LOCAL_CKPT_DIR = "./checkpoints"
+# NOTE(review): SEED and DROP_RATE are not used in the visible part of this
+# diff; presumably consumed by seeding / model construction code elsewhere.
 SEED = 4421
 DROP_RATE = 0.1
 model, current_ckpt = None, None
 current_meta = None
+# --- Standard classification head (V3-Emb) ---
 class EmbeddingClassifier(nn.Module):
     def __init__(self, input_dim=1024, hidden_dim1=4096, hidden_dim2=256, output_dim=1):
         super().__init__()
             nn.LayerNorm(hidden_dim2),
             nn.GELU(),
             nn.Dropout(0.4),
+            nn.Linear(hidden_dim2, output_dim)
+            # <-- 修改: 移除了 nn.Sigmoid()，包装器将处理激活
         )
     def forward(self, x):
+        return self.net(x) # 输出 logits
+# --- Standard classification head wrapper (V3-Emb) ---
 class EmbeddingClassifierModel(nn.Module):
     def __init__(self, timm_model_name, num_classes):
         super().__init__()
     def forward(self, x):
         features = self.backbone(x)
+        logits = self.classifier(features) # 获取 logits
+        # <-- 修改: 在此处应用 sigmoid 将 logits 转为 prob ---
+        prob_class0 = torch.sigmoid(logits)
+        prob_class1 = 1 - prob_class0
+        return torch.cat([prob_class0, prob_class1], dim=1)
+# --- New: MoE model definitions (V3-Emb-MoE) ---
+# Expert: a single feed-forward expert (Linear -> GELU -> Dropout -> Linear).
+# SparseMoE builds a ModuleList of these.
+class Expert(nn.Module):
+    # input_dim / hidden_dim / output_dim size the two Linear layers;
+    # dropout is the probability given to the nn.Dropout between them.
+    def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.4):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.GELU(),
+            nn.Dropout(dropout),
+            nn.Linear(hidden_dim, output_dim)
+        )
+    # Apply the expert network to x and return its output unchanged.
+    def forward(self, x):
+        return self.net(x)
+# Sparse mixture-of-experts layer: a linear gate scores every expert for each
+# input row, the top_k highest-scoring experts are evaluated, and their
+# outputs are summed using the renormalized gate weights.
+class SparseMoE(nn.Module):
+    # input_dim: width of each input row; experts also output this width.
+    # num_experts: number of Expert modules (and of gate outputs).
+    # top_k: number of experts selected per input row.
+    # expert_hidden_dim: hidden width of every Expert.
+    # load_balancing_alpha: scale factor applied to the auxiliary loss.
+    def __init__(self, input_dim, num_experts, top_k, expert_hidden_dim, load_balancing_alpha=1e-2):
+        super().__init__()
+        self.input_dim = input_dim
+        self.num_experts = num_experts
+        self.top_k = top_k
+        self.load_balancing_alpha = load_balancing_alpha
+        self.gate = nn.Linear(input_dim, num_experts)
+        # Each expert maps input_dim -> expert_hidden_dim -> input_dim, so the
+        # expert outputs can be written back into a buffer shaped like the input.
+        self.experts = nn.ModuleList([
+            Expert(input_dim, expert_hidden_dim, input_dim) for _ in range(num_experts)
+        ])
+    # Route a 2-D input (the shape is unpacked into exactly two dimensions)
+    # through the selected experts.
+    # Returns a tuple (final_output, aux_loss).
+    def forward(self, x):
+        batch_size, _ = x.shape
+        gate_logits = self.gate(x)
+        gate_probs = torch.softmax(gate_logits, dim=-1)
+        top_k_weights, top_k_indices = torch.topk(gate_probs, self.top_k, dim=-1)
+        # Renormalize so the selected experts' weights sum to 1 for each row.
+        top_k_weights = top_k_weights / torch.sum(top_k_weights, dim=-1, keepdim=True)
+        # Auxiliary loss (only matters during training)
+        # f_i: per-expert mean over the batch of the one-hot selection counts.
+        # P_i: per-expert mean gate probability over the batch.
+        tokens_per_expert_onehot = nn.functional.one_hot(top_k_indices, self.num_experts).sum(dim=1).float()
+        f_i = tokens_per_expert_onehot.mean(dim=0)
+        P_i = gate_probs.mean(dim=0)
+        aux_loss = self.load_balancing_alpha * self.num_experts * torch.mean(f_i * P_i)
+        # Repeat every input row once per selected expert, then flatten so row j
+        # of flat_x lines up with entry j of flat_top_k_indices.
+        expanded_x = x.unsqueeze(1).expand(-1, self.top_k, -1)
+        flat_x = expanded_x.flatten(0, 1)
+        flat_top_k_indices = top_k_indices.flatten()
+        flat_output = torch.zeros_like(flat_x)
+        # Run each expert only on the rows routed to it and scatter the results
+        # back into the matching positions of flat_output.
+        for i in range(self.num_experts):
+            mask = (flat_top_k_indices == i)
+            if mask.any():
+                expert_inputs = flat_x[mask]
+                expert_outputs = self.experts[i](expert_inputs)
+                flat_output[mask] = expert_outputs
+        # Regroup to (batch_size, top_k, input_dim) and take the weighted sum
+        # over the top_k axis.
+        expert_outputs_grouped = flat_output.view(batch_size, self.top_k, self.input_dim)
+        weighted_outputs = top_k_weights.unsqueeze(-1) * expert_outputs_grouped
+        final_output = torch.sum(weighted_outputs, dim=1)
+        return final_output, aux_loss
+# Classification head built around a SparseMoE layer: a pre-projection, the
+# MoE layer with a residual connection and LayerNorm, then an MLP head that
+# emits logits.
+class MoEClassifier(nn.Module):
+    # input_dim: width of the incoming feature rows.
+    # output_dim: number of logits produced by the head.
+    # num_experts / top_k / moe_hidden_dim / load_balancing_alpha: forwarded
+    #   to SparseMoE (moe_hidden_dim becomes its expert_hidden_dim).
+    # head_hidden_dim: hidden width of the final MLP head.
+    def __init__(self, input_dim=1024, output_dim=1, num_experts=8, top_k=2,
+                 moe_hidden_dim=2048, head_hidden_dim=256, load_balancing_alpha=1e-2):
+        super().__init__()
+        self.input_dim = input_dim
+        self.num_experts = num_experts
+        self.top_k = top_k
+        self.moe_hidden_dim = moe_hidden_dim
+        self.head_hidden_dim = head_hidden_dim
+        self.load_balancing_alpha = load_balancing_alpha
+        self.pre_moe_net = nn.Sequential(
+            nn.Linear(input_dim, input_dim),
+            nn.LayerNorm(input_dim),
+            nn.GELU()
+        )
+        self.moe_layer = SparseMoE(
+            input_dim=input_dim,
+            num_experts=num_experts,
+            top_k=top_k,
+            expert_hidden_dim=moe_hidden_dim,
+            load_balancing_alpha=load_balancing_alpha
+        )
+        self.moe_ln = nn.LayerNorm(input_dim)
+        self.moe_dropout = nn.Dropout(0.4)
+        self.head = nn.Sequential(
+            nn.Linear(input_dim, head_hidden_dim),
+            nn.LayerNorm(head_hidden_dim),
+            nn.GELU(),
+            nn.Dropout(0.4),
+            nn.Linear(head_hidden_dim, output_dim) # outputs logits
+        )
+    # Returns a tuple (logits, aux_loss); aux_loss is passed through unchanged
+    # from the SparseMoE layer.
+    def forward(self, x):
+        pre_moe_out = self.pre_moe_net(x)
+        moe_input = pre_moe_out
+        moe_output, aux_loss = self.moe_layer(moe_input)
+        moe_output = self.moe_dropout(moe_output)
+        # Residual connection around the MoE layer, then LayerNorm.
+        post_moe = self.moe_ln(moe_output + moe_input)
+        logits = self.head(post_moe)
+        return logits, aux_loss
+# --- New: MoE classification head wrapper (V3-Emb-MoE) ---
+# Combines a timm backbone (created without a classification layer) with a
+# MoEClassifier head and returns two-column probabilities.
+class MoEEmbeddingClassifierModel(nn.Module):
+    # timm_model_name: model identifier passed to timm.create_model.
+    # num_classes: accepted for interface parity but not used in this body;
+    #   the head is always built with output_dim=1.
+    # num_experts / moe_hidden_dim / top_k: forwarded to MoEClassifier.
+    def __init__(self, timm_model_name, num_classes, num_experts, moe_hidden_dim, top_k=2):
+        super().__init__()
+        # pretrained=False: no weights are fetched here; load_model loads the
+        # backbone state dict separately after construction.
+        self.backbone = timm.create_model(timm_model_name, pretrained=False, num_classes=0)
+        self.data_config = timm.data.resolve_data_config({}, model=self.backbone)
+        # Use MoEClassifier as the classification head
+        self.classifier = MoEClassifier(
+            input_dim=self.backbone.num_features,
+            output_dim=1, # 2-class (AI vs Non-AI)
+            num_experts=num_experts,
+            top_k=top_k,
+            moe_hidden_dim=moe_hidden_dim,
+            head_hidden_dim=256 # keep consistent with V3-Emb's head_hidden_dim
+        )
+    # Returns probabilities concatenated along dim=1 as [p, 1 - p], where p is
+    # the sigmoid of the head's logits.
+    # NOTE(review): presumably column 0 pairs with names[0] of the checkpoint
+    # metadata — confirm against the training label convention.
+    def forward(self, x):
+        features = self.backbone(x)
+        logits, aux_loss = self.classifier(features) # MoE returns (logits, aux_loss)
+        # At inference time we only care about the logits
+        prob_class0 = torch.sigmoid(logits)
         prob_class1 = 1 - prob_class0
         return torch.cat([prob_class0, prob_class1], dim=1)
 class SwinClassifier(nn.Module):
     def __init__(self, model_name, num_classes, pretrained=True,
                  head_version="v4"):
     ckpt_filename = HF_FILENAMES[ckpt_name]
     head_version = meta.get("head", "v4")
+    # --- 修改: 扩展加载逻辑 ---
     if head_version == "embedding_classifier":
         print(f"Creating backbone: {meta['timm_model_name']}")
         model = EmbeddingClassifierModel(
         classifier_state = torch.load(classifier_ckpt_file, map_location=device, weights_only=False)
         model.classifier.load_state_dict(classifier_state)
         print("✅ Classifier head weights loaded.")
+    # --- 新增: MoE 加载逻辑 ---
+    elif head_version == "moe_embedding_classifier":
+        print(f"Creating MoE model with backbone: {meta['timm_model_name']}")
+        model = MoEEmbeddingClassifierModel(
+            timm_model_name=meta["timm_model_name"],
+            num_classes=meta["n_cls"],
+            num_experts=meta["num_experts"],
+            moe_hidden_dim=meta["moe_hidden_dim"],
+            top_k=meta.get("top_k", 2) # 从 meta 或 默认值
+        ).to(device)
+        print(f"Loading backbone weights from {meta['backbone_repo_id']}...")
+        backbone_ckpt_file = hf_hub_download(
+            repo_id=meta["backbone_repo_id"],
+            filename=meta["backbone_filename"],
+            local_dir=LOCAL_CKPT_DIR, force_download=False
+        )
+        backbone_state = load_file(backbone_ckpt_file, device=device)
+        model.backbone.load_state_dict(backbone_state,strict=False)
+        print("✅ Backbone weights loaded.")
+        print(f"Loading MoE classifier head weights from {REPO_ID}...")
+        classifier_ckpt_file = hf_hub_download(
+            repo_id=REPO_ID,
+            filename=ckpt_filename,
+            local_dir=LOCAL_CKPT_DIR, force_download=False
+        )
+        # 假设 MoE 头部保存的也是 state_dict
+        classifier_state = torch.load(classifier_ckpt_file, map_location=device, weights_only=False)
+        model.classifier.load_state_dict(classifier_state)
+        print("✅ MoE Classifier head weights loaded.")
+    # --- 原始 Swin 加载逻辑 ---
     else:
         ckpt_file = hf_hub_download(
             repo_id=REPO_ID,
         tfm = build_transform(False, interpolation)
         inp = tfm(image).unsqueeze(0).to(device)
+    # --- 修改: 扩展 logits/prob 处理 ---
+    # V3-Emb 和 V3-Emb-MoE 包装器都已在其 forward 中转换为 2 类概率
+    if current_meta["head"] in ["embedding_classifier", "moe_embedding_classifier"]:
         probs = model(inp)[0].cpu()
     else:
+        # 其他模型 (V2, V4, V9, CAFormer) 输出 logits，需要 softmax
         probs = F.softmax(model(inp), dim=1)[0].cpu()
     class_names = current_meta["names"]
         gr.Markdown("# AI Detector")
         gr.Markdown(
             "Choose a model checkpoint on the left, upload an image, "
+            "and click **Run** to see predictions. V3-Emb-MoE produces the best results."
         )
         with gr.Row():
             with gr.Column(scale=1):
                 run_btn = gr.Button("🚀 Run", variant="primary")
                 sel_ckpt = gr.Dropdown(
+                    list(HF_FILENAMES.keys()), # 自动包含 "V3-Emb-MoE"
                     value=DEFAULT_CKPT, label="Checkpoint"
                 )
                 sel_interp = gr.Radio(
     demo.launch()
 if __name__ == "__main__":
+    launch()