Spaces:

telecomadm1145
/

AIDetectV2

Running

App Files Files Community

telecomadm1145 committed on Jul 21

Commit

f014c13

verified ·

1 Parent(s): af382d3

Update app.py

Browse files

Files changed (1) hide show

app.py +148 -130

app.py CHANGED Viewed

@@ -1,217 +1,235 @@
 # -*- coding: utf-8 -*-
 """
-Swin-Large AI vs. Non-AI Detector (with Model Selection & Attention Visualization) - V5 Update
 """
-import os
-import math
-import torch
-import torch.nn.functional as F
 import torch.nn as nn
-import timm
-import numpy as np
 from PIL import Image
 import gradio as gr
-import matplotlib.pyplot as plt
 from huggingface_hub import hf_hub_download
-# --- Configuration ---------------------------------------------------------
-REPO_ID = "telecomadm1145/swin-ai-detection"  # Hugging Face repo the checkpoints are downloaded from
 HF_FILENAMES = {  # checkpoint label shown in the UI -> checkpoint filename inside REPO_ID
-    "V2": "swin_classifier_stage1_v2_epoch_3.pth",
-    "V4": "swin_classifier_stage1_v4.pth",
-    "V5(underfitting)": "swin_classifier_stage1_v5_fp16.pth",
 }
-DEFAULT_CKPT = "V4" # Checkpoint pre-loaded at startup and preselected in the UI
 LOCAL_CKPT_DIR = "./checkpoints"  # passed to hf_hub_download as local_dir
-MODEL_NAME = "swin_large_patch4_window12_384"  # timm model name used for the backbone
-NUM_CLASSES = 2
 SEED = 4421
-dropout_rate = 0.1  # base dropout; later head layers scale it by 0.7 and 0.5
-class_names = ["Non-AI Generated", "AI Generated"] # labels for class index 0, 1
 device = "cuda" if torch.cuda.is_available() else "cpu"
-torch.manual_seed(SEED)
-np.random.seed(SEED)
 print(f"Using device: {device}")
-# --- Global model state ----------------------------------------------------
-model = None  # currently loaded SwinClassifier; set by load_model()
-current_ckpt_name = None  # HF_FILENAMES key of the loaded checkpoint
-attention_maps = [] # NOTE(review): meant for hooked attention maps; nothing in the visible code fills it
-# ---------------------------------------------------------------------------
-# 1. 模型结构 (Model Structure)
 class SwinClassifier(nn.Module):
-    """
-    Swin Transformer based classifier.
-    The MLP head can be configured for different model versions (V2/V4 vs. V5).
-    """
-    def __init__(self, model_name, num_classes, pretrained=True, classifier_version='v4'):  # 'v5' selects the GELU head, any other value the ReLU head
         super().__init__()
-        self.backbone = timm.create_model(model_name, pretrained=pretrained,
-                                          num_classes=0) # Get features only
         self.data_config = timm.data.resolve_data_config({}, model=self.backbone)  # preprocessing config; read by build_transform()
-        # Select classifier head based on version
-        if classifier_version == 'v5':
-            print("Using V5 classifier head with GELU activation.")
             self.classifier = nn.Sequential(
-                nn.Dropout(dropout_rate),
                 nn.Linear(self.backbone.num_features, 512),
                 nn.BatchNorm1d(512),
-                nn.GELU(),  # V5 uses GELU
-                nn.Dropout(dropout_rate * 0.7),
                 nn.Linear(512, 128),
                 nn.BatchNorm1d(128),
-                nn.GELU(),  # V5 uses GELU
-                nn.Dropout(dropout_rate * 0.5),
-                nn.Linear(128, num_classes)
             )
-        else:  # every value other than 'v5' (load_model passes 'v4' for V2 / V4)
-            print("Using V2/V4 classifier head with ReLU activation.")
             self.classifier = nn.Sequential(
-                nn.Dropout(dropout_rate),
                 nn.Linear(self.backbone.num_features, 512),
                 nn.BatchNorm1d(512),
                 nn.ReLU(),
-                nn.Dropout(dropout_rate * 0.7),
                 nn.Linear(512, 128),
                 nn.BatchNorm1d(128),
                 nn.ReLU(),
-                nn.Dropout(dropout_rate * 0.5),
-                nn.Linear(128, num_classes)
             )
     def forward(self, x):  # backbone features -> MLP head; returns raw logits (the caller applies softmax)
-        feats = self.backbone(x)
-        return self.classifier(feats)
-# ---------------------------------------------------------------------------
-# 2. 动态模型加载函数 (Dynamic Model Loading Function)
 def load_model(ckpt_name: str):
-    """
-    Dynamically loads the selected model checkpoint.
-    If the model is already loaded, it does nothing.
-    It selects the correct classifier head based on the checkpoint name.
-    """
-    global model, current_ckpt_name
-    if ckpt_name == current_ckpt_name and model is not None:
-        #print(f"✅ Model '{ckpt_name}' is already loaded.")
-        return
-    print(f"🔄 Switching to model: '{ckpt_name}'...")
-    hf_filename = HF_FILENAMES[ckpt_name]  # raises KeyError for an unknown checkpoint label
-    print("⏬ Downloading / caching checkpoint if needed…")
-    ckpt_path = hf_hub_download(
         repo_id=REPO_ID,
-        filename=hf_filename,
-        local_dir=LOCAL_CKPT_DIR,
-        force_download=False
     )
-    print(f"Checkpoint path: {ckpt_path}")
-    # Determine which classifier version to use based on the checkpoint name
-    classifier_version = 'v5' if 'V5' in ckpt_name else 'v4'  # substring test on the UI label, not on the filename
-    # Instantiate and load weights
     model = SwinClassifier(
         MODEL_NAME,
-        NUM_CLASSES,
-        pretrained=False,
-        classifier_version=classifier_version
     ).to(device)
-    state = torch.load(ckpt_path, map_location=device, weights_only=False)  # NOTE(review): weights_only=False allows full unpickling; only safe for trusted checkpoints
     model.load_state_dict(state.get("model_state_dict", state), strict=True)  # accepts a wrapped {"model_state_dict": ...} dict or a bare state dict
     model.eval()
-    current_ckpt_name = ckpt_name  # recorded last, so the early-return check above only matches after a successful load
-    print(f"✅ Model '{ckpt_name}' loaded successfully.")
-# ---------------------------------------------------------------------------
-# 3. torchvision / timm transform 工厂函数 (Transform Factory Function)
 def build_transform(is_training: bool, interpolation: str):  # timm transform built from the loaded model's data config
-    if model is None:
-        raise RuntimeError("Model is not loaded. Please call load_model() first.")
     cfg = model.data_config.copy()  # copy so the config stored on the model is not mutated
     cfg.update(dict(interpolation=interpolation))  # override the interpolation with the caller's choice
     return timm.data.create_transform(**cfg, is_training=is_training)
-# ---------------------------------------------------------------------------
-# 5. 推理 (Inference)
-def predict_and_visualize(image_pil: Image.Image,
-                          ckpt_name: str,
-                          interpolation: str = "bicubic"):  # classify one image; returns {class name: probability}
-    if image_pil is None:
-        return None, None  # NOTE(review): a 2-tuple here, but a single dict below and only one UI output is wired
-    load_model(ckpt_name)  # returns immediately when ckpt_name is already loaded
-    transform = build_transform(is_training=False, interpolation=interpolation)
-    input_tensor = transform(image_pil).unsqueeze(0).to(device)  # unsqueeze(0) adds a leading batch dimension of size 1
-    with torch.no_grad():
-        logits = model(input_tensor)
-    probs = F.softmax(logits, dim=1)[0]  # softmax over dim 1, then take the single batch item
-    confidences = {class_names[i]: float(probs[i]) for i in range(NUM_CLASSES)}
-    return confidences
-# ---------------------------------------------------------------------------
-# 6. Gradio UI
-def launch_app():  # build the Gradio Blocks UI and start serving it
-    # Pre-load the default model on startup
-    load_model(DEFAULT_CKPT)
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
-        gr.Markdown("# 🖼️ AI vs. Non-AI Image Classifier")
-        gr.Markdown("Using Swin-Large Transformer with multiple model checkpoints.")
         with gr.Row():
             with gr.Column(scale=1):
                 run_btn = gr.Button("🚀 Run", variant="primary")
-                model_choice = gr.Dropdown(
-                    list(HF_FILENAMES.keys()), value=DEFAULT_CKPT, label="Select Model"
                 )
-                interp_choice = gr.Radio(
-                    ["bilinear", "bicubic", "nearest"], value="bicubic",
-                    label="Resize Interpolation (Preprocessing)"
                 )
-                in_img = gr.Image(type="pil", label="Upload an Image")
-            with gr.Column(scale=2):
-                out_lbl = gr.Label(num_top_classes=2, label="Predictions")
         run_btn.click(
-            predict_and_visualize,
-            inputs=[in_img, model_choice, interp_choice],  # order matches predict_and_visualize(image_pil, ckpt_name, interpolation)
             outputs=[out_lbl]
         )
-        # Create a dummy examples directory if it doesn't exist
-        example_dir = "examples"
-        if not os.path.exists(example_dir):
-            os.makedirs(example_dir)
-            print(f"Created '{example_dir}' directory. Please add sample images there for UI examples.")
-        # Check for example files before creating the component
-        example_files = [os.path.join(example_dir, f) for f in os.listdir(example_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
         if example_files:  # only add the Examples component when images were found
             gr.Examples(
                 examples=[[f, DEFAULT_CKPT, "bicubic"] for f in example_files],  # one row per image: [path, checkpoint, interpolation]
-                inputs=[in_img, model_choice, interp_choice],
                 outputs=[out_lbl],
-                fn=predict_and_visualize,
                 cache_examples=False,
             )
     demo.launch()
-# ---------------------------------------------------------------------------
 if __name__ == "__main__":
-    launch_app()

 # -*- coding: utf-8 -*-
 """
+Swin-Large AI / Non-AI   ‑- now with V7 (4-class) support
+-------------------------------------------------------------------
+• V2 / V4 / V5(underfitting)          : 2-class  (photo-style  AI vs. Non-AI)
+• NEW  V7                             : 4-class  (photo / anime × AI / Non-AI)
+-------------------------------------------------------------------
+Author : you 😊
 """
+import os, torch, timm, math, numpy as np
 import torch.nn as nn
+import torch.nn.functional as F
 from PIL import Image
 import gradio as gr
 from huggingface_hub import hf_hub_download
+# --------------------------------------------------
+# 1. Model & Checkpoint Meta-data
+# --------------------------------------------------
+REPO_ID = "telecomadm1145/swin-ai-detection"     # keeping both kinds of checkpoint in the same repo is fine
 HF_FILENAMES = {  # checkpoint label shown in the UI -> checkpoint filename inside REPO_ID
+    "V2":                 "swin_classifier_stage1_v2_epoch_3.pth",
+    "V4":                 "swin_classifier_stage1_v4.pth",
+    "V5(underfitting)":   "swin_classifier_stage1_v5_fp16.pth",
+    "V7":                 "swin_classifier_4class_fp16_v7.pth"   # <-- NEW
+}
+CKPT_META = {  # per-checkpoint settings read by load_model() and predict(): class count, head variant, label names
+    "V2": { "n_cls": 2, "head": "v4",
+            "names": ["Non-AI Generated", "AI Generated"]},
+    "V4": { "n_cls": 2, "head": "v4",
+            "names": ["Non-AI Generated", "AI Generated"]},
+    "V5(underfitting)": { "n_cls": 2, "head": "v5",
+            "names": ["Non-AI Generated", "AI Generated"]},
+    # ---------- NEW ----------
+    "V7": { "n_cls": 4, "head": "v7",
+            "names": ["non_ai", "ai", "ani_non_ai", "ani_ai"]},  # names[i] labels class index i in predict()
 }
+DEFAULT_CKPT = "V4"     # default: still load the smaller 2-class model first
+MODEL_NAME  = "swin_large_patch4_window12_384"  # timm model name used for the backbone
 LOCAL_CKPT_DIR = "./checkpoints"  # passed to hf_hub_download as local_dir
 SEED = 4421
+DROP_RATE = 0.1  # base dropout; later head layers scale it by 0.8 / 0.7 / 0.5
 device = "cuda" if torch.cuda.is_available() else "cpu"
+torch.manual_seed(SEED);  np.random.seed(SEED)
 print(f"Using device: {device}")
+# --------------------------------------------------
+# 2. Global State
+# --------------------------------------------------
+model, current_ckpt = None, None  # loaded SwinClassifier and the HF_FILENAMES key it was loaded from
+current_meta = None   # CKPT_META entry of the currently loaded model (class count / names)
+# --------------------------------------------------
+# 3. SwinClassifier – adds a V7-specific MLP head
+# --------------------------------------------------
 class SwinClassifier(nn.Module):  # timm backbone plus an MLP head chosen by head_version
+    def __init__(self, model_name, num_classes, pretrained=True,
+                 head_version="v4"):  # "v7" / "v5" pick their own heads; any other value gets the V2 / V4 head
         super().__init__()
+        self.backbone = timm.create_model(
+            model_name, pretrained=pretrained, num_classes=0  # num_classes=0: features only, no built-in classifier
+        )
         self.data_config = timm.data.resolve_data_config({}, model=self.backbone)  # preprocessing config; read by build_transform()
+        # ------- choose the head according to the version -------
+        if head_version == "v7":            # <-- V7: minimal 64-hidden, GELU
+            self.classifier = nn.Sequential(
+                nn.Dropout(DROP_RATE),
+                nn.Linear(self.backbone.num_features, 64),
+                nn.BatchNorm1d(64),
+                nn.GELU(),
+                nn.Dropout(DROP_RATE * 0.8),
+                nn.Linear(64, num_classes),
+            )
+        elif head_version == "v5":          # V5: 512-128, GELU
             self.classifier = nn.Sequential(
+                nn.Dropout(DROP_RATE),
                 nn.Linear(self.backbone.num_features, 512),
                 nn.BatchNorm1d(512),
+                nn.GELU(),
+                nn.Dropout(DROP_RATE * 0.7),
                 nn.Linear(512, 128),
                 nn.BatchNorm1d(128),
+                nn.GELU(),
+                nn.Dropout(DROP_RATE * 0.5),
+                nn.Linear(128, num_classes),
             )
+        else:                               # V2 / V4: 512-128, ReLU
             self.classifier = nn.Sequential(
+                nn.Dropout(DROP_RATE),
                 nn.Linear(self.backbone.num_features, 512),
                 nn.BatchNorm1d(512),
                 nn.ReLU(),
+                nn.Dropout(DROP_RATE * 0.7),
                 nn.Linear(512, 128),
                 nn.BatchNorm1d(128),
                 nn.ReLU(),
+                nn.Dropout(DROP_RATE * 0.5),
+                nn.Linear(128, num_classes),
             )
     def forward(self, x):  # backbone features -> MLP head; returns raw logits (predict() applies softmax)
+        return self.classifier(self.backbone(x))
+# --------------------------------------------------
+# 4. Dynamic model loading
+# --------------------------------------------------
 def load_model(ckpt_name: str):
+    """Load model only when `ckpt_name` changes."""
+    global model, current_ckpt, current_meta
+    if ckpt_name == current_ckpt and model is not None:
+        return
+    print(f"\n🔄 Switching to {ckpt_name} ...")
+    meta = CKPT_META[ckpt_name]  # raises KeyError for a label missing from CKPT_META
+    ckpt_file = hf_hub_download(
         repo_id=REPO_ID,
+        filename=HF_FILENAMES[ckpt_name],
+        local_dir=LOCAL_CKPT_DIR, force_download=False
     )
+    print(f"Checkpoint: {ckpt_file}")
+    # Build model structure
     model = SwinClassifier(  # NOTE(review): the global is rebound before weights load; a failure below leaves an untrained model behind a stale current_ckpt
         MODEL_NAME,
+        num_classes       = meta["n_cls"],
+        pretrained        = False,
+        head_version      = meta["head"]
     ).to(device)
+    # compatible load: accepts a wrapped {"model_state_dict": ...} dict or a bare state dict
+    state = torch.load(ckpt_file, map_location=device, weights_only=False)  # NOTE(review): weights_only=False allows full unpickling; only safe for trusted checkpoints
     model.load_state_dict(state.get("model_state_dict", state), strict=True)
     model.eval()
+    current_ckpt, current_meta = ckpt_name, meta  # recorded only after a successful load
+    print(f"✅ {ckpt_name} loaded (classes = {meta['n_cls']}).")
+# --------------------------------------------------
+# 5. Transform factory
+# --------------------------------------------------
 def build_transform(is_training: bool, interpolation: str):  # timm transform built from the loaded model's data config
+    if model is None: raise RuntimeError("Model not loaded yet.")
     cfg = model.data_config.copy()  # copy so the config stored on the model is not mutated
     cfg.update(dict(interpolation=interpolation))  # override the interpolation with the caller's choice
     return timm.data.create_transform(**cfg, is_training=is_training)
+# --------------------------------------------------
+# 6. Inference
+# --------------------------------------------------
+@torch.no_grad()
+def predict(image: Image.Image,
+            ckpt_name: str,
+            interpolation: str = "bicubic"):  # classify one image; returns {class name: probability}, or None without an image
+    if image is None: return None
+    load_model(ckpt_name)  # returns immediately when ckpt_name is already loaded
+    tfm = build_transform(False, interpolation)  # is_training=False
+    inp = tfm(image).unsqueeze(0).to(device)  # unsqueeze(0) adds a leading batch dimension of size 1
+    probs = F.softmax(model(inp), dim=1)[0].cpu()  # softmax over dim 1, then take the single batch item
+    class_names = current_meta["names"]  # label names of whichever checkpoint load_model() left active
+    # make sure gr.Label displays correctly for both 2 and 4 classes
+    return {class_names[i]: float(probs[i])
+            for i in range(len(class_names))}
+# --------------------------------------------------
+# 7. Gradio UI
+# --------------------------------------------------
+def launch():  # build the Gradio Blocks UI and start serving it
+    load_model(DEFAULT_CKPT)      # preload
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
+        gr.Markdown("# 🖼️ Swin-Large — AI / Non-AI  Detector (V2-V7)")
+        gr.Markdown(
+            "Choose a model checkpoint on the left, upload an image, "
+            "and click **Run** to see predictions.  V7 outputs 4 classes."
+        )
         with gr.Row():
             with gr.Column(scale=1):
+                in_img  = gr.Image(type="pil", label="Upload Image")
                 run_btn = gr.Button("🚀 Run", variant="primary")
+                sel_ckpt = gr.Dropdown(
+                    list(HF_FILENAMES.keys()),
+                    value=DEFAULT_CKPT, label="Checkpoint"
                 )
+                sel_interp = gr.Radio(
+                    ["bilinear", "bicubic", "nearest"],
+                    value="bicubic", label="Resize Interpolation"
                 )
+            with gr.Column(scale=1):
+                # num_top_classes is set to 4 so both 2-class and 4-class outputs fit
+                out_lbl = gr.Label(num_top_classes=4, label="Predictions")
         run_btn.click(
+            predict,
+            inputs=[in_img, sel_ckpt, sel_interp],  # order matches predict(image, ckpt_name, interpolation)
             outputs=[out_lbl]
         )
+        # optional example folder
+        if not os.path.exists("examples"):
+            os.makedirs("examples")
+            print("Put some jpg/png files inside ./examples for demo examples")
+        example_files = [os.path.join("examples", f)
+                         for f in os.listdir("examples")
+                         if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
         if example_files:  # only add the Examples component when images were found
             gr.Examples(
                 examples=[[f, DEFAULT_CKPT, "bicubic"] for f in example_files],  # one row per image: [path, checkpoint, interpolation]
+                inputs=[in_img, sel_ckpt, sel_interp],
                 outputs=[out_lbl],
+                fn=predict,
                 cache_examples=False,
             )
     demo.launch()
+# --------------------------------------------------
 if __name__ == "__main__":
+    launch()