Spaces:

telecomadm1145
/

AIDetectV2

Running

App Files Files Community

telecomadm1145 committed on Jul 29

Commit

0358510

verified ·

1 Parent(s): 6211642

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -16

app.py CHANGED Viewed

@@ -1,10 +1,11 @@
 # -*- coding: utf-8 -*-
 """
-Swin/CAFormer AI detection
 -------------------------------------------------------------------
 • Swin-V2 / V4                          : 2-class  (AI vs. Non-AI)
 • Swin-V7 / V8 / V9                     : 4-class  (photo / anime × AI / Non-AI)
 • CAFormer-V10                          : 4-class  (photo / anime × AI / Non-AI)
 -------------------------------------------------------------------
 Author: telecomadm1145
 """
@@ -16,6 +17,9 @@ from PIL import Image
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file  # Added for .safetensors support
 # --------------------------------------------------
 # 1. Model & Checkpoint Meta-data
@@ -29,6 +33,9 @@ HF_FILENAMES = {
     "V8":                 "swin_classifier_4class_fp16_v8_epoch7_acc9740.pth",
     "V9":                 "swin_classifier_4class_fp16_v9_acc9861.pth",
     "V1-CAFormer":        "caformer_b36_4class.safetensors",
 }
 CKPT_META = {
@@ -46,12 +53,24 @@ CKPT_META = {
             "names": ["non_ai", "ai", "ani_non_ai", "ani_ai"]},
     "V1-CAFormer": { "n_cls": 4, "head": "v7", "backbone": "caformer_b36.sail_in22k_ft_in1k_384",
             "names": ["non_ai", "ai", "ani_non_ai", "ani_ai"]},
 }
 DEFAULT_CKPT = "V1-CAFormer"
 LOCAL_CKPT_DIR = "./checkpoints"
 SEED = 4421
 DROP_RATE = 0.1
 device = "cuda" if torch.cuda.is_available() else "cpu"
 torch.manual_seed(SEED);  np.random.seed(SEED)
@@ -60,6 +79,55 @@ print(f"Using device: {device}")
 model, current_ckpt = None, None
 current_meta = None
 # Despite the Swin-specific name, this class also serves CAFormer backbones; the original name is kept to avoid breaking changes if subclassed elsewhere.
 class SwinClassifier(nn.Module):
     def __init__(self, model_name, num_classes, pretrained=True,
@@ -126,20 +194,36 @@ def load_model(ckpt_name: str):
     print(f"\n🔄 Switching to {ckpt_name} ...")
     meta = CKPT_META[ckpt_name]
     ckpt_filename = HF_FILENAMES[ckpt_name]
-    ckpt_file = hf_hub_download(
-        repo_id=REPO_ID,
-        filename=ckpt_filename,
-        local_dir=LOCAL_CKPT_DIR, force_download=False
-    )
     print(f"Checkpoint: {ckpt_file}")
-    # Build model structure
-    model = SwinClassifier(
-        meta["backbone"],   # Use backbone from meta
-        num_classes       = meta["n_cls"],
-        pretrained        = False,
-        head_version      = meta["head"]
-    ).to(device)
     # Compatible load for .pth and .safetensors
     if ckpt_filename.endswith(".safetensors"):
@@ -173,7 +257,19 @@ def predict(image: Image.Image,
     if image is None: return None
     load_model(ckpt_name)
-    tfm = build_transform(False, interpolation)
     inp = tfm(image).unsqueeze(0).to(device)
     probs = F.softmax(model(inp), dim=1)[0].cpu()
@@ -193,7 +289,7 @@ def launch():
         gr.Markdown("# AI Detector")
         gr.Markdown(
             "Choose a model checkpoint on the left, upload an image, "
-            "and click **Run** to see predictions. Checkpoint V7+ outputs 4 classes."
         )
         with gr.Row():
@@ -206,7 +302,7 @@ def launch():
                 )
                 sel_interp = gr.Radio(
                     ["bilinear", "bicubic", "nearest"],
-                    value="bicubic", label="Resize Interpolation"
                 )
                 in_img  = gr.Image(type="pil", label="Upload Image")

 # -*- coding: utf-8 -*-
 """
+Swin/CAFormer/DINOv2 AI detection
 -------------------------------------------------------------------
 • Swin-V2 / V4                          : 2-class  (AI vs. Non-AI)
 • Swin-V7 / V8 / V9                     : 4-class  (photo / anime × AI / Non-AI)
 • CAFormer-V10                          : 4-class  (photo / anime × AI / Non-AI)
+• DINOv2-4class                         : 4-class  (photo / anime × AI / Non-AI)
 -------------------------------------------------------------------
 Author: telecomadm1145
 """
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file  # Added for .safetensors support
+# Added for DINOv2 model
+from transformers import AutoModel
+from torchvision import transforms
 # --------------------------------------------------
 # 1. Model & Checkpoint Meta-data
     "V8":                 "swin_classifier_4class_fp16_v8_epoch7_acc9740.pth",
     "V9":                 "swin_classifier_4class_fp16_v9_acc9861.pth",
     "V1-CAFormer":        "caformer_b36_4class.safetensors",
+    "V2-CAFormer":        "caformer_b36_4class_95.safetensors",
+    "V2.5-CAFormer":      "caformer_b36_4class_96.safetensors",
+    "DINOv2-4class":      "dinov2_4class.safetensors", # Added DINOv2 checkpoint
 }
 CKPT_META = {
             "names": ["non_ai", "ai", "ani_non_ai", "ani_ai"]},
     "V1-CAFormer": { "n_cls": 4, "head": "v7", "backbone": "caformer_b36.sail_in22k_ft_in1k_384",
             "names": ["non_ai", "ai", "ani_non_ai", "ani_ai"]},
+    "V2-CAFormer": { "n_cls": 4, "head": "v7", "backbone": "caformer_b36.sail_in22k_ft_in1k_384",
+            "names": ["non_ai", "ai", "ani_non_ai", "ani_ai"]},
+    "V2.5-CAFormer": { "n_cls": 4, "head": "v7", "backbone": "caformer_b36.sail_in22k_ft_in1k_384",
+            "names": ["non_ai", "ai", "ani_non_ai", "ani_ai"]},
+    # Added DINOv2 metadata
+    "DINOv2-4class": {
+        "model_type": "dinov2",
+        "backbone": 'facebook/dinov2-base',
+        "n_cls": 4,
+        "names": ["non_ai", "ai", "ani_non_ai", "ani_ai"]
+    },
 }
 DEFAULT_CKPT = "V1-CAFormer"
 LOCAL_CKPT_DIR = "./checkpoints"
 SEED = 4421
 DROP_RATE = 0.1
+DROPOUT_RATE = 0.1 # From train.py for DINOv2
 device = "cuda" if torch.cuda.is_available() else "cpu"
 torch.manual_seed(SEED);  np.random.seed(SEED)
 model, current_ckpt = None, None
 current_meta = None
+# --- Start of code from train.py ---
+class DINOv2Classifier(nn.Module):
+    """DINOv2 backbone with learned attention-weighted token pooling and an MLP head.
+    Args:
+        model_name: identifier handed to ``AutoModel.from_pretrained``.
+        num_classes: output width of the final linear layer.
+    """
+    def __init__(self, model_name, num_classes):
+        super().__init__()
+        self.backbone = AutoModel.from_pretrained(model_name)
+        cfg = self.backbone.config
+        hidden = cfg.hidden_size
+        # Self-attention over the backbone tokens; its output feeds the per-token scorer.
+        self.weight_self_attention = nn.MultiheadAttention(
+            embed_dim=hidden,
+            num_heads=cfg.num_attention_heads,
+            dropout=cfg.hidden_dropout_prob,
+            batch_first=True
+        )
+        # Maps every attended token to a single scalar pooling logit.
+        self.weight_mlp = nn.Sequential(
+            nn.Linear(hidden, hidden * 4),
+            nn.LayerNorm(hidden * 4),
+            nn.GELU(),
+            nn.Linear(hidden * 4, 1)
+        )
+        # Classification head applied to the pooled representation.
+        self.classifier = nn.Sequential(
+            nn.Dropout(DROPOUT_RATE),
+            nn.Linear(hidden, hidden),
+            nn.LayerNorm(hidden),
+            nn.GELU(),
+            nn.Dropout(DROPOUT_RATE),
+            nn.Linear(hidden, num_classes)
+        )
+        nn.init.xavier_uniform_(self.weight_self_attention.in_proj_weight)
+        nn.init.xavier_uniform_(self.weight_self_attention.out_proj.weight)
+        nn.init.constant_(self.weight_self_attention.out_proj.bias, 0)
+        # weight_mlp and classifier are nn.Sequential containers, so walk their
+        # sub-modules: testing the containers themselves never matches nn.Linear
+        # and would silently skip the initialisation.
+        for container in (self.weight_mlp, self.classifier):
+            for layer in container.modules():
+                if isinstance(layer, nn.Linear):
+                    nn.init.xavier_uniform_(layer.weight)
+                    nn.init.constant_(layer.bias, 0)
+    def forward(self, x):
+        """Return the classifier output for ``x``, which is passed unchanged to the backbone."""
+        tokens = self.backbone(x).last_hidden_state
+        # Query, key and value are all the backbone's last hidden state.
+        attn_output, _ = self.weight_self_attention(tokens, tokens, tokens)
+        # One logit per token, normalised with a softmax over the last axis.
+        raw_weights = self.weight_mlp(attn_output).squeeze(-1)
+        pooling_weights = torch.softmax(raw_weights, dim=-1)
+        # Weighted sum of the tokens along dim 1 gives the pooled vector.
+        pooled_output = torch.sum(tokens * pooling_weights.unsqueeze(-1), dim=1)
+        return self.classifier(pooled_output)
+# --- End of code from train.py ---
 # Despite the Swin-specific name, this class also serves CAFormer backbones; the original name is kept to avoid breaking changes if subclassed elsewhere.
 class SwinClassifier(nn.Module):
     def __init__(self, model_name, num_classes, pretrained=True,
     print(f"\n🔄 Switching to {ckpt_name} ...")
     meta = CKPT_META[ckpt_name]
     ckpt_filename = HF_FILENAMES[ckpt_name]
+    # Check if the checkpoint is DINOv2 and handle its local path
+    if meta.get("model_type") == "dinov2":
+        # Assume DINOv2 model is local, as generated by train.py
+        ckpt_file = ckpt_filename
+        if not os.path.exists(ckpt_file):
+             raise FileNotFoundError(f"DINOv2 checkpoint not found at {ckpt_file}. Please run train.py first.")
+    else:
+        # Download other models from HF Hub
+        ckpt_file = hf_hub_download(
+            repo_id=REPO_ID,
+            filename=ckpt_filename,
+            local_dir=LOCAL_CKPT_DIR, force_download=False
+        )
     print(f"Checkpoint: {ckpt_file}")
+    # Build model structure based on model_type
+    if meta.get("model_type") == "dinov2":
+        model = DINOv2Classifier(
+            model_name=meta["backbone"],
+            num_classes=meta["n_cls"]
+        ).to(device)
+    else: # Existing logic for Swin/CAFormer
+        model = SwinClassifier(
+            meta["backbone"],
+            num_classes=meta["n_cls"],
+            pretrained=False,
+            head_version=meta.get("head", "v4")
+        ).to(device)
     # Compatible load for .pth and .safetensors
     if ckpt_filename.endswith(".safetensors"):
     if image is None: return None
     load_model(ckpt_name)
+    # Select transform based on the current model type
+    if current_meta.get("model_type") == "dinov2":
+        # DINOv2 specific transform from train.py
+        tfm = transforms.Compose([
+            transforms.Resize((224, 224)),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+        ])
+    else:
+        # Original transform logic for Swin/CAFormer
+        tfm = build_transform(False, interpolation)
     inp = tfm(image).unsqueeze(0).to(device)
     probs = F.softmax(model(inp), dim=1)[0].cpu()
         gr.Markdown("# AI Detector")
         gr.Markdown(
             "Choose a model checkpoint on the left, upload an image, "
+            "and click **Run** to see predictions. Checkpoint V7+ and DINOv2 outputs 4 classes."
         )
         with gr.Row():
                 )
                 sel_interp = gr.Radio(
                     ["bilinear", "bicubic", "nearest"],
+                    value="bicubic", label="Resize Interpolation (for Swin/CAFormer)"
                 )
                 in_img  = gr.Image(type="pil", label="Upload Image")