Spaces:

arghyaiitb
/

resnet50-imagenet-1k

Sleeping

App Files Files Community

argo committed 7 days ago

Commit

70a26de

1 Parent(s): c2a28ed

Added gradio app

Browse files

Files changed (2) hide show

app.py +243 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,243 @@

+import gradio as gr
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchvision import transforms
+import numpy as np
+from PIL import Image
+import json
+# ImageNet-1k class names
+# We'll load these from a separate file
+with open('imagenet_classes.json', 'r') as f:
+    IMAGENET_CLASSES = json.load(f)
+# Model definition - ResNet-50 for ImageNet
+class Bottleneck(nn.Module):
+    """Bottleneck block for ResNet-50/101/152"""
+    expansion = 4
+    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
+        super(Bottleneck, self).__init__()
+        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(out_channels)
+        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(out_channels)
+        self.conv3 = nn.Conv2d(out_channels, out_channels * self.expansion,
+                               kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(out_channels * self.expansion)
+        self.downsample = downsample
+    def forward(self, x):
+        identity = x
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = F.relu(out)
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = F.relu(out)
+        out = self.conv3(out)
+        out = self.bn3(out)
+        if self.downsample is not None:
+            identity = self.downsample(x)
+        out += identity
+        out = F.relu(out)
+        return out
+class ResNet50(nn.Module):
+    """ResNet-50 model for ImageNet"""
+    def __init__(self, num_classes=1000):
+        super(ResNet50, self).__init__()
+        self.in_channels = 64
+        # Initial convolution layer
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        # ResNet-50 architecture: [3, 4, 6, 3] blocks
+        self.layer1 = self._make_layer(64, 3, stride=1)
+        self.layer2 = self._make_layer(128, 4, stride=2)
+        self.layer3 = self._make_layer(256, 6, stride=2)
+        self.layer4 = self._make_layer(512, 3, stride=2)
+        # Final layers
+        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+        self.fc = nn.Linear(512 * Bottleneck.expansion, num_classes)
+        # Initialize weights
+        self._initialize_weights()
+    def _make_layer(self, out_channels, blocks, stride):
+        """Create a residual layer with specified number of blocks"""
+        downsample = None
+        if stride != 1 or self.in_channels != out_channels * Bottleneck.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(self.in_channels, out_channels * Bottleneck.expansion,
+                         kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(out_channels * Bottleneck.expansion),
+            )
+        layers = []
+        layers.append(Bottleneck(self.in_channels, out_channels, stride, downsample))
+        self.in_channels = out_channels * Bottleneck.expansion
+        for _ in range(1, blocks):
+            layers.append(Bottleneck(self.in_channels, out_channels))
+        return nn.Sequential(*layers)
+    def _initialize_weights(self):
+        """Initialize weights using He initialization"""
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+    def forward(self, x):
+        # Initial layers
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = F.relu(x)
+        x = self.maxpool(x)
+        # Residual layers
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        # Final layers
+        x = self.avgpool(x)
+        x = torch.flatten(x, 1)
+        x = self.fc(x)
+        return x
+# Load model
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = ResNet50(num_classes=1000)
+# Load trained weights
+try:
+    checkpoint = torch.load("best_model.pt", map_location=device)
+    if 'model_state_dict' in checkpoint:
+        model.load_state_dict(checkpoint['model_state_dict'])
+        print(f"Model loaded successfully! Top-1 accuracy: {checkpoint.get('top1_accuracy', 'N/A'):.2f}%")
+        print(f"Top-5 accuracy: {checkpoint.get('top5_accuracy', 'N/A'):.2f}%")
+    else:
+        model.load_state_dict(checkpoint)
+        print("Model loaded successfully!")
+except Exception as e:
+    print(f"Warning: Could not load model weights: {e}")
+    print("Using randomly initialized model for demo purposes.")
+model.to(device)
+model.eval()
+# ImageNet preprocessing
+transform = transforms.Compose([
+    transforms.Resize(256),
+    transforms.CenterCrop(224),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                       std=[0.229, 0.224, 0.225])
+])
+def predict(image):
+    """Predict the class of the input image"""
+    if image is None:
+        return {"Error": "No image provided"}
+    try:
+        # Convert to PIL Image if needed
+        if isinstance(image, np.ndarray):
+            image = Image.fromarray(image.astype('uint8'), 'RGB')
+        # Ensure RGB mode
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+        # Preprocess image
+        img_tensor = transform(image).unsqueeze(0).to(device)
+        # Make prediction
+        with torch.no_grad():
+            outputs = model(img_tensor)
+            probabilities = F.softmax(outputs, dim=1)[0]
+        # Get top 5 predictions
+        top5_prob, top5_idx = torch.topk(probabilities, 5)
+        # Format results as a dictionary
+        results = {}
+        for i, (idx, prob) in enumerate(zip(top5_idx, top5_prob), 1):
+            class_idx = idx.item()
+            class_name = IMAGENET_CLASSES.get(str(class_idx), f"Class {class_idx}")
+            results[f"{i}. {class_name}"] = f"{float(prob.item()) * 100:.2f}%"
+        return results
+    except Exception as e:
+        return {"Error": str(e)}
+# Create Gradio interface
+title = "ResNet-50 ImageNet-1k Classifier"
+description = """
+Upload an image to classify it into one of 1000 ImageNet categories.
+This model is a **ResNet-50** trained on the ImageNet-1k dataset with modern optimization techniques:
+- **Architecture**: ResNet-50 with Bottleneck blocks [3, 4, 6, 3]
+- **Training Optimizations**:
+  - Progressive resizing (128→160→192→224px)
+  - CutMix and MixUp augmentation
+  - Label smoothing (0.1)
+  - Exponential Moving Average (EMA)
+  - Automatic Mixed Precision (AMP)
+  - PyTorch 2.0 compilation
+- **Target Accuracy**: 78%+ (Top-1), 94%+ (Top-5)
+- **Training Time**: ~90 minutes on 8x A100 GPUs
+The model works best with natural images containing objects, animals, or scenes from the ImageNet categories.
+"""
+examples = [
+    ["https://images.unsplash.com/photo-1543466835-00a7907e9de1?w=400", "Golden Retriever"],
+    ["https://images.unsplash.com/photo-1514888286974-6c03e2ca1dba?w=400", "Tabby Cat"],
+    ["https://images.unsplash.com/photo-1511367461989-f85a21fda167?w=400", "Granny Smith Apple"],
+]
+# Create the interface
+demo = gr.Interface(
+    fn=predict,
+    inputs=gr.Image(type="pil", label="Upload Image"),
+    outputs=gr.JSON(label="Top 5 Predictions"),
+    title=title,
+    description=description,
+    examples=examples,
+    theme=gr.themes.Soft(),
+    allow_flagging="never"
+)
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+torch>=2.0.0
+torchvision>=0.15.0
+gradio>=5.49.1
+numpy>=1.24.0
+Pillow>=9.0.0
+pydantic==2.10.6