Spaces:

Kousik831
/

face-similarity-demo

Sleeping

App Files Files Community

Kousik Kumar Siddavaram committed on Oct 24

Commit

dcb23aa

1 Parent(s): 4730a0e

Updated face recognition python files

Browse files

Files changed (2) hide show

app/Hackathon_setup/face_recognition.py +35 -19
app/Hackathon_setup/face_recognition_model.py +24 -11

app/Hackathon_setup/face_recognition.py CHANGED Viewed

@@ -8,12 +8,17 @@ import os
 import joblib
 import torch.nn.functional as F
 # Current_path stores absolute path of the file from where it runs.
 current_path = os.path.dirname(os.path.abspath(__file__))
 # -------------------------
 # Load trained classifier and label encoder
 # -------------------------
 clf_path = os.path.join(current_path, "team_classifier.joblib")
 le_path = os.path.join(current_path, "label_encoder.joblib")
 clf = joblib.load(clf_path)
@@ -23,22 +28,29 @@ le = joblib.load(le_path)
 # Face Detection
 # -------------------------
 def detected_face(image):
     eye_haar = current_path + '/haarcascade_eye.xml'
     face_haar = current_path + '/haarcascade_frontalface_default.xml'
     face_cascade = cv2.CascadeClassifier(face_haar)
-    eye_cascade = cv2.CascadeClassifier(eye_haar)
     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
-    face_areas = []
-    images = []
-    required_image = 0
-    for i, (x, y, w, h) in enumerate(faces):
-        face_cropped = gray[y:y+h, x:x+w]
-        face_areas.append(w*h)
-        images.append(face_cropped)
-        required_image = images[np.argmax(face_areas)]
-        required_image = Image.fromarray(required_image)
-    return required_image
 # -------------------------
 # Compute Similarity
@@ -47,16 +59,19 @@ def get_similarity(img1, img2):
     # Detect faces
     det_img1 = detected_face(img1)
     det_img2 = detected_face(img2)
-    if det_img1 == 0 or det_img2 == 0:
         det_img1 = Image.fromarray(cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY))
         det_img2 = Image.fromarray(cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY))
     # Transform images
     face1 = trnscm(det_img1).unsqueeze(0).to(device)
     face2 = trnscm(det_img2).unsqueeze(0).to(device)
-    # Load Siamese model
-    model_path = current_path + '/siamese_model.t7'
     checkpoint = torch.load(model_path, map_location=device)
     feature_net = Siamese().to(device)
     feature_net.load_state_dict(checkpoint['net_dict'])
@@ -76,22 +91,23 @@ def get_similarity(img1, img2):
 def get_face_class(img1):
     # Detect face
     det_img1 = detected_face(img1)
     if det_img1 == 0:
         det_img1 = Image.fromarray(cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY))
     # Transform image
     face = trnscm(det_img1).unsqueeze(0).to(device)
-    # Load Siamese model
-    model_path = current_path + '/siamese_model.t7'
     checkpoint = torch.load(model_path, map_location=device)
     feature_net = Siamese().to(device)
     feature_net.load_state_dict(checkpoint['net_dict'])
     feature_net.eval()
-    # Get embedding
     with torch.no_grad():
-        embedding = feature_net.forward_once(face)
         embedding_np = embedding.cpu().numpy()
     # Predict class using trained classifier

 import joblib
 import torch.nn.functional as F
 # Current_path stores absolute path of the file from where it runs.
 current_path = os.path.dirname(os.path.abspath(__file__))
+# Define the new model path constant for clarity
+SIAMESE_MODEL_FILENAME = 'face_recognition_model.t7'
 # -------------------------
 # Load trained classifier and label encoder
 # -------------------------
+# NOTE: Ensure 'team_classifier.joblib' is retrained on 80,000 features!
 clf_path = os.path.join(current_path, "team_classifier.joblib")
 le_path = os.path.join(current_path, "label_encoder.joblib")
 clf = joblib.load(clf_path)
 # Face Detection
 # -------------------------
 def detected_face(image):
+    """
+    Detects faces in the image and returns the largest detected face (PIL Image).
+    Returns 0 if no face is detected.
+    """
     eye_haar = current_path + '/haarcascade_eye.xml'
     face_haar = current_path + '/haarcascade_frontalface_default.xml'
     face_cascade = cv2.CascadeClassifier(face_haar)
     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
+    if len(faces) == 0:
+        return 0 # No face detected
+    # Select the face with the largest area
+    face_areas = [w * h for (x, y, w, h) in faces]
+    max_idx = np.argmax(face_areas)
+    x, y, w, h = faces[max_idx]
+    # Crop the largest face
+    face_cropped = gray[y:y + h, x:x + w]
+    return Image.fromarray(face_cropped)
 # -------------------------
 # Compute Similarity
     # Detect faces
     det_img1 = detected_face(img1)
     det_img2 = detected_face(img2)
+    # Fallback to entire image if no face detected
+    if det_img1 == 0:
         det_img1 = Image.fromarray(cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY))
+    if det_img2 == 0:
         det_img2 = Image.fromarray(cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY))
     # Transform images
     face1 = trnscm(det_img1).unsqueeze(0).to(device)
     face2 = trnscm(det_img2).unsqueeze(0).to(device)
+    # Load Siamese model (Updated filename)
+    model_path = os.path.join(current_path, SIAMESE_MODEL_FILENAME)
     checkpoint = torch.load(model_path, map_location=device)
     feature_net = Siamese().to(device)
     feature_net.load_state_dict(checkpoint['net_dict'])
 def get_face_class(img1):
     # Detect face
     det_img1 = detected_face(img1)
     if det_img1 == 0:
         det_img1 = Image.fromarray(cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY))
     # Transform image
     face = trnscm(det_img1).unsqueeze(0).to(device)
+    # Load Siamese model (Updated filename)
+    model_path = os.path.join(current_path, SIAMESE_MODEL_FILENAME)
     checkpoint = torch.load(model_path, map_location=device)
     feature_net = Siamese().to(device)
     feature_net.load_state_dict(checkpoint['net_dict'])
     feature_net.eval()
+    # Get embedding (CRITICAL FIX: Use extract_features for classification)
     with torch.no_grad():
+        embedding = feature_net.extract_features(face) # <--- FIX APPLIED HERE
         embedding_np = embedding.cpu().numpy()
     # Predict class using trained classifier

app/Hackathon_setup/face_recognition_model.py CHANGED Viewed

@@ -5,16 +5,20 @@ import torch.nn as nn
 import torch.nn.functional as F
 from torchvision import transforms
 # ---------------------------
 # Device configuration
 # ---------------------------
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # ---------------------------
 # Transformation Function
 # ---------------------------
-# Same transforms as used during training in Colab
 trnscm = transforms.Compose([
     transforms.Resize((100, 100)),
     transforms.ToTensor()
 ])
@@ -26,7 +30,7 @@ class Siamese(nn.Module):
     def __init__(self):
         super(Siamese, self).__init__()
-        # CNN layers (same as your Colab model)
         self.cnn1 = nn.Sequential(
             nn.ReflectionPad2d(1),
             nn.Conv2d(1, 4, kernel_size=3),
@@ -44,35 +48,44 @@ class Siamese(nn.Module):
             nn.BatchNorm2d(8)
         )
-        # Fully connected layers
         self.fc1 = nn.Sequential(
             nn.Linear(8 * 100 * 100, 500),
             nn.ReLU(inplace=True),
             nn.Linear(500, 500),
             nn.ReLU(inplace=True),
-            nn.Linear(500, 5)
         )
     def forward_once(self, x):
-        # Forward pass for one image
         output = self.cnn1(x)
         output = output.view(output.size(0), -1)
         output = self.fc1(output)
         return output
     def forward(self, x1, x2):
-        # Forward pass for both images
         output1 = self.forward_once(x1)
         output2 = self.forward_once(x2)
         return output1, output2
 ##########################################################################################################
-## Classifier for face recognition
-## Not used for face similarity; now we use a Sklearn classifier separately
 ##########################################################################################################
-classifier = None  # Keep as None; we use joblib-loaded Sklearn model in facerecognition.py
-# ---------------------------
 # Class labels (optional, for reference)
-# ---------------------------
 classes = ['person1', 'person2', 'person3', 'person4', 'person5', 'person6', 'person7']

 import torch.nn.functional as F
 from torchvision import transforms
 # ---------------------------
 # Device configuration
 # ---------------------------
+# Use GPU if available, otherwise fall back to CPU
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # ---------------------------
 # Transformation Function
 # ---------------------------
+# This pipeline must match the transforms used during Siamese training.
 trnscm = transforms.Compose([
+    # Crucial: Ensures 1-channel input for the CNN backbone
+    transforms.Grayscale(num_output_channels=1),
     transforms.Resize((100, 100)),
     transforms.ToTensor()
 ])
     def __init__(self):
         super(Siamese, self).__init__()
+        # CNN layers (Feature Extractor)
         self.cnn1 = nn.Sequential(
             nn.ReflectionPad2d(1),
             nn.Conv2d(1, 4, kernel_size=3),
             nn.BatchNorm2d(8)
         )
+        # Fully connected layers (Metric Head for Siamese Loss)
+        # Output size is 8 * 100 * 100 = 80,000 features before first Linear layer
         self.fc1 = nn.Sequential(
             nn.Linear(8 * 100 * 100, 500),
             nn.ReLU(inplace=True),
             nn.Linear(500, 500),
             nn.ReLU(inplace=True),
+            nn.Linear(500, 5) # Final 5-dimensional embedding
         )
+    # Method to return the final low-dimensional embedding (used for similarity)
     def forward_once(self, x):
         output = self.cnn1(x)
         output = output.view(output.size(0), -1)
         output = self.fc1(output)
         return output
+    # Method to return the high-dimensional features (used for classification)
+    def extract_features(self, x):
+        """
+        Returns the raw, high-dimensional features (80,000) from the CNN backbone.
+        This output is what the Gradient Boosting Classifier (team_classifier.joblib) expects.
+        """
+        output = self.cnn1(x)
+        # Flatten the output: 8 channels * 100 height * 100 width = 80,000 features
+        output = output.view(output.size(0), -1)
+        return output
     def forward(self, x1, x2):
+        # Forward pass for similarity comparison
         output1 = self.forward_once(x1)
         output2 = self.forward_once(x2)
         return output1, output2
 ##########################################################################################################
+## Utility Variables
 ##########################################################################################################
+classifier = None # Placeholder. The actual classifier is loaded via joblib in face_recognition.py
 # Class labels (optional, for reference)
 classes = ['person1', 'person2', 'person3', 'person4', 'person5', 'person6', 'person7']