import os

import cv2
import numpy as np
import torch
from PIL import Image

# Keep the dot when deploying (Spaces); remove locally if needed
from .exp_recognition_model import facExpRec, processor, device
#############################################################################################################################
# Caution: Don't change any of the filenames, function names, or definitions.                                               #
# Always refer to files as current_path + file_name; without it we cannot access files on the server.                      #
#############################################################################################################################
# =====================================================
# PATH SETUP
# =====================================================
current_path = os.path.dirname(os.path.abspath(__file__))

# =====================================================
# LOAD MODEL ONCE (global)
# =====================================================
print("Loading Expression Recognition Model...")
try:
    exp_model = facExpRec().to(device)
    exp_model.eval()
    print("Expression model loaded successfully.")
except Exception as e:
    print(f"Failed to load Expression Model: {e}")
    exp_model = None
# =====================================================
# FACE DETECTION FUNCTION
# =====================================================
def detected_face(image):
    """
    Detects faces using Haar cascades and returns the face with the largest
    area as an RGB PIL image. Returns 0 if no face is detected.
    """
    face_haar = os.path.join(current_path, "haarcascade_frontalface_default.xml")
    eye_haar = os.path.join(current_path, "haarcascade_eye.xml")  # currently unused; kept alongside the server files
    face_cascade = cv2.CascadeClassifier(face_haar)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
    if len(faces) == 0:
        return 0
    face_areas, images = [], []
    for (x, y, w, h) in faces:
        face_cropped = gray[y:y + h, x:x + w]
        face_areas.append(w * h)
        images.append(face_cropped)
    # Return the face with the maximum area
    required_image = images[np.argmax(face_areas)]
    required_image = Image.fromarray(required_image).convert("RGB")
    return required_image
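
# Minimal usage sketch (assumes a hypothetical local test image "sample.jpg";
# it is not part of the deployed files):
#   img = cv2.imread(os.path.join(current_path, "sample.jpg"))  # BGR ndarray
#   face = detected_face(img)  # PIL RGB crop of the largest face, or 0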
# =====================================================
# EXPRESSION PREDICTION FUNCTION
# =====================================================
def get_expression(img):
    """
    Takes an OpenCV BGR image as input, detects the face, and returns the
    predicted facial expression as a string.
    """
    if exp_model is None:
        raise RuntimeError("Expression model not loaded properly.")

    # Detect face (detected_face returns the int 0 when no face is found,
    # so check the type rather than comparing a PIL image against 0)
    face = detected_face(img)
    if isinstance(face, int):
        # Fallback: no face detected, use the entire image
        face = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # Preprocess with the ViT processor
    inputs = processor(images=face, return_tensors="pt").to(device)

    # Inference
    with torch.no_grad():
        outputs = exp_model.model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        pred_idx = torch.argmax(probs, dim=-1).item()
        confidence = probs[0][pred_idx].item()

    # Map the predicted index to a label via model.config.id2label, if available
    id2label = getattr(exp_model.model.config, "id2label", None)
    if id2label and pred_idx in id2label:
        expression_label = id2label[pred_idx]
    else:
        expression_label = "Unknown"

    print(f"Detected Expression: {expression_label} ({confidence:.2f})")
    return expression_label
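
# -----------------------------------------------------
# Local smoke test (sketch). Runs only when this file is executed directly,
# so it does not affect the Spaces deployment. "test_face.jpg" is a
# hypothetical placeholder, and the relative import above must be made
# absolute ("from exp_recognition_model import ...") to run this locally.
# -----------------------------------------------------
if __name__ == "__main__":
    test_img = cv2.imread(os.path.join(current_path, "test_face.jpg"))
    if test_img is None:
        print("No test image found; skipping smoke test.")
    else:
        print(get_expression(test_img))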