Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,8 +5,8 @@ import torch
|
|
| 5 |
from transformers import ViTForImageClassification, ViTImageProcessor
|
| 6 |
import mediapipe as mp
|
| 7 |
|
| 8 |
-
# Load pretrained model (
|
| 9 |
-
model_name = "
|
| 10 |
model = ViTForImageClassification.from_pretrained(model_name)
|
| 11 |
processor = ViTImageProcessor.from_pretrained(model_name)
|
| 12 |
|
|
@@ -15,8 +15,8 @@ mp_hands = mp.solutions.hands
|
|
| 15 |
mp_drawing = mp.solutions.drawing_utils
|
| 16 |
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)
|
| 17 |
|
| 18 |
-
#
|
| 19 |
-
asl_words =
|
| 20 |
|
| 21 |
def extract_hand_landmarks(image):
|
| 22 |
"""Extracts hand landmarks from an image."""
|
|
@@ -35,14 +35,14 @@ def classify_asl_word(image):
|
|
| 35 |
if landmarks is None:
|
| 36 |
return "No hand detected"
|
| 37 |
|
| 38 |
-
# Convert
|
| 39 |
inputs = processor(images=image, return_tensors="pt")
|
| 40 |
|
| 41 |
with torch.no_grad():
|
| 42 |
outputs = model(**inputs)
|
| 43 |
predicted_class_idx = outputs.logits.argmax(-1).item()
|
| 44 |
|
| 45 |
-
return asl_words
|
| 46 |
|
| 47 |
def process_video(video_path):
|
| 48 |
"""Processes the uploaded video and returns detected ASL words."""
|
|
|
|
| 5 |
from transformers import ViTForImageClassification, ViTImageProcessor
|
| 6 |
import mediapipe as mp
|
| 7 |
|
| 8 |
+
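# Load pretrained sign-language recognition model. NOTE(review): the checkpoint name indicates an X-CLIP (video) model, yet it is loaded with the ViT image-classification classes below - confirm the architectures are compatible.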
|
| 9 |
+
model_name = "sokaina55/xclip-base-patch32-finetuned-ssl-sign-language-recognition"
|
| 10 |
model = ViTForImageClassification.from_pretrained(model_name)
|
| 11 |
processor = ViTImageProcessor.from_pretrained(model_name)
|
| 12 |
|
|
|
|
| 15 |
mp_drawing = mp.solutions.drawing_utils
|
| 16 |
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)
|
| 17 |
|
| 18 |
+
# Get the model's label mappings
|
| 19 |
+
asl_words = model.config.id2label # Dictionary mapping index to ASL words
|
| 20 |
|
| 21 |
def extract_hand_landmarks(image):
|
| 22 |
"""Extracts hand landmarks from an image."""
|
|
|
|
| 35 |
if landmarks is None:
|
| 36 |
return "No hand detected"
|
| 37 |
|
| 38 |
+
# Convert image into a format suitable for the model
|
| 39 |
inputs = processor(images=image, return_tensors="pt")
|
| 40 |
|
| 41 |
with torch.no_grad():
|
| 42 |
outputs = model(**inputs)
|
| 43 |
predicted_class_idx = outputs.logits.argmax(-1).item()
|
| 44 |
|
| 45 |
+
return asl_words.get(predicted_class_idx, "Unknown sign")
|
| 46 |
|
| 47 |
def process_video(video_path):
|
| 48 |
"""Processes the uploaded video and returns detected ASL words."""
|