Spaces:

AreebKhan
/

Sign_Language_Translator2

Sleeping

AreebKhan committed on Feb 22

Commit

ae4a851

verified ·

1 Parent(s): 04faee5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,8 +5,8 @@ import torch
 from transformers import ViTForImageClassification, ViTImageProcessor
 import mediapipe as mp
-# Load pretrained model (ASL word classifier)
-model_name = "your_pretrained_asl_model"  # Replace with actual model
 model = ViTForImageClassification.from_pretrained(model_name)
 processor = ViTImageProcessor.from_pretrained(model_name)
@@ -15,8 +15,8 @@ mp_hands = mp.solutions.hands
 mp_drawing = mp.solutions.drawing_utils
 hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)
-# Define the ASL words
-asl_words = ["Hello", "Thanks", "Yes", "No", "Love", "Please"]  # Expand this list
 def extract_hand_landmarks(image):
     """Extracts hand landmarks from an image."""
@@ -35,14 +35,14 @@ def classify_asl_word(image):
     if landmarks is None:
         return "No hand detected"
-    # Convert landmarks into a format suitable for the model
     inputs = processor(images=image, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
     predicted_class_idx = outputs.logits.argmax(-1).item()
-    return asl_words[predicted_class_idx] if predicted_class_idx < len(asl_words) else "Unknown sign"
 def process_video(video_path):
     """Processes the uploaded video and returns detected ASL words."""

 from transformers import ViTForImageClassification, ViTImageProcessor
 import mediapipe as mp
+# Load pretrained model (Sign Language Recognition)
+model_name = "sokaina55/xclip-base-patch32-finetuned-ssl-sign-language-recognition"
 model = ViTForImageClassification.from_pretrained(model_name)
 processor = ViTImageProcessor.from_pretrained(model_name)
 mp_drawing = mp.solutions.drawing_utils
 hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)
+# Get the model's label mappings
+asl_words = model.config.id2label  # Dictionary mapping index to ASL words
 def extract_hand_landmarks(image):
     """Extracts hand landmarks from an image."""
     if landmarks is None:
         return "No hand detected"
+    # Convert image into a format suitable for the model
     inputs = processor(images=image, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
     predicted_class_idx = outputs.logits.argmax(-1).item()
+    return asl_words.get(predicted_class_idx, "Unknown sign")
 def process_video(video_path):
     """Processes the uploaded video and returns detected ASL words."""