AreebKhan committed
Commit ae4a851 · verified · 1 Parent(s): 04faee5

Update app.py

Files changed (1): app.py (+6 -6)
app.py CHANGED
@@ -5,8 +5,8 @@ import torch
 from transformers import ViTForImageClassification, ViTImageProcessor
 import mediapipe as mp
 
-# Load pretrained model (ASL word classifier)
-model_name = "your_pretrained_asl_model" # Replace with actual model
+# Load pretrained model (Sign Language Recognition)
+model_name = "sokaina55/xclip-base-patch32-finetuned-ssl-sign-language-recognition"
 model = ViTForImageClassification.from_pretrained(model_name)
 processor = ViTImageProcessor.from_pretrained(model_name)
 
@@ -15,8 +15,8 @@ mp_hands = mp.solutions.hands
 mp_drawing = mp.solutions.drawing_utils
 hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)
 
-# Define the ASL words
-asl_words = ["Hello", "Thanks", "Yes", "No", "Love", "Please"] # Expand this list
+# Get the model's label mappings
+asl_words = model.config.id2label # Dictionary mapping index to ASL words
 
 def extract_hand_landmarks(image):
     """Extracts hand landmarks from an image."""
@@ -35,14 +35,14 @@ def classify_asl_word(image):
     if landmarks is None:
         return "No hand detected"
 
-    # Convert landmarks into a format suitable for the model
+    # Convert image into a format suitable for the model
     inputs = processor(images=image, return_tensors="pt")
 
     with torch.no_grad():
         outputs = model(**inputs)
     predicted_class_idx = outputs.logits.argmax(-1).item()
 
-    return asl_words[predicted_class_idx] if predicted_class_idx < len(asl_words) else "Unknown sign"
+    return asl_words.get(predicted_class_idx, "Unknown sign")
 
 def process_video(video_path):
     """Processes the uploaded video and returns detected ASL words."""