AreebKhan committed on
Commit 4d449f1 (verified)
1 Parent(s): bdb318b

Create app.py

Files changed (1)
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
+ import gradio as gr
+ import cv2
+ import numpy as np
+ import torch
+ from transformers import ViTForImageClassification, ViTImageProcessor
+ import mediapipe as mp
+
+ # Load pretrained model (ASL word classifier)
+ model_name = "your_pretrained_asl_model"  # Replace with an actual fine-tuned checkpoint
+ model = ViTForImageClassification.from_pretrained(model_name)
+ processor = ViTImageProcessor.from_pretrained(model_name)
+
+ # MediaPipe Hands setup
+ mp_hands = mp.solutions.hands
+ mp_drawing = mp.solutions.drawing_utils
+ hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)
+
+ # Define the ASL words
+ asl_words = ["Hello", "Thanks", "Yes", "No", "Love", "Please"]  # Expand this list
+
+ def extract_hand_landmarks(image):
+     """Extracts hand landmarks from a BGR image, or returns None if no hand is found."""
+     image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+     results = hands.process(image_rgb)
+
+     if results.multi_hand_landmarks:
+         for hand_landmarks in results.multi_hand_landmarks:
+             landmarks = np.array([[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]).flatten()
+             return landmarks
+     return None
+
+ def classify_asl_word(image):
+     """Predicts the ASL word shown in a single BGR frame."""
+     landmarks = extract_hand_landmarks(image)
+     if landmarks is None:
+         return "No hand detected"
+
+     # Landmarks only confirm a hand is present; the ViT model classifies the frame itself (as RGB)
+     inputs = processor(images=cv2.cvtColor(image, cv2.COLOR_BGR2RGB), return_tensors="pt")
+
+     with torch.no_grad():
+         outputs = model(**inputs)
+         predicted_class_idx = outputs.logits.argmax(-1).item()
+
+     return asl_words[predicted_class_idx] if predicted_class_idx < len(asl_words) else "Unknown sign"
+
+ def process_video(video_path):
+     """Processes the uploaded video and returns the detected ASL words."""
+     cap = cv2.VideoCapture(video_path)
+     detected_words = []
+
+     while cap.isOpened():
+         ret, frame = cap.read()
+         if not ret:
+             break
+         word = classify_asl_word(frame)  # classifies every frame; sampling frames would be faster
+         if word not in detected_words and word != "No hand detected":
+             detected_words.append(word)
+
+     cap.release()
+     return ", ".join(detected_words) if detected_words else "No ASL words detected"
+
+ # Gradio Interface
+ iface = gr.Interface(
+     fn=process_video,
+     inputs=gr.Video(label="ASL Video"),  # Gradio passes the uploaded video's filepath to fn
+     outputs=gr.Textbox(label="Detected ASL Words"),
+     title="ASL Sign Language to Text",
+     description="Upload a video of ASL signs, and the model will translate them into text."
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
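
A minimal local smoke test for the functions defined in this file, assuming the dependencies (gradio, opencv-python, torch, transformers, mediapipe) are installed and a real fine-tuned checkpoint has replaced the "your_pretrained_asl_model" placeholder; the image and video filenames below are hypothetical:

import cv2
from app import classify_asl_word, process_video  # app.py as added in this commit

frame = cv2.imread("hello_sign.jpg")        # hypothetical still image; OpenCV loads it as BGR
print(classify_asl_word(frame))             # prints a word from asl_words, or "No hand detected"
print(process_video("hello_thanks.mp4"))    # hypothetical clip; prints comma-separated detected words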