import gradio as gr
import cv2
import numpy as np
import torch
from transformers import ViTForImageClassification, ViTImageProcessor
import mediapipe as mp
# Load pretrained model (Sign Language Recognition)
model_name = "sokaina55/xclip-base-patch32-finetuned-ssl-sign-language-recognition"
model = ViTForImageClassification.from_pretrained(model_name)
processor = ViTImageProcessor.from_pretrained(model_name)
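# NOTE (assumption from the checkpoint name alone): "xclip-base-patch32-..."
# suggests an X-CLIP *video* model, which ViTForImageClassification may not
# load correctly. If loading fails, substitute a ViT image-classification
# checkpoint fine-tuned on sign-language frames.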
# MediaPipe Hands setup
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5)
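# static_image_mode=True treats each video frame independently (no temporal
# hand tracking), which matches the per-frame classification below;
# max_num_hands=1 assumes single-handed signs. mp_drawing is unused here but
# kept for optional landmark visualization.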
# Label mapping from the model config: class index -> ASL word.
asl_words = model.config.id2label
def extract_hand_landmarks(image):
    """Extracts hand landmarks from a frame, or returns None if no hand is found."""
    # MediaPipe expects RGB input, while OpenCV frames are BGR.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image_rgb)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # 21 landmarks x (x, y, z), flattened into a (63,) array.
            landmarks = np.array([[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]).flatten()
            return landmarks
    return None
def classify_asl_word(image):
    """Predicts the ASL word shown in a single frame."""
    # MediaPipe is used only as a gate here: skip frames with no visible hand.
    landmarks = extract_hand_landmarks(image)
    if landmarks is None:
        return "No hand detected"
    # The image processor expects RGB; OpenCV frames arrive as BGR.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    inputs = processor(images=image_rgb, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_class_idx = outputs.logits.argmax(-1).item()
    return asl_words.get(predicted_class_idx, "Unknown sign")
def process_video(video_path):
    """Processes the uploaded video and returns the detected ASL words."""
    cap = cv2.VideoCapture(video_path)
    detected_words = []
    frame_idx = 0
    frame_stride = 5  # Classify every 5th frame to keep inference time reasonable.
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if frame_idx % frame_stride == 0:
            word = classify_asl_word(frame)
            if word != "No hand detected" and word not in detected_words:
                detected_words.append(word)
        frame_idx += 1
    cap.release()
    return ", ".join(detected_words) if detected_words else "No ASL words detected"
# Gradio interface: the Video component passes the upload to process_video as a file path.
# (gr.Video takes no `type` argument; the original `type="file"` would raise a TypeError.)
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Video(),
    outputs=gr.Textbox(label="Detected ASL Words"),
    title="ASL Sign Language to Text",
    description="Upload a video of ASL signs, and the model will translate them into text.",
)
if __name__ == "__main__":
    iface.launch()
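# To run locally (a typical setup; versions not pinned here):
#   pip install gradio torch transformers opencv-python mediapipe numpy
#   python app.py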