import gradio as gr
import torch
import cv2
import numpy as np
from transformers import VideoMAEForVideoClassification, VideoMAEImageProcessor
# Load a lighter pretrained backbone. Note: "facebook/videomae-base" is the
# self-supervised checkpoint, so the classification head added here is
# randomly initialized; its outputs are placeholders until fine-tuned.
model_name = "facebook/videomae-base"
model = VideoMAEForVideoClassification.from_pretrained(model_name)
processor = VideoMAEImageProcessor.from_pretrained(model_name)
model.eval()
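
# Alternative sketch (assumption: Hub access at runtime; this checkpoint and
# its Kinetics-400 action labels are not part of the original Space): a
# VideoMAE variant with a trained head, so predictions mean something
# out of the box.
#
# model = VideoMAEForVideoClassification.from_pretrained(
#     "MCG-NJU/videomae-base-finetuned-kinetics"
# )
# processor = VideoMAEImageProcessor.from_pretrained(
#     "MCG-NJU/videomae-base-finetuned-kinetics"
# )
# label = model.config.id2label[predicted_class_idx]  # real class names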
# VideoMAE expects a fixed-length clip (16 frames for this checkpoint), so
# read the video and sample frames uniformly rather than keeping an
# arbitrary count, which would break the model's positional embeddings.
NUM_FRAMES = 16

def preprocess_video(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
    cap.release()
    if not frames:
        return []
    # Uniformly sample exactly NUM_FRAMES frames (repeating frames when the
    # clip is shorter), then convert and resize only those for speed.
    indices = np.linspace(0, len(frames) - 1, NUM_FRAMES).astype(int)
    sampled = []
    for i in indices:
        frame = cv2.cvtColor(frames[i], cv2.COLOR_BGR2RGB)  # OpenCV decodes BGR
        sampled.append(cv2.resize(frame, (224, 224)))  # Match model input size
    return sampled
# Predict a sign-language word from an uploaded video
def predict(video_path):
    frames = preprocess_video(video_path)
    if len(frames) == 0:
        return "No frames detected, try a different video."
    inputs = processor(frames, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    predicted_class_idx = logits.argmax(-1).item()
    # Placeholder mapping to common words; replace with the real label set
    # once the model is fine-tuned on sign-language data.
    labels = ["Hello", "Thanks", "Yes", "No", "Goodbye", "Please", "Sorry"]
    predicted_label = labels[predicted_class_idx % len(labels)]
    return predicted_label
iface = gr.Interface(
    fn=predict,
    inputs=gr.Video(),
    outputs=gr.Textbox(label="Predicted Sign"),
    title="Sign Language to Text Converter",
    description="Upload a video of a hand gesture and get the predicted word.",
)

if __name__ == "__main__":
    iface.launch()
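
# Optional local smoke test (assumption: "sample.mp4" is any short clip you
# have on disk; not part of the original Space). Bypasses the Gradio UI and
# calls the pipeline directly:
#
#     print(predict("sample.mp4"))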