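"""Gradio demo: upload a short sign-language video and the model predicts the corresponding word."""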
import gradio as gr
import torch
from transformers import AutoImageProcessor, AutoModelForVideoClassification
import cv2
import numpy as np
import os
# Load the pre-trained model. The Auto* classes dispatch on the checkpoint's
# own config, so they load whichever video-classification architecture the
# weights were actually saved with (the checkpoint name suggests an X-CLIP backbone).
model_name = "Sokaina55/xclip-base-patch32-finetuned-ssl-sign-language-recognition"
device = "cuda" if torch.cuda.is_available() else "cpu"
image_processor = AutoImageProcessor.from_pretrained(model_name)
model = AutoModelForVideoClassification.from_pretrained(model_name).to(device)
model.eval()  # inference only: disable dropout etc.
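
# Helper sketch (the name `sample_frames` is ours, not from the checkpoint repo):
# video-classification models expect a fixed clip length, so pick that many
# frames spaced evenly across the whole video.
def sample_frames(frames, num_frames):
    """Uniformly sample `num_frames` frames, repeating frames if the clip is short."""
    indices = np.linspace(0, len(frames) - 1, num_frames).round().astype(int)
    return [frames[i] for i in indices]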
def process_video(video_path):
    """Reads a video, runs the classifier, and returns the predicted word."""
    # gr.Video passes a file path, or None if nothing was uploaded.
    if video_path is None or not os.path.exists(video_path):
        return "Error: Video file not found"
    # Read video
    cap = cv2.VideoCapture(video_path)
    frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(frame)
    cap.release()
    if len(frames) == 0:
        return "Error: No frames extracted from the video"

    # Preprocess: subsample to the fixed clip length the checkpoint expects
    # (16 frames is the usual default) instead of feeding every frame.
    num_frames = getattr(model.config, "num_frames", 16)
    clip = sample_frames(frames, num_frames)
    inputs = image_processor(clip, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
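    # pixel_values is typically (1, num_frames, 3, 224, 224) for these checkpoints.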
    # Get predictions
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_class = outputs.logits.argmax(-1).item()
    class_labels = model.config.id2label  # Map predictions to words
    return f"Predicted word: {class_labels.get(predicted_class, 'Unknown')}"
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## Sign Language to Text Recognition")
    video_input = gr.Video(label="Upload a sign language video")
    output_text = gr.Textbox(label="Predicted Word")
    btn = gr.Button("Predict")
    btn.click(fn=process_video, inputs=video_input, outputs=output_text)

demo.launch()