"""Gradio demo: run DETR object detection on every frame of an uploaded video."""

import cv2
import gradio as gr
import torch
from transformers import DetrForObjectDetection, DetrImageProcessor

MODEL_NAME = "facebook/detr-resnet-50"
SCORE_THRESHOLD = 0.9

# Load the image processor and the model once at start-up so every request
# reuses the same weights.
processor = DetrImageProcessor.from_pretrained(MODEL_NAME)
model = DetrForObjectDetection.from_pretrained(MODEL_NAME)


def detect_objects(video_path, threshold=SCORE_THRESHOLD):
    """Detect objects in every frame of the video at *video_path*.

    Args:
        video_path: Path of the video file to analyse. A falsy value
            (``None`` or an empty string) yields an empty result.
        threshold: Minimum confidence score for a detection to be kept.

    Returns:
        A list with one dict per decoded frame. Each dict has the keys
        ``"scores"``, ``"labels"``, ``"label_names"`` and ``"boxes"``, all
        holding plain Python lists so the result can be serialised as JSON.
        The list is empty when the video cannot be opened or has no frames.
    """
    if not video_path:
        return []

    cap = cv2.VideoCapture(video_path)
    results = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV decodes frames as BGR; the DETR processor expects RGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            inputs = processor(images=rgb_frame, return_tensors="pt")
            # Inference only: skip autograd bookkeeping for every frame.
            with torch.no_grad():
                outputs = model(**inputs)

            # Rescale the predicted boxes to the frame's (height, width).
            target_sizes = torch.tensor([rgb_frame.shape[:2]])
            detections = processor.post_process_object_detection(
                outputs,
                target_sizes=target_sizes,
                threshold=threshold,
            )[0]

            # Convert tensors to plain lists so the "json" output can
            # serialise them.
            labels = detections["labels"].tolist()
            id2label = model.config.id2label
            results.append(
                {
                    "scores": detections["scores"].tolist(),
                    "labels": labels,
                    "label_names": [
                        id2label.get(label, str(label)) for label in labels
                    ],
                    "boxes": detections["boxes"].tolist(),
                }
            )
    finally:
        # Release the capture handle even if inference raises.
        cap.release()

    return results


# Build the Gradio interface: one video in, per-frame detections out as JSON.
iface = gr.Interface(fn=detect_objects, inputs="video", outputs="json")

if __name__ == "__main__":
    iface.launch()