Spaces:
Runtime error
Runtime error
| #@title Get bounding boxes for the subject | |
| from transformers import pipeline | |
| from moviepy.editor import VideoFileClip | |
| from PIL import Image | |
| import os | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import tqdm | |
| import pickle | |
| import torch | |
# OWL-ViT checkpoint used for zero-shot (free-text label) object detection.
checkpoint = "google/owlvit-large-patch14"

# Prefer the first CUDA device, but fall back to CPU so that building the
# pipeline does not fail on hosts without a GPU.
detector_device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Shared detector instance; get_bounding_boxes() calls it once per video frame.
detector = pipeline(
    model=checkpoint,
    task="zero-shot-object-detection",
    device=detector_device,
)
def _box_to_mask(box, height, width):
    """Rasterize a detector box dict into a (height, width) array of 0s and 1s."""
    # Clamp to the frame: a negative coordinate used directly as a slice
    # index would wrap around and silently produce an empty/wrong region.
    x0 = min(max(int(box["xmin"]), 0), width)
    y0 = min(max(int(box["ymin"]), 0), height)
    x1 = min(max(int(box["xmax"]), 0), width)
    y1 = min(max(int(box["ymax"]), 0), height)
    mask = np.zeros((height, width))
    mask[y0:y1, x0:x1] = 1
    return mask


def get_bounding_boxes(clip_path, subject):
    """Detect `subject` in every frame of a video and return per-frame box masks.

    Each frame is passed to the module-level `detector` with `subject` as the
    only candidate label. The box of the first prediction is drawn as a
    filled rectangle of ones on a zero array shaped like the frame.

    Args:
        clip_path: Path of the video file to read.
        subject: Text label of the object to look for.

    Returns:
        A tuple `(all_bboxes, num_bb)` where `all_bboxes` is a list with one
        `(height, width)` array per frame (all zeros when nothing usable was
        detected in that frame) and `num_bb` is the number of frames for
        which a box was drawn.
    """
    all_bboxes = []
    num_bb = 0

    # Read video from the path
    clip = VideoFileClip(clip_path)
    try:
        for frame in clip.iter_frames():
            frame = Image.fromarray(frame)
            # PIL reports size as (width, height); arrays are indexed (row, col).
            width, height = frame.size
            predictions = detector(
                frame,
                candidate_labels=[subject],
            )
            try:
                # NOTE(review): assumes the pipeline returns predictions ordered
                # by score, so index 0 is the best match - confirm in the docs.
                mask = _box_to_mask(predictions[0]["box"], height, width)
            except (IndexError, KeyError, TypeError, ValueError):
                # No (usable) detection: keep an empty canvas so the output
                # still has exactly one entry per frame.
                mask = np.zeros((height, width))
            else:
                num_bb += 1
            all_bboxes.append(mask)
    finally:
        # Release the underlying video reader even if detection fails midway.
        clip.close()

    return all_bboxes, num_bb
import pickle as pkl

# Root directory holding one sub-directory per dataset video; inside each,
# the clip named `video_filename` is the one that gets evaluated.
dir_path = '/your/result/path'
video_filename = '2_of_40_2.mp4'
output_bbox = []

# NOTE: unpickling can execute arbitrary code - only load a trusted file here.
with open("/ssv2dataset/path.pkl", "rb") as f:
    data = pkl.load(f)

dataset_size = len(data)
failed_cnt = 0

# `total` is needed because enumerate() has no length for tqdm to read.
for i, d in tqdm.tqdm(enumerate(data), total=dataset_size):
    try:
        filename = d['video'].split('.')[0]
        video_path = os.path.join(dir_path, filename, video_filename)
        fg_object = d['subject']
        masks, num_bb = get_bounding_boxes(video_path, fg_object)
        output_bbox.append({
            'caption': d['caption'],
            'video': d['video'],
            'subject': d['subject'],
            'mask': masks,
            'num_bb': num_bb
        })
    except Exception as exc:
        # Best effort: skip this entry but report why it failed. Catching
        # Exception (not a bare except) keeps Ctrl-C able to stop the run.
        print(f"Missed #{i} with Caption: {d['caption']} ({type(exc).__name__}: {exc})")
        failed_cnt += 1

output_path = f"/output/path/iou_eval/ssv2_modelscope_{video_filename.split('.')[0]}_bbox-v2.pkl"
# Create the target directory first so a long run is not lost at the final write.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, "wb") as f:
    pkl.dump(output_bbox, f)

print(f"Failed: {failed_cnt} out of {dataset_size}")