# File size: 5,841 Bytes

import sys
import cv2
import os
from datasets import load_dataset

sys.path.append('Effort-AIGI-Detection/DeepfakeBench/training')

from effort_inf import infer_imgs
from tqdm import tqdm
# Smoke test at import time: run the detector once on a sample figure from the
# Effort-AIGI-Detection checkout and print whatever it returns.
_sample_image = cv2.imread('Effort-AIGI-Detection/figs/deepfake_tab1.png')
print(infer_imgs([_sample_image]))

# True when the 'qafigs_machine' environment variable is set (to any value).
local_eval = os.environ.get('qafigs_machine') is not None

def get_first_n_frames(video_path, n):
    """Read up to the first ``n`` frames of a video, in order.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        n: Maximum number of frames to read.

    Returns:
        A list of the frames returned by ``read()``. It is shorter than ``n``
        when a read fails (e.g. the video ends) before ``n`` frames were read.

    Raises:
        IOError: If the capture cannot be opened.
    """
    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        raise IOError(f"Cannot open video file: {video_path}")

    collected = []
    while len(collected) < n:
        ok, image = capture.read()
        if not ok:
            # Read failed before n frames were gathered; keep what we have.
            break
        collected.append(image)

    capture.release()
    return collected

import cv2
import random

def get_random_n_frames(video_path, n):
    """Sample ``n`` distinct random frames from a video, in ascending index order.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        n: Number of distinct frame indices to sample.

    Returns:
        A list of decoded frames. Indices whose read fails are skipped, so the
        list may contain fewer than ``n`` frames (possibly none).

    Raises:
        IOError: If the capture cannot be opened.
        ValueError: If ``n`` exceeds the frame count reported by the capture
            (or ``n`` is negative, via ``random.sample``).
    """
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        raise IOError(f"Cannot open video file: {video_path}")

    # Everything after a successful open runs under try/finally so the capture
    # is released even when validation or sampling raises.
    try:
        # total number of frames as reported by the capture backend
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        if n > total_frames:
            raise ValueError(f"Video only has {total_frames} frames, but {n} requested")

        # pick n unique random frame indices; sorted so seeks only move forward
        frame_indices = sorted(random.sample(range(total_frames), n))

        frames = []
        for idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)  # jump to frame index
            ret, frame = cap.read()
            if ret:
                frames.append(frame)
        return frames
    finally:
        cap.release()
from pathlib import Path
from datetime import datetime
import uuid
from typing import Optional, Union

def save_video_bytes(
    video_bytes: bytes,
    original_remote_path: Union[str, Path],
    dest_dir: Optional[Union[str, Path]] = None,
    prefer_original_name: bool = True,
) -> str:
    """
    Save raw video bytes to a local file, preserving the original extension(s).

    Args:
        video_bytes: The raw bytes of the video.
        original_remote_path: The file path/name from the other system
                              (used only to extract the extension and optionally the stem).
        dest_dir: Directory to save into (defaults to current working directory).
                  Created, including parents, if it does not exist.
        prefer_original_name: If True, try to use the original filename; if it exists,
                              a unique suffix will be appended. If False, always generate
                              a new UUID-based name.
    Returns:
        The absolute path (as a string) to the saved local file.
    Raises:
        TypeError: If ``video_bytes`` is not ``bytes`` or ``bytearray``.
        ValueError: If ``original_remote_path`` has no file extension.
    """
    if not isinstance(video_bytes, (bytes, bytearray)):
        raise TypeError("video_bytes must be bytes or bytearray")

    remote = Path(str(original_remote_path))
    # Keep compound suffixes if any (e.g., '.tar.gz'); for videos it's typically one suffix like '.mp4'
    compound_suffix = "".join(remote.suffixes)
    if not compound_suffix:
        raise ValueError("Original path has no extension; cannot determine output format.")

    # Choose destination directory
    out_dir = Path(dest_dir) if dest_dir is not None else Path.cwd()
    out_dir.mkdir(parents=True, exist_ok=True)

    # Strip the WHOLE compound suffix from the name. `Path.stem` only drops the
    # last suffix, so pairing it with `compound_suffix` would duplicate inner
    # suffixes ('clip.v2.mp4' -> 'clip.v2.v2.mp4').
    remote_name = remote.name
    if remote_name.endswith(compound_suffix):
        original_stem = remote_name[: -len(compound_suffix)]
    else:
        original_stem = remote.stem

    # Build a candidate filename
    if prefer_original_name and original_stem:
        base_stem = original_stem
    else:
        # short UUID to keep names readable
        base_stem = f"video-{uuid.uuid4().hex[:8]}"

    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    candidate = out_dir / f"{base_stem}{compound_suffix}"

    # Ensure we don't overwrite existing files; if it exists, append a unique suffix
    if candidate.exists():
        candidate = out_dir / f"{base_stem}-{timestamp}-{uuid.uuid4().hex[:6]}{compound_suffix}"

    # Open with 'xb' so the write fails instead of clobbering a file that
    # appeared between the existence check and the open.
    try:
        with open(candidate, "xb") as f:
            f.write(video_bytes)
    except FileExistsError:
        # Extremely rare race; retry with a full-length UUID in the name
        candidate = out_dir / f"{base_stem}-{timestamp}-{uuid.uuid4().hex}{compound_suffix}"
        with open(candidate, "xb") as f:
            f.write(video_bytes)

    return str(candidate.resolve())


def predict_video(bytes, path, n=30, threshold=0.47):
    """Classify a video as 'real' or 'generated' from randomly sampled frames.

    The bytes are written to a temporary local file, ``n`` random frames are
    sampled from it, ``infer_imgs`` is run on those frames and its outputs are
    averaged. The temporary file is removed whether or not inference succeeds.

    Args:
        bytes: Raw video bytes.
        path: Original path/name of the video; used for its file extension.
        n: Number of random frames to sample.
        threshold: Mean scores strictly below this value are labelled 'real',
            everything else 'generated'.

    Returns:
        A ``(label, mean_score)`` tuple, where ``mean_score`` is the mean of the
        values returned by ``infer_imgs``.
        NOTE(review): presumably a per-frame probability of being generated;
        confirm against ``effort_inf.infer_imgs``.

    Raises:
        ValueError: If the video has fewer than ``n`` frames, or no sampled
            frame could be decoded.
        IOError: If the saved video cannot be opened.
    """
    # first save the video somewhere
    local_path = save_video_bytes(bytes, path)
    try:
        frames = get_random_n_frames(local_path, n)
        if not frames:
            # Without this the averaging below would fail with an opaque
            # ZeroDivisionError.
            raise ValueError(f"No frames could be decoded from video: {path}")
        all_probs = infer_imgs(frames)
        final_probs = sum(all_probs) / len(all_probs)
    finally:
        # Always clean up, otherwise every failed video leaves a file behind.
        os.remove(local_path)
    return 'real' if final_probs < threshold else 'generated', final_probs
    
# Set ``test`` to True to classify a single local video instead of running the
# full dataset evaluation below.
test = False
if test:
    path = '/home/rayan.banerjee/Downloads/real video.mp4'
    with open(path, 'rb') as video_file:
        print(predict_video(video_file.read(), path))
else:
    import time

    # Imported before the loop so a missing pandas fails fast rather than after
    # every video has already been processed.
    import pandas as pd

    DATASET_PATH = "/tmp/data" if not local_eval else '/l/users/rayan.banerjee/safevideo/live_dataset'
    dataset_remote = load_dataset(DATASET_PATH, split="test", streaming=True)
    print(dataset_remote)
    ids = []
    preds = []
    scores = []
    times = []
    loaded = False
    # No ``total=`` here: counting a streaming dataset means iterating all of it
    # once up front (and the previous list-based count also kept every element
    # in memory), so the progress bar simply runs without a known length.
    for el in tqdm(dataset_remote):
        start_time = time.time_ns()
        try:
            pred, score = predict_video(el['video']['bytes'], el['video']['path'])
            loaded = True
        except Exception as exc:
            # Best-effort: one bad video must not abort the run. Report it and
            # fall back to the default prediction.
            print(f"Prediction failed, using fallback: {exc!r}", file=sys.stderr)
            pred = 'generated'
            score = 1
        # Linear rescale; maps a score in [0, 1] onto [-1, 1].
        score = score * 2 - 1
        sample_id = el['id']
        # time_ns() is in nanoseconds; 1e9 ns per second.
        times.append((time.time_ns() - start_time) / 1e9)
        ids.append(sample_id)
        scores.append(score)
        preds.append(pred)

    pd.DataFrame({
        'id': ids,
        'pred': preds,
        'score': scores,
        'time': times
    }).to_csv('submission.csv', index=False)

    if not loaded:
        # this means none of our submissions worked, we will throw an error just for the sake of debugging
        raise Exception('Failed to load a single video')