eft_trained / script.py
Qafig's picture
Upload folder using huggingface_hub
5231cb0 verified
import sys
import cv2
import os
from datasets import load_dataset
sys.path.append('Effort-AIGI-Detection/DeepfakeBench/training')
from effort_inf import infer_imgs
from tqdm import tqdm
# Run inference once on a single image read from the Effort-AIGI-Detection/figs
# directory and print whatever infer_imgs returns.
# NOTE(review): presumably a start-up smoke test / model warm-up — confirm.
# cv2.imread is not checked here, so a missing file is passed on as-is.
print(infer_imgs([cv2.imread('Effort-AIGI-Detection/figs/deepfake_tab1.png')]))
# True when the 'qafigs_machine' environment variable is present (its value is
# ignored); the evaluation section below uses it to pick the dataset path.
local_eval = 'qafigs_machine' in os.environ
def get_first_n_frames(video_path, n):
    """Read up to ``n`` frames from the beginning of a video.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        n: Maximum number of leading frames to collect.

    Returns:
        The frames in reading order. The list is shorter than ``n`` when
        the stream stops yielding frames early.

    Raises:
        IOError: If the capture cannot be opened.
    """
    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        raise IOError(f"Cannot open video file: {video_path}")

    collected = []
    while len(collected) < n:
        ok, image = capture.read()
        if not ok:
            # The stream ran out before n frames were read.
            break
        collected.append(image)

    capture.release()
    return collected
import cv2
import random
def get_random_n_frames(video_path, n):
    """Return frames taken from ``n`` distinct random positions of a video.

    Frame indices are drawn without replacement from the frame count that
    the capture reports, then visited in ascending order.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        n: Number of distinct frame positions to sample.

    Returns:
        The frames that could be read, ordered by frame index. Positions
        whose read fails are skipped, so the list may hold fewer than ``n``
        frames (possibly none).

    Raises:
        IOError: If the capture cannot be opened.
        ValueError: If ``n`` exceeds the reported frame count.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Cannot open video file: {video_path}")

    # Everything past this point runs under try/finally so the capture is
    # released on the error paths too (previously the ValueError below
    # returned without releasing it).
    try:
        # total number of frames in the video, as reported by the capture
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if n > total_frames:
            raise ValueError(f"Video only has {total_frames} frames, but {n} requested")

        # pick n unique random frame indices; sorted so seeks move forward
        frame_indices = sorted(random.sample(range(total_frames), n))

        frames = []
        for idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)  # jump to frame index
            ret, frame = cap.read()
            if ret:
                frames.append(frame)
        return frames
    finally:
        cap.release()
from pathlib import Path
from datetime import datetime
import uuid
from typing import Optional, Union
def save_video_bytes(
    video_bytes: bytes,
    original_remote_path: Union[str, Path],
    dest_dir: Optional[Union[str, Path]] = None,
    prefer_original_name: bool = True,
) -> str:
    """
    Save raw video bytes to a local file, preserving the original extension(s).

    Args:
        video_bytes: The raw bytes of the video (``bytes`` or ``bytearray``).
        original_remote_path: The file path/name from the other system
            (used only to extract the extension and optionally the stem;
            only its final path component is considered).
        dest_dir: Directory to save into (defaults to current working directory).
            It is created if it does not exist.
        prefer_original_name: If True, try to use the original filename; if it exists,
            a unique suffix will be appended. If False, always generate
            a new UUID-based name.

    Returns:
        The absolute path (as a string) to the saved local file.

    Raises:
        TypeError: If ``video_bytes`` is not ``bytes`` or ``bytearray``.
        ValueError: If ``original_remote_path`` has no extension.
    """
    if not isinstance(video_bytes, (bytes, bytearray)):
        raise TypeError("video_bytes must be bytes or bytearray")

    remote = Path(str(original_remote_path))

    # Keep compound suffixes if any (e.g., '.tar.gz'); for videos it's typically one suffix like '.mp4'
    compound_suffix = "".join(remote.suffixes)
    if not compound_suffix:
        raise ValueError("Original path has no extension; cannot determine output format.")

    # Choose destination directory
    out_dir = Path(dest_dir) if dest_dir is not None else Path.cwd()
    out_dir.mkdir(parents=True, exist_ok=True)

    # Path.stem drops only the LAST suffix, so combining it with the compound
    # suffix would repeat the inner ones ('clip.v2.mp4' -> 'clip.v2.v2.mp4').
    # Strip the whole compound suffix from the file name instead.
    original_stem = remote.name[: -len(compound_suffix)]

    # Build a candidate filename
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    if prefer_original_name and original_stem:
        base_stem = original_stem
    else:
        # short UUID to keep names readable
        base_stem = f"video-{uuid.uuid4().hex[:8]}"
    candidate = out_dir / f"{base_stem}{compound_suffix}"

    # Ensure we don't overwrite existing files; if it exists, append a unique suffix
    if candidate.exists():
        candidate = out_dir / f"{base_stem}-{timestamp}-{uuid.uuid4().hex[:6]}{compound_suffix}"

    # Open with 'xb' so the write fails instead of overwriting if the name
    # got taken between the existence check and the open.
    try:
        with open(candidate, "xb") as f:
            f.write(video_bytes)
    except FileExistsError:
        # Extremely rare race; retry once with a full-length UUID in the name
        candidate = out_dir / f"{base_stem}-{timestamp}-{uuid.uuid4().hex}{compound_suffix}"
        with open(candidate, "xb") as f:
            f.write(video_bytes)

    return str(candidate.resolve())
def predict_video(bytes, path, n=30, threshold=0.47):
    """Classify a video as 'real' or 'generated' from a sample of its frames.

    The bytes are written to a temporary local file, up to ``n`` frames are
    sampled from it, ``infer_imgs`` is run on those frames and its outputs
    are averaged. The temporary file is always removed afterwards.

    Args:
        bytes: Raw video content. (The name shadows the builtin; it is kept
            so existing callers are unaffected.)
        path: Original path/name of the video; used to derive the local
            file name and extension.
        n: Number of frames to sample.
        threshold: Mean scores strictly below this value are labelled
            'real', everything else 'generated'.

    Returns:
        A ``(label, mean_score)`` tuple where ``label`` is 'real' or
        'generated'.
        NOTE(review): assumes infer_imgs returns one numeric score per
        frame — confirm against effort_inf.

    Raises:
        ValueError: If no frame could be read from the video.
    """
    # first save the video somewhere
    local_path = save_video_bytes(bytes, path)
    try:
        try:
            frames = get_random_n_frames(local_path, n)
        except ValueError:
            # Random sampling refuses clips with fewer than n reported
            # frames; use whatever leading frames exist instead of failing.
            frames = get_first_n_frames(local_path, n)
        if not frames:
            raise ValueError(f"No frames could be decoded from: {path}")
        all_probs = infer_imgs(frames)
        final_probs = sum(all_probs) / len(all_probs)
    finally:
        # Clean up even when sampling or inference fails, so failed videos
        # do not accumulate on disk.
        os.remove(local_path)
    return ('real' if final_probs < threshold else 'generated'), final_probs
# Set to True to classify a single local file instead of running the full
# dataset evaluation (previously toggled via the 'qafigs_machine' env var).
test = False
if test:
    path = '/home/rayan.banerjee/Downloads/real video.mp4'
    with open(path, 'rb') as video_file:
        print(predict_video(video_file.read(), path))
else:
    import time

    import pandas as pd

    DATASET_PATH = "/tmp/data" if not local_eval else '/l/users/rayan.banerjee/safevideo/live_dataset'
    dataset_remote = load_dataset(DATASET_PATH, split="test", streaming=True)
    print(dataset_remote)

    ids = []
    preds = []
    scores = []
    times = []
    loaded = False  # becomes True once at least one video is scored

    # No `total=` here: counting a streaming dataset means iterating (and
    # loading) every video once before the real pass even starts.
    for el in tqdm(dataset_remote):
        start_time = time.time_ns()
        try:
            pred, score = predict_video(el['video']['bytes'], el['video']['path'])
            loaded = True
        except Exception as exc:
            # Best effort: keep the run going, fall back to 'generated', and
            # surface the reason instead of swallowing it silently.
            print(f"Prediction failed, defaulting to 'generated': {exc!r}", file=sys.stderr)
            pred = 'generated'
            score = 1
        # Rescale the score linearly: 0 -> -1, 1 -> 1.
        score = score * 2 - 1
        # time_ns() is in nanoseconds; 1e9 ns per second.
        times.append((time.time_ns() - start_time) / 1e9)
        ids.append(el['id'])
        scores.append(score)
        preds.append(pred)

    pd.DataFrame({
        'id': ids,
        'pred': preds,
        'score': scores,
        'time': times
    }).to_csv('submission.csv', index=False)

    if not loaded:
        # this means none of our submissions worked, we will throw an error just for the sake of debugging
        raise Exception('Failed to load a single video')