"""Collapse frame-level predictions into one video-level submission row each.

Reads a CSV of per-frame scores, averages the scores of all frames that
belong to the same video, labels each video by comparing that mean against
a threshold, and writes the result as a submission CSV.
"""

import re

import pandas as pd

# Input and output files
input_csv = "results.csv"
output_csv = "submission.csv"

# A video is labelled "generated" when its mean frame score is strictly above
# this value. NOTE(review): the original comment said 0.5 while the code
# compared against 0.15; the code's value is kept here -- confirm which one
# is actually intended.
THRESHOLD = 0.15

# Trailing "_frame_<digits>" with an optional ".<ext>" suffix at end of name.
_FRAME_SUFFIX_RE = re.compile(r"_frame_\d+(?:\.\w+)?$")


def extract_video_id(filename):
    """Return *filename* with a trailing ``_frame_<n>[.ext]`` suffix removed.

    Names that do not end in such a suffix are returned unchanged.

    Args:
        filename: Frame file name, e.g. ``"clip7_frame_12.png"``.

    Returns:
        The base video ID, e.g. ``"clip7"``.
    """
    return _FRAME_SUFFIX_RE.sub("", filename)


def build_submission(frames, threshold=THRESHOLD):
    """Aggregate frame-level scores into one labelled row per video.

    Args:
        frames: DataFrame with a ``file_name`` column (frame file names) and
            a ``predicted_prob`` column (numeric per-frame scores). It is not
            modified.
        threshold: A video is labelled ``"generated"`` when its mean score is
            strictly greater than this value, otherwise ``"real"``.

    Returns:
        DataFrame with columns ``id``, ``pred``, ``score`` (in that order),
        one row per video, sorted by ``id`` (groupby default).

    Raises:
        KeyError: If ``file_name`` or ``predicted_prob`` is missing.
    """
    missing = [c for c in ("file_name", "predicted_prob") if c not in frames.columns]
    if missing:
        raise KeyError(f"missing required column(s): {missing}")

    # Group by a derived key Series instead of adding a column, so the
    # caller's frame is left untouched.
    video_ids = frames["file_name"].apply(extract_video_id).rename("id")
    video_scores = (
        frames["predicted_prob"]
        .groupby(video_ids)
        .mean()
        .rename("score")
        .reset_index()
    )

    # Strict comparison: a mean exactly equal to the threshold (or NaN) is "real".
    video_scores["pred"] = (
        video_scores["score"].gt(threshold).map({True: "generated", False: "real"})
    )

    return video_scores[["id", "pred", "score"]]


def main():
    """Load frame results, preview them, and write the submission file."""
    df = pd.read_csv(input_csv)

    # Print the first five rows of the loaded results
    print("📄 Preview of results.csv:")
    print(df.head(), "\n")

    video_scores = build_submission(df)

    video_scores.to_csv(output_csv, index=False)
    print(f"✅ Saved submission file to {output_csv}")


if __name__ == "__main__":
    main()