import re

import pandas as pd
# Input and output files.
# input_csv holds one row per frame; output_csv receives one row per video.
input_csv = "results.csv"
output_csv = "submission.csv"

# Load frame-level results.
df = pd.read_csv(input_csv)

# Print the first few rows of results.csv as a quick sanity check.
# NOTE(review): the leading glyph in the message below looks like a
# mis-encoded emoji; it is kept as-is because the original character
# cannot be recovered from this file.
print("π Preview of results.csv:")
print(df.head(), "\n")  # DataFrame.head() defaults to the first 5 rows
# Matches a trailing "_frame_<digits>" marker, optionally followed by a single
# file extension (".png", ".jpg", ...). Compiled once because the function is
# applied to every row of the results table.
_FRAME_SUFFIX_RE = re.compile(r"_frame_\d+(?:\.\w+)?$")


def extract_video_id(filename: str) -> str:
    """Return the base video ID for a frame file name.

    Strips a trailing ``_frame_<digits>`` marker together with an optional
    single file extension. Only a marker at the very end of the name is
    removed; a name without such a trailing marker is returned unchanged.

    >>> extract_video_id("video1_frame_12.png")
    'video1'
    >>> extract_video_id("no_suffix.mp4")
    'no_suffix.mp4'
    """
    return _FRAME_SUFFIX_RE.sub("", filename)
# Tag every frame row with the video it belongs to.
df["video_id"] = df["file_name"].apply(extract_video_id)

# Aggregate probabilities per video (mean of frame scores).
video_scores = (
    df.groupby("video_id")["predicted_prob"]
    .mean()
    .reset_index()
    .rename(columns={"video_id": "id", "predicted_prob": "score"})
)

# Decision threshold on the mean per-video score.
# NOTE(review): the previous comment said ">0.5" while the code compared
# against 0.15. The executing value (0.15) is kept here so results do not
# change; confirm which cut-off is actually intended.
GENERATED_THRESHOLD = 0.15

# Assign label: "generated" if the score is strictly above the threshold,
# otherwise "real" (a NaN score also compares False and yields "real").
video_scores["pred"] = video_scores["score"].apply(
    lambda score: "generated" if score > GENERATED_THRESHOLD else "real"
)

# Reorder columns to the submission layout: id, pred, score.
video_scores = video_scores[["id", "pred", "score"]]

# Save submission file (index=False keeps the row index out of the CSV).
video_scores.to_csv(output_csv, index=False)
# NOTE(review): the leading glyph below looks like a mis-encoded emoji; it is
# kept byte-for-byte because the original character cannot be recovered.
print(f"β Saved submission file to {output_csv}")