# test.py — In-process callable version (for ZeroGPU stateless)
# Keep original logic; add build_parser(), run_cli(args_list), and run_inference(args)
# Do NOT initialize CUDA at import-time.

import os
from os import path
from argparse import ArgumentParser
import shutil

# Do not apply the @spaces.GPU decorator here, to avoid double scheduling
# with the @spaces.GPU wrapper in app.py.
# import spaces

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
import numpy as np
from PIL import Image

from inference.data.test_datasets import DAVISTestDataset_221128_TransColorization_batch
from inference.data.mask_mapper import MaskMapper
from model.network import ColorMNet
from inference.inference_core import InferenceCore

from progressbar import progressbar

from dataset.range_transform import inv_im_trans, inv_lll2rgb_trans

from skimage import color, io
import cv2

try:
    import hickle as hkl
except ImportError:
    # hickle is optional; the message below says it is only needed for
    # multi-scale testing.
    print('Failed to import hickle. Fine if not using multi-scale testing.')


# ----------------- small utils -----------------
def detach_to_cpu(x):
    """Return ``x`` detached from the autograd graph and moved to the CPU."""
    return x.detach().cpu()


def tensor_to_np_float(image):
    """Convert a tensor to a ``float32`` NumPy array via ``image.numpy()``."""
    image_np = image.numpy().astype('float32')
    return image_np


def lab2rgb_transform_PIL(mask):
    """Convert a Lab tensor to an RGB array clipped to ``[0, 1]``.

    The input is detached and moved to the CPU, passed through
    ``inv_lll2rgb_trans``, converted to a ``float32`` NumPy array, rearranged
    so that channels come last, converted with ``skimage.color.lab2rgb`` and
    finally clipped to ``[0, 1]``.

    NOTE(review): ``inv_lll2rgb_trans`` is defined in
    ``dataset.range_transform``; presumably it undoes the dataset's input
    normalisation — confirm there.
    """
    mask_d = detach_to_cpu(mask)
    mask_d = inv_lll2rgb_trans(mask_d)
    im = tensor_to_np_float(mask_d)

    if len(im.shape) == 3:
        # Move the first axis to the end (presumably CHW -> HWC — TODO confirm).
        im = im.transpose((1, 2, 0))
    else:
        # Otherwise append a trailing singleton axis.
        im = im[:, :, None]

    im = color.lab2rgb(im)
    return im.clip(0, 1)


# ----------------- argparse -----------------
def build_parser() -> ArgumentParser:
    """Build the argument parser holding every inference option.

    Returns:
        An ``ArgumentParser`` with the model path, dataset/output settings,
        long-term memory options and multi-scale options registered. Nothing
        is parsed here; callers invoke ``parse_args`` themselves.
    """
    parser = ArgumentParser()
    parser.add_argument('--model', default='saves/DINOv2FeatureV6_LocalAtten_s2_154000.pth')
    parser.add_argument('--FirstFrameIsNotExemplar',
                        help='Whether the provided reference frame is exactly the first input frame',
                        action='store_true')

    # dataset setting
    parser.add_argument('--d16_batch_path', default='input',
                        help='Point to folder A/ which contains /00000.png etc.')
    parser.add_argument('--ref_path', default='ref',
                        help='Kept for parity; dataset will also read ref.png under each video folder when args provided')
    parser.add_argument('--output', default='result', help='Directory to save results')
    parser.add_argument('--reverse', default=False, action='store_true',
                        help='whether to reverse the frame order')
    parser.add_argument('--allow_resume', action='store_true',
                        help='skip existing videos that have been colorized')

    # For generic (G) evaluation, point to a folder that contains "JPEGImages" and "Annotations"
    parser.add_argument('--generic_path')
    parser.add_argument('--dataset', help='D16/D17/Y18/Y19/LV1/LV3/G', default='D16_batch')
    parser.add_argument('--split', help='val/test', default='val')
    parser.add_argument('--save_all', action='store_true',
                        help='Save all frames. Useful only in YouTubeVOS/long-time video')
    parser.add_argument('--benchmark', action='store_true',
                        help='enable to disable amp for FPS benchmarking')

    # Long-term memory options
    parser.add_argument('--disable_long_term', action='store_true')
    parser.add_argument('--max_mid_term_frames', help='T_max in paper, decrease to save memory',
                        type=int, default=10)
    parser.add_argument('--min_mid_term_frames', help='T_min in paper, decrease to save memory',
                        type=int, default=5)
    parser.add_argument('--max_long_term_elements',
                        help='LT_max in paper, increase if objects disappear for a long time',
                        type=int, default=10000)
    parser.add_argument('--num_prototypes', help='P in paper', type=int, default=128)
    parser.add_argument('--top_k', type=int, default=30)
    parser.add_argument('--mem_every', help='r in paper. Increase to improve running speed.',
                        type=int, default=5)
    parser.add_argument('--deep_update_every',
                        help='Leave -1 normally to synchronize with mem_every',
                        type=int, default=-1)

    # Multi-scale options
    parser.add_argument('--save_scores', action='store_true')
    parser.add_argument('--flip', action='store_true')
    parser.add_argument('--size', default=-1, type=int,
                        help='Resize the shorter side to this size. -1 to use original resolution. ')
    return parser


# ----------------- core inference -----------------
def run_inference(args):
    """
    The actual inference pipeline. Must be called inside ZeroGPU's scheduling
    context (wrapped by @spaces.GPU in app.py).
    Do not perform any CUDA initialization at module import time.
    """
    config = vars(args)
    config['enable_long_term'] = not config['disable_long_term']

    if args.output is None:
        args.output = f'.output/{args.dataset}_{args.split}'
        print(f'Output path not provided. Defaulting to {args.output}')

    # ----- Data preparation -----
    is_youtube = args.dataset.startswith('Y')
    is_davis = args.dataset.startswith('D')
    is_lv = args.dataset.startswith('LV')

    if is_youtube or args.save_scores:
        out_path = path.join(args.output, 'Annotations')
    else:
        out_path = args.output

    if args.split != 'val':
        raise NotImplementedError('Only split=val is supported in this script.')

    # Dataset: supports A/