Spaces:
Running
on
Zero
Running
on
Zero
# Copyright (C) 2022-present Naver Corporation. All rights reserved.
# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
"""
Utility script to pack metadata files of the dataset in order to be able to re-generate it elsewhere.
"""
| import os | |
| import glob | |
| from tqdm import tqdm | |
| import shutil | |
| import json | |
| from datasets.habitat_sim.paths import * | |
| import argparse | |
| import collections | |
def sort_dataset_roots(scenes_dataset):
    """Return ``(dataset, root_path)`` pairs ordered by decreasing root length.

    Longer roots are tried first so that a root which is a string prefix of
    another one (e.g. ``replica_cad`` vs ``replica_cad_baked_lighting``) does
    not shadow the more specific entry. The sort is stable, so roots of equal
    length keep the order of ``scenes_dataset``.
    """
    return sorted(scenes_dataset.items(), key=lambda item: len(item[1]), reverse=True)


def to_portable_path(path, dataset_roots):
    """Rewrite ``path`` as ``<dataset key>/<path relative to the dataset root>``.

    Args:
        path: A scene-related path stored in a metadata file.
        dataset_roots: Iterable of ``(dataset, root_path)`` pairs, most
            specific root first (see ``sort_dataset_roots``).

    Returns:
        The path with its dataset root replaced by the generic dataset key.

    Raises:
        KeyError: If ``path`` does not start with any known dataset root.
    """
    for dataset, root in dataset_roots:
        # NOTE: this is a plain string-prefix test, as in the original script;
        # it does not verify that the match ends on a path-component boundary.
        if path.startswith(root):
            return os.path.join(dataset, os.path.relpath(path, root))
    raise KeyError("Unknown path:" + path)


def pack_metadata(original_metadata, dataset_roots):
    """Copy a metadata mapping, making its scene-related paths portable.

    Values stored under ``scene_dataset_config_file``, ``scene`` and
    ``navmesh`` are rewritten with ``to_portable_path`` unless they are the
    empty string; every other entry is copied unchanged.

    Raises:
        KeyError: If one of the path entries is outside every dataset root.
    """
    path_keys = ("scene_dataset_config_file", "scene", "navmesh")
    metadata = {}
    for key, value in original_metadata.items():
        if key in path_keys and value != "":
            value = to_portable_path(value, dataset_roots)
        metadata[key] = value
    return metadata


def statistics_key(scene):
    """Return the bucket under which images of the packed ``scene`` are counted.

    The bucket is the first ``/``-separated component of ``scene``, except for
    ``hm3d`` scenes where the first two components are kept.
    """
    scene_split = scene.split("/")
    if scene_split[0] == "hm3d":
        return "/".join(scene_split[:2])
    return scene_split[0]


def main(argv=None):
    """Pack every ``metadata.json`` found under ``input_dir`` into ``output_dir``.

    Args:
        argv: Command-line arguments (``input_dir output_dir``). ``None`` makes
            argparse read ``sys.argv``, as the script did originally.

    Raises:
        FileExistsError: If ``output_dir`` already exists.
        KeyError: If a metadata file references a path outside every root
            listed in ``SCENES_DATASET``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_dir")
    parser.add_argument("output_dir")
    args = parser.parse_args(argv)

    input_dirname = args.input_dir
    output_dirname = args.output_dir

    # Fails if the output directory already exists (no exist_ok), as before.
    os.makedirs(output_dirname)

    # Materialize the file list *before* anything is written: a lazy iglob
    # could otherwise pick up freshly packed files when output_dir is located
    # inside input_dir. It also lets tqdm display a total.
    input_metadata_filenames = list(
        glob.iglob(f"{input_dirname}/**/metadata.json", recursive=True)
    )

    # Loop-invariant: computed once instead of once per metadata file.
    dataset_roots = sort_dataset_roots(SCENES_DATASET)

    images_count = collections.defaultdict(int)

    for input_filename in tqdm(input_metadata_filenames):
        with open(input_filename, "r", encoding="utf-8") as f:
            original_metadata = json.load(f)

        # Ignore files that do not describe any view
        if (
            "multiviews" not in original_metadata
            or len(original_metadata["multiviews"]) == 0
        ):
            print("No views in", input_filename)
            continue

        relpath = os.path.relpath(input_filename, input_dirname)
        print(relpath)

        # Copy metadata, while replacing scene paths by generic keys depending
        # on the dataset, for portability.
        metadata = pack_metadata(original_metadata, dataset_roots)

        # Compile some general statistics while packing data
        images_count[statistics_key(metadata["scene"])] += len(metadata["multiviews"])

        # Mirror the input directory layout below the output directory.
        output_filename = os.path.join(output_dirname, relpath)
        os.makedirs(os.path.dirname(output_filename), exist_ok=True)
        with open(output_filename, "w", encoding="utf-8") as f:
            json.dump(metadata, f)

    # Print statistics
    print("Images count:")
    for upper_level, count in images_count.items():
        print(f"- {upper_level}: {count}")


if __name__ == "__main__":
    main()