# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from collections.abc import Mapping, Sequence

import numpy as np
import torch
from huggingface_hub import hf_hub_download
from torch.utils.data.dataloader import default_collate

DATAS = ["sample1", "sample1_high_res", "sample1_dino"]


def load(
    name: str = "sonata",
    download_root: str = None,
):
    """Load a demo point-cloud sample as a dict of numpy arrays.

    ``name`` is either one of the samples listed in ``DATAS`` (downloaded from
    the ``pointcept/demo`` dataset on HuggingFace) or a path to a local ``.npz`` file.
    """
    if name in DATAS:
        print(f"Loading data from HuggingFace: {name} ...")
        data_path = hf_hub_download(
            repo_id="pointcept/demo",
            filename=f"{name}.npz",
            repo_type="dataset",
            revision="main",
            local_dir=download_root or os.path.expanduser("~/.cache/sonata/data"),
        )
    elif os.path.isfile(name):
        print(f"Loading data from local path: {name} ...")
        data_path = name
    else:
        raise RuntimeError(f"Data {name} not found; available data = {DATAS}")
    return dict(np.load(data_path))


def collate_fn(batch):
    """Collate function for point clouds; supports dicts and lists.

    'coord' is necessary to determine 'offset'.
    """
    if not isinstance(batch, Sequence):
        raise TypeError(f"{type(batch)} is not supported.")

    if isinstance(batch[0], torch.Tensor):
        return torch.cat(list(batch))
    elif isinstance(batch[0], str):
        # str is also a Sequence, so it must be handled before the Sequence branch
        return list(batch)
    elif isinstance(batch[0], Sequence):
        # append the per-sample point count, then accumulate it into an offset
        for data in batch:
            data.append(torch.tensor([data[0].shape[0]]))
        batch = [collate_fn(samples) for samples in zip(*batch)]
        batch[-1] = torch.cumsum(batch[-1], dim=0).int()
        return batch
    elif isinstance(batch[0], Mapping):
        batch = {
            key: (
                collate_fn([d[key] for d in batch])
                if "offset" not in key
                # offset -> per-sample counts (diff) -> concat counts -> cumulative offset
                else torch.cumsum(
                    collate_fn([d[key].diff(prepend=torch.tensor([0])) for d in batch]),
                    dim=0,
                )
            )
            for key in batch[0]
        }
        return batch
    else:
        return default_collate(batch)
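

# A minimal usage sketch (the specific keys and tensor shapes below are
# assumptions following the Pointcept point-cloud convention, not fixed by this
# module): it shows how ``collate_fn`` concatenates per-sample tensors and
# re-accumulates ``offset`` across the batch.
if __name__ == "__main__":
    # Two hypothetical samples with 100 and 80 points respectively.
    sample_a = {
        "coord": torch.rand(100, 3),
        "feat": torch.rand(100, 6),
        "offset": torch.tensor([100]),
    }
    sample_b = {
        "coord": torch.rand(80, 3),
        "feat": torch.rand(80, 6),
        "offset": torch.tensor([80]),
    }
    batch = collate_fn([sample_a, sample_b])
    print(batch["coord"].shape)  # torch.Size([180, 3])
    print(batch["offset"])       # tensor([100, 180]) -> cumulative point counts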