Spaces:
Runtime error
Runtime error
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # This software may be used and distributed according to the terms of the Chameleon License Agreement. | |
| import hashlib | |
| import subprocess | |
| import sys | |
| from pathlib import Path | |
def download_file(url: str, output_path: Path):
    """Fetch ``url`` into ``output_path`` by shelling out to ``wget``.

    ``--continue`` is passed so that wget is asked to resume a partially
    downloaded file rather than start over.

    Args:
        url: Address of the file to fetch.
        output_path: Local path handed to wget's ``-O`` option.

    Raises:
        subprocess.CalledProcessError: If wget exits with a non-zero status.
    """
    print(f"Downloading {output_path}")
    destination = str(output_path)
    wget_command = ["wget", "--continue", url, "-O", destination]
    subprocess.check_call(wget_command)
def validate_checksum(folder: Path):
    """Check every file listed in ``folder / "checklist.chk"`` against its MD5.

    The checklist is read as whitespace-separated tokens that alternate
    between an expected hex digest and a file name relative to ``folder``.
    A trailing token without a partner is ignored.

    On the first mismatch a message is printed and the process exits with
    status 1. The function returns ``None`` when every listed file matches.

    Args:
        folder: Directory containing ``checklist.chk`` and the listed files.

    Raises:
        SystemExit: If a computed digest differs from the expected one.
        FileNotFoundError: If the checklist or a listed file is missing.
    """
    # Hash in fixed-size chunks: the listed files include multi-gigabyte
    # checkpoints, which must not be loaded into memory in one piece.
    chunk_size = 1 << 20  # 1 MiB

    chks_parts = (folder / "checklist.chk").read_text().split()
    for expected_checksum, file in zip(chks_parts[::2], chks_parts[1::2]):
        file_path = folder / file
        digest = hashlib.md5()
        with file_path.open("rb") as stream:
            # iter() with a sentinel stops at the empty read that marks EOF.
            for chunk in iter(lambda: stream.read(chunk_size), b""):
                digest.update(chunk)
        if digest.hexdigest() != expected_checksum:
            print(f"Checksum mismatch for {file_path}")
            sys.exit(1)
def download_tokenizer(presigned_url: str, target_folder: Path):
    """Download the tokenizer files and verify them against their checklist.

    Files are stored under ``target_folder / "tokenizer"``, which is created
    if needed. Each remote address is formed by replacing ``*`` in
    ``presigned_url`` with ``tokenizer/<filename>``.

    Args:
        presigned_url: URL template containing ``*`` as the path placeholder.
        target_folder: Root directory under which the files are stored.
    """
    tokenizer_folder = target_folder / "tokenizer"
    tokenizer_folder.mkdir(parents=True, exist_ok=True)

    tokenizer_files = (
        "text_tokenizer.json",
        "vqgan.ckpt",
        "vqgan.yaml",
        "checklist.chk",
    )
    for name in tokenizer_files:
        remote_url = presigned_url.replace("*", f"tokenizer/{name}")
        local_path = tokenizer_folder / name
        download_file(remote_url, local_path)

    # The checklist is fetched last, so validation runs after the loop.
    validate_checksum(tokenizer_folder)
def download_model(presigned_url: str, target_folder: Path, model: str):
    """Download one model's files and verify them against their checklist.

    Files are stored under ``target_folder / "models" / model``. Each remote
    address is formed by replacing ``*`` in ``presigned_url`` with
    ``<model>/<filename>``.

    Args:
        presigned_url: URL template containing ``*`` as the path placeholder.
        target_folder: Root directory under which the files are stored.
        model: Model identifier, either ``"7b"`` or ``"30b"``.

    Raises:
        SystemExit: With status 1 if ``model`` is not a known identifier.
    """
    # Weight files per model: 7b ships one file, 30b ships four shards
    # numbered 00-03.
    weight_files_by_model = {
        "7b": ["consolidated.pth"],
        "30b": [f"consolidated.{i:02}.pth" for i in range(4)],
    }

    # Reject unknown names before touching the filesystem, so that a typo
    # does not leave an empty models/<typo> directory behind.
    if model not in weight_files_by_model:
        print(f"Unknown model: {model}")
        sys.exit(1)

    model_folder = target_folder / "models" / model
    model_folder.mkdir(parents=True, exist_ok=True)

    download_filenames = [
        "params.json",
        "consolidate_params.json",
        "checklist.chk",
        *weight_files_by_model[model],
    ]
    for filename in download_filenames:
        download_file(
            presigned_url.replace("*", f"{model}/{filename}"),
            model_folder / filename,
        )

    validate_checksum(model_folder)
def main():
    """Interactively download the tokenizer and the requested models.

    The presigned URL is taken from the first command-line argument or, when
    none is given, prompted for. Everything is stored under ``./data``. The
    tokenizer is always downloaded. The user is then asked for a
    comma-separated list of models, and an empty answer selects all of them.
    """
    # Strip stray whitespace that a copy-paste may have added around the URL.
    presigned_url = (
        sys.argv[1] if len(sys.argv) > 1 else input("Enter the URL from email: ")
    ).strip()

    target_folder = Path("./data")
    target_folder.mkdir(parents=True, exist_ok=True)

    download_tokenizer(presigned_url, target_folder)

    model_size = input(
        "Enter the list of models to download without spaces (7B,30B), or press Enter for all: "
    )

    # Normalise each entry and drop empty ones, so that a trailing comma or a
    # whitespace-only answer does not produce an empty model name and abort
    # after earlier downloads have already completed.
    models = [name.strip().lower() for name in model_size.split(",")]
    models = [name for name in models if name]
    if not models:
        models = ["7b", "30b"]

    for model in models:
        download_model(presigned_url, target_folder, model)


if __name__ == "__main__":
    main()