Spaces:
Running
Running
| import os | |
| import zipfile | |
| from typing import List, Optional | |
| import hashlib | |
def compress_files(file_paths: List[str], output_zip_path: str) -> str:
    """Compress multiple files into a single zip file.

    Every file is stored at the root of the archive under its base name
    (directory components of the input path are dropped), so inputs that
    share a base name collide inside the archive.

    Args:
        file_paths (List[str]): List of paths to files to be compressed.
        output_zip_path (str): Path and name of the output zip file. Its
            parent directory is created when it does not exist yet.

    Returns:
        str: ``output_zip_path``, unchanged.

    Raises:
        FileNotFoundError: If any of the input files doesn't exist. The
            check happens before anything is written, so no partial
            archive is left behind.
    """
    # Materialize once: the inputs are walked twice (validate, then write).
    file_paths = list(file_paths)

    # Validate up front so a missing input cannot leave a half-written zip.
    for file_path in file_paths:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

    # dirname() is "" for a bare file name and os.makedirs("") raises,
    # so only create the parent directory when there is one.
    parent_dir = os.path.dirname(output_zip_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with zipfile.ZipFile(
        output_zip_path, "w", compression=zipfile.ZIP_DEFLATED
    ) as zipf:
        for file_path in file_paths:
            zipf.write(file_path, os.path.basename(file_path))
    return output_zip_path
def get_file_hash(file_path: str, algorithm: str = "sha256") -> str:
    """Generate the hex digest of a file using the specified hashing algorithm.

    The digest is computed from the file's content, not from its path.

    Args:
        file_path (str): Path of the file to hash.
        algorithm (str): Name of an algorithm accepted by ``hashlib.new``.
            Defaults to ``"sha256"``.

    Returns:
        str: The hexadecimal digest on success. Failures while reading are
        NOT raised; instead a message string is returned:
        ``"File not found: <file_path>"`` when the file is missing, or
        ``"An error occurred: <details>"`` for any other exception.
        Callers must therefore not assume the result is always a digest.

    Raises:
        ValueError: If ``algorithm`` is not supported by ``hashlib``.
    """
    hash_func = hashlib.new(algorithm)
    try:
        with open(file_path, "rb") as f:
            # Read in fixed-size chunks so large files are never loaded
            # into memory at once; iter() stops on the empty-bytes EOF.
            for chunk in iter(lambda: f.read(4096), b""):
                hash_func.update(chunk)
        return hash_func.hexdigest()
    except FileNotFoundError:
        return f"File not found: {file_path}"
    except Exception as e:
        return f"An error occurred: {str(e)}"
def find_file_by_hash(dir_path: str, hash_str: str) -> Optional[str]:
    """Get a file path from the directory based on the file's content hash.

    Only regular files directly inside ``dir_path`` are considered;
    subdirectories are not searched. Each candidate is hashed with
    ``get_file_hash`` and compared to ``hash_str`` by exact string equality.

    Args:
        dir_path (str): Directory to search.
        hash_str (str): Hash to look for, in the form ``get_file_hash``
            returns it.

    Returns:
        Optional[str]: Path (``dir_path`` joined with the file name) of the
        first file whose hash equals ``hash_str``, or ``None`` when no file
        matches. Files are visited in ``os.listdir`` order.

    Raises:
        ValueError: If ``dir_path`` does not exist or is not a directory.
    """
    # isdir() is False for missing paths too, so one check covers both
    # "does not exist" and "exists but is not a directory".
    if not os.path.isdir(dir_path):
        raise ValueError(f"Directory {dir_path} does not exist")

    for entry_name in os.listdir(dir_path):
        candidate = os.path.join(dir_path, entry_name)
        if os.path.isfile(candidate) and get_file_hash(candidate) == hash_str:
            return candidate
    return None