Spaces:
Sleeping
Sleeping
| from pathlib import Path | |
| from shutil import copy2, rmtree | |
| from fastcore.utils import L | |
| import re | |
| from src.fabrics_processor.config import config | |
def sentence2snake(name: str) -> str:
    """Return a lower-cased, snake_case version of ``name``.

    Each character that is not a word character (letter, digit or
    underscore) becomes an underscore, and consecutive underscores are then
    squeezed into one. Leading and trailing underscores are not stripped.
    """
    result = name.lower()
    # First pass swaps non-word characters for "_", second pass collapses runs.
    for pattern in (r'\W', r'_+'):
        result = re.sub(pattern, r'_', result)
    return result
def round_timestamp(ts: float) -> int:
    """Coarsen a timestamp by dropping the last four digits of its integer part.

    Two timestamps that fall into the same 10000-second bucket compare equal,
    which hides small differences between filesystems.

    Args:
        ts: Timestamp in seconds (e.g. ``st_mtime``).

    Returns:
        The integer part of ``ts`` divided by 10000, truncated toward zero.
        Values whose magnitude is below 10000 yield 0.
    """
    # Integer arithmetic instead of string slicing: slicing the decimal
    # representation fails on short values (empty string) and on floats that
    # print in scientific notation.
    whole = int(ts)
    bucket = abs(whole) // 10000
    return bucket if whole >= 0 else -bucket
def get_md_files_obsidian(path: Path) -> dict:
    """Collect the markdown files found anywhere below ``path``.

    Args:
        path: Root directory to search recursively for ``*.md`` files.

    Returns:
        Dict mapping ``"<snake_case stem>-<lower-cased parent folder name>"``
        to a ``(path, mtime, size)`` tuple. If two files produce the same
        key, the one visited last wins.
    """
    files = {}
    for md_file in Path(path).glob('**/*.md'):
        # Skip directories that happen to end in ".md" and broken symlinks;
        # neither can be stat'ed and copied as a regular file.
        if not md_file.is_file():
            continue
        # One stat() call so that mtime and size come from the same snapshot.
        info = md_file.stat()
        # The parent folder name is appended as an identifier to distinguish
        # own prompts from others.
        key = sentence2snake(md_file.stem) + "-" + md_file.parent.name.lower()
        files[key] = (md_file, info.st_mtime, info.st_size)
    return files
def get_md_files_fabricsfolder(path: Path) -> dict:
    """Collect the ``system.md`` files of the immediate subdirectories of ``path``.

    Args:
        path: Directory whose direct subdirectories are inspected.

    Returns:
        Dict mapping each subdirectory name to a
        ``(system.md path, mtime, size)`` tuple. Subdirectories without a
        ``system.md`` are omitted.
    """
    files = {}
    for subdir in Path(path).iterdir():
        if not subdir.is_dir():
            continue
        system_md = subdir / 'system.md'
        if not system_md.exists():
            continue
        # One stat() call so that mtime and size come from the same snapshot.
        info = system_md.stat()
        # Use the full directory name: ``stem`` would cut everything after the
        # last dot, producing a key that no longer names the directory.
        files[subdir.name] = (system_md, info.st_mtime, info.st_size)
    return files
def get_modified_files(source_files: dict, target_files: dict) -> list:
    """Find source entries that also exist in the target but differ in size.

    Args:
        source_files: Dict of ``name -> (path, timestamp, size)``.
        target_files: Dict of ``name -> (path, timestamp, size)``.

    Returns:
        An ``L`` of single-entry dicts ``{name: (path, timestamp, size)}``
        taken from ``source_files``, one per name present in both dicts whose
        sizes are not equal.
    """
    # Timestamps are deliberately not compared: filesystem differences and
    # daylight saving time caused false positives, while rounding them would
    # miss edits made shortly after the previous sync. Only size is checked.
    changed = []
    for name, entry in source_files.items():
        if name not in target_files:
            continue
        if entry[2] != target_files[name][2]:
            changed.append({name: entry})
    return L(changed)
def get_new_files(source_files: dict, target_files: dict) -> list:
    """Find source entries whose name does not occur in the target.

    Args:
        source_files: Dict of ``name -> (path, timestamp, size)``.
        target_files: Dict keyed by name; only its keys are consulted.

    Returns:
        An ``L`` of single-entry dicts ``{name: (path, timestamp, size)}``,
        one per name that is missing from ``target_files``.
    """
    missing = []
    for name, entry in source_files.items():
        if name not in target_files:
            missing.append({name: entry})
    return L(missing)
def process_file(source: dict, target_dir: Path) -> None:
    """Copy one source file into its own subdirectory of ``target_dir``.

    Creates ``target_dir/<filename>/``, copies the source file there as
    ``system.md`` (overwriting an existing one) and makes sure an ``user.md``
    exists next to it.

    Args:
        source: Single-entry dict ``{filename: (path, timestamp, size)}``;
            only the first entry and only its path are used.
        target_dir: Base target directory (e.g. 'md_target'); a plain string
            is accepted as well.

    Raises:
        ValueError: If ``source`` is empty.
    """
    if not source:
        raise ValueError("source must contain one filename -> (path, timestamp, size) entry")
    filename, (filepath, *_) = next(iter(source.items()))
    subdir = Path(target_dir) / filename
    # parents=True so that a not-yet-existing target root does not abort the copy.
    subdir.mkdir(mode=0o755, parents=True, exist_ok=True)
    copy2(filepath, subdir / 'system.md')
    # touch() only creates the file if missing; existing content is kept.
    (subdir / 'user.md').touch()
def sync_folders(source_dir: Path, target_dir: Path) -> None:
    """Synchronize the target folder with the markdown files of the source.

    New source files and source files whose size differs from the target copy
    are (re)processed into the target. Target subdirectories whose name
    contains a "-" and that no longer have a matching source file are removed
    together with their contents.

    Args:
        source_dir: Path to source directory (obsidian vault).
        target_dir: Path to target directory (fabrics folder).

    Raises:
        FileNotFoundError: If ``source_dir`` is not an existing directory.
    """
    # Normalize once so that plain strings work for every later path operation.
    source_dir = Path(source_dir)
    target_dir = Path(target_dir)

    # A missing source would look like "every file was deleted" and wipe all
    # managed target folders, so refuse to continue instead.
    if not source_dir.is_dir():
        raise FileNotFoundError(f"Source directory does not exist: {source_dir}")

    source_files = get_md_files_obsidian(source_dir)
    target_files = get_md_files_fabricsfolder(target_dir)

    # Process new files first, then files that changed.
    files_to_process = (get_new_files(source_files, target_files) +
                        get_modified_files(source_files, target_files))
    for entry in files_to_process:
        process_file(entry, target_dir)

    # Only names containing "-" are candidates for deletion; other target
    # folders are left alone.
    files_to_delete = [name for name in target_files
                       if name not in source_files and "-" in name]
    for file_name in files_to_delete:
        rmtree(target_dir / file_name)