File size: 1,266 Bytes
5f58699
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
"""I/O helpers for reading and writing artifacts."""

from __future__ import annotations

from pathlib import Path
from typing import Any

import pandas as pd


def read_table(path: str | Path, **kwargs: Any) -> pd.DataFrame:
    """Read a CSV or JSON Lines file into a DataFrame.

    The reader is chosen from the file suffix (compared case-insensitively):
    ``.jsonl`` and ``.json`` are read with ``pandas.read_json``; ``.csv`` and
    paths without any suffix are read with ``pandas.read_csv``.

    Args:
        path: Location of the file to read.
        **kwargs: Forwarded to the selected pandas reader. For JSON inputs
            ``lines`` defaults to ``True``; pass ``lines=False`` to read a
            regular (non line-delimited) JSON document.

    Returns:
        The parsed DataFrame.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the suffix is not one of the supported ones.
    """

    path = Path(path)
    if not path.exists():
        msg = f"Input file does not exist: {path}"
        raise FileNotFoundError(msg)
    suffix = path.suffix.lower()
    if suffix in {".jsonl", ".json"}:
        # ``lines=True`` is a default, not a fixed keyword: hard-coding it next
        # to ``**kwargs`` made any caller-supplied ``lines`` fail with
        # "got multiple values for keyword argument".
        json_options = {"lines": True, **kwargs}
        return pd.read_json(path, **json_options)
    if suffix in {".csv", ""}:
        return pd.read_csv(path, **kwargs)
    msg = f"Unsupported file extension: {suffix}"
    raise ValueError(msg)


def write_table(
    frame: pd.DataFrame,
    path: str | Path,
    *,
    index: bool = False,
    **kwargs: Any,
) -> None:
    """Persist a DataFrame as CSV or JSON Lines, creating directories as needed.

    The writer is chosen from the file suffix (compared case-insensitively):
    ``.jsonl`` and ``.json`` are written with ``DataFrame.to_json``; ``.csv``
    and paths without any suffix are written with ``DataFrame.to_csv``.

    Args:
        frame: The DataFrame to write.
        path: Destination file; missing parent directories are created.
        index: Whether to write the row index. Only passed to the CSV writer;
            JSON output does not receive it.
        **kwargs: Forwarded to the selected pandas writer. For JSON output
            ``orient`` defaults to ``"records"`` and ``lines`` to ``True``;
            either may be overridden by the caller.

    Raises:
        ValueError: If the suffix is not one of the supported ones. Nothing
            is created on disk in that case.
    """

    path = Path(path)
    suffix = path.suffix.lower()
    as_json = suffix in {".jsonl", ".json"}
    # Validate before touching the filesystem so a rejected path does not
    # leave freshly created, empty parent directories behind.
    if not as_json and suffix not in {".csv", ""}:
        msg = f"Unsupported file extension: {suffix}"
        raise ValueError(msg)

    path.parent.mkdir(parents=True, exist_ok=True)
    if as_json:
        # Defaults rather than fixed keywords: hard-coding them next to
        # ``**kwargs`` made a caller-supplied ``orient``/``lines`` fail with
        # "got multiple values for keyword argument".
        json_options = {"orient": "records", "lines": True, **kwargs}
        frame.to_json(path, **json_options)
    else:
        frame.to_csv(path, index=index, **kwargs)