Source code for graphcalc.additive_combinatorics.dataset_exports

from __future__ import annotations

import json
from pathlib import Path
from typing import Iterable

import pandas as pd

from graphcalc.additive_combinatorics.dataset_generators import (
    additive_dataset_column_definitions,
)

# Public API of this module: two DataFrame converters followed by the three
# file writers (CSV, column-definition JSON, metadata JSON).
__all__ = [
    "additive_rows_to_dataframe",
    "additive_package_to_dataframe",
    "save_additive_package_csv",
    "save_additive_column_definitions_json",
    "save_additive_metadata_json",
]


def additive_rows_to_dataframe(rows: Iterable[dict]) -> pd.DataFrame:
    r"""
    Build a pandas DataFrame from additive-combinatorics dataset rows.

    Parameters
    ----------
    rows : iterable of dict
        Dataset rows, one dictionary per record. Any iterable is accepted;
        it is consumed once and materialized into a list.

    Returns
    -------
    pandas.DataFrame
        One DataFrame row per input dictionary; the columns come from the
        dictionary keys.

    Notes
    -----
    No schema is enforced here. The rows are passed to
    :class:`pandas.DataFrame` exactly as given.
    """
    # Materialize the iterable into a list before handing it to pandas.
    records = [*rows]
    frame = pd.DataFrame(records)
    return frame
def additive_package_to_dataframe(package: dict) -> pd.DataFrame:
    r"""
    Build a pandas DataFrame from the rows stored in a dataset package.

    Parameters
    ----------
    package : dict
        Dataset package; its ``"rows"`` entry holds the dataset rows.

    Returns
    -------
    pandas.DataFrame
        The result of passing ``package["rows"]`` to
        :func:`additive_rows_to_dataframe`.

    Raises
    ------
    KeyError
        If ``package`` has no ``"rows"`` entry.
    TypeError
        Propagated from the row conversion when ``package["rows"]`` cannot
        be iterated.
    """
    if "rows" in package:
        return additive_rows_to_dataframe(package["rows"])

    # Fail with an explicit message instead of a bare missing-key error.
    raise KeyError("package must contain a 'rows' entry.")
def save_additive_package_csv(package: dict, path: str | Path, *, index: bool = False) -> Path:
    r"""
    Write the rows of a dataset package to a CSV file.

    Parameters
    ----------
    package : dict
        Dataset package; its ``"rows"`` entry is converted with
        :func:`additive_package_to_dataframe`.
    path : str or pathlib.Path
        Destination of the CSV file.
    index : bool, default=False
        Forwarded to :meth:`pandas.DataFrame.to_csv`; controls whether the
        DataFrame index is written as a column.

    Returns
    -------
    pathlib.Path
        ``path`` wrapped in :class:`pathlib.Path`. The path is returned as
        given; it is not made absolute or otherwise resolved.
    """
    destination = Path(path)
    additive_package_to_dataframe(package).to_csv(destination, index=index)
    return destination
def save_additive_column_definitions_json(path: str | Path, *, indent: int = 2) -> Path:
    r"""
    Save the additive dataset column definitions as JSON.

    The definitions are obtained from
    :func:`graphcalc.additive_combinatorics.dataset_generators.additive_dataset_column_definitions`
    and serialized with sorted keys. Serialization happens *before* the
    output file is opened, so a serialization error never truncates an
    existing file or leaves a partially written one behind.

    Parameters
    ----------
    path : str or pathlib.Path
        Output JSON path.
    indent : int, default=2
        Indentation level passed to :func:`json.dumps`.

    Returns
    -------
    pathlib.Path
        ``path`` wrapped in :class:`pathlib.Path`. The path is returned as
        given; it is not made absolute or otherwise resolved.
    """
    output_path = Path(path)
    definitions = additive_dataset_column_definitions()
    # Serialize first: opening the file in "w" mode truncates it, so it must
    # only happen once we know the payload can actually be produced.
    payload = json.dumps(definitions, indent=indent, sort_keys=True)
    output_path.write_text(payload, encoding="utf-8")
    return output_path
def save_additive_metadata_json(metadata: dict, path: str | Path, *, indent: int = 2) -> Path:
    r"""
    Save dataset metadata as JSON.

    The metadata is serialized with sorted keys *before* the output file is
    opened, so metadata that cannot be serialized never truncates an
    existing file or leaves a partially written one behind.

    Parameters
    ----------
    metadata : dict
        Metadata dictionary to serialize.
    path : str or pathlib.Path
        Output JSON path.
    indent : int, default=2
        Indentation level passed to :func:`json.dumps`.

    Returns
    -------
    pathlib.Path
        ``path`` wrapped in :class:`pathlib.Path`. The path is returned as
        given; it is not made absolute or otherwise resolved.

    Raises
    ------
    TypeError, ValueError
        Propagated from :func:`json.dumps` when ``metadata`` cannot be
        serialized. The output file is left untouched in that case.
    """
    output_path = Path(path)
    # Serialize first: opening the file in "w" mode truncates it, so it must
    # only happen once we know the payload can actually be produced.
    payload = json.dumps(metadata, indent=indent, sort_keys=True)
    output_path.write_text(payload, encoding="utf-8")
    return output_path