Source code for graphcalc.additive_combinatorics.dataset_generators

from __future__ import annotations

from typing import Iterable, Sequence

import numpy as np

from graphcalc.additive_combinatorics.ambient_groups import FiniteAbelianGroup
from graphcalc.additive_combinatorics.generators import (
    all_subsets_of_group,
    arithmetic_progression,
    coset,
    empty_set,
    interval,
    random_subset,
    random_symmetric_subset,
    sample_random_subsets,
    singleton,
    subgroup_from_generators,
    union_of_cosets,
    whole_group,
)
from graphcalc.additive_combinatorics.invariants import (
    additive_energy,
    cardinality,
    diffset_defect,
    diffset_density,
    diffset_size,
    difference_constant,
    doubling_constant,
    doubling_minus_difference,
    energy_over_sumset_square,
    max_difference_representation_count,
    max_sum_representation_count,
    normalized_additive_energy,
    stabilizer_index,
    stabilizer_size,
    stabilizer_size_of_sumset,
    sumset_defect,
    sumset_density,
    sumset_size,
    sumset_stabilizer_index,
    tripling_constant,
)
from graphcalc.additive_combinatorics.properties import (
    contains_zero,
    has_full_diffset,
    has_full_sumset,
    has_small_doubling,
    is_aperiodic,
    is_coset,
    is_difference_larger_than_sumset,
    is_doubling_at_most_2,
    is_doubling_at_most_3_over_2,
    is_empty_set,
    is_periodic,
    is_sidon,
    is_subgroup,
    is_sum_free,
    is_symmetric,
    is_trivial_set,
    is_whole_group,
    sumset_is_periodic,
)
from graphcalc.additive_combinatorics.sets import AdditiveSet

__all__ = [
    "additive_dataset_column_definitions",
    "additive_set_to_record",
    "generate_additive_set_dataset",
    "small_additive_snapshot",
    "medium_additive_snapshot",
    "large_additive_snapshot",
]


[docs] def additive_dataset_column_definitions() -> dict[str, dict]: r""" Return the schema for additive-combinatorics conjecturing datasets. Returns ------- dict of str to dict Dictionary keyed by column name. Each value is a metadata record describing the column type and intended meaning. Notes ----- The schema is designed for conjecturing workflows in which each row describes one finite additive set together with ambient-group descriptors, numerical invariants, Boolean predicates, and optional provenance information. The returned dictionary is intended to be stable enough for downstream export helpers and dataframe construction. """ return { "ambient_moduli": { "type": "object", "description": "Tuple of cyclic moduli defining the ambient finite abelian group.", }, "ambient_rank": { "type": "int", "description": "Number of cyclic factors in the ambient group.", }, "ambient_order": { "type": "int", "description": "Cardinality of the ambient finite abelian group.", }, "elements": { "type": "object", "description": "Canonical tuple of elements of the additive set.", }, "cardinality": { "type": "int", "description": "Cardinality |A| of the additive set.", }, "sumset_size": { "type": "int", "description": "Cardinality |A+A| of the self-sumset.", }, "diffset_size": { "type": "int", "description": "Cardinality |A-A| of the self-difference set.", }, "doubling_constant": { "type": "float", "description": "Ratio |A+A| / |A| for nonempty sets, else None.", }, "difference_constant": { "type": "float", "description": "Ratio |A-A| / |A| for nonempty sets, else None.", }, "tripling_constant": { "type": "float", "description": "Ratio |A+A+A| / |A| for nonempty sets, else None.", }, "sumset_defect": { "type": "int", "description": "Difference |A+A| - |A|.", }, "diffset_defect": { "type": "int", "description": "Difference |A-A| - |A|.", }, "doubling_minus_difference": { "type": "int", "description": "Difference |A+A| - |A-A|.", }, "sumset_density": { "type": "float", "description": "Relative sumset size |A+A| / |G|.", }, "diffset_density": { "type": "float", "description": "Relative difference-set size |A-A| / |G|.", }, "additive_energy": { "type": "int", "description": "Additive energy E(A).", }, "normalized_additive_energy": { "type": "float", "description": "Ratio E(A) / |A|^3 for nonempty sets, else None.", }, "energy_over_sumset_square": { "type": "float", "description": "Ratio E(A) / |A+A|^2 when |A+A| > 0, else None.", }, "max_sum_representation_count": { "type": "int", "description": "Maximum ordered representation count in A+A.", }, "max_difference_representation_count": { "type": "int", "description": "Maximum ordered representation count in A-A.", }, "stabilizer_size": { "type": "int", "description": "Cardinality of Stab(A).", }, "stabilizer_size_of_sumset": { "type": "int", "description": "Cardinality of Stab(A+A).", }, "stabilizer_index": { "type": "int", "description": "Index |G| / |Stab(A)|.", }, "sumset_stabilizer_index": { "type": "int", "description": "Index |G| / |Stab(A+A)|.", }, "contains_zero": { "type": "bool", "description": "Whether the ambient identity belongs to A.", }, "is_empty_set": { "type": "bool", "description": "Whether A is empty.", }, "is_trivial_set": { "type": "bool", "description": "Whether |A| <= 1.", }, "is_whole_group": { "type": "bool", "description": "Whether A equals the whole ambient group.", }, "is_symmetric": { "type": "bool", "description": "Whether A = -A.", }, "is_subgroup": { "type": "bool", "description": "Whether A is a subgroup of the ambient group.", }, "is_coset": { "type": "bool", "description": "Whether A is a coset of a subgroup.", }, "is_sum_free": { "type": "bool", "description": "Whether (A+A) ∩ A is empty.", }, "is_sidon": { "type": "bool", "description": "Whether A is a Sidon set.", }, "has_small_doubling_3_over_2": { "type": "bool", "description": "Whether |A+A| <= (3/2)|A|.", }, "has_small_doubling_2": { "type": "bool", "description": "Whether |A+A| <= 2|A|.", }, "has_full_sumset": { "type": "bool", "description": "Whether A+A equals the ambient group.", }, "has_full_diffset": { "type": "bool", "description": "Whether A-A equals the ambient group.", }, "is_difference_larger_than_sumset": { "type": "bool", "description": "Whether |A-A| > |A+A|.", }, "is_periodic": { "type": "bool", "description": "Whether A has nontrivial stabilizer.", }, "is_aperiodic": { "type": "bool", "description": "Whether A has trivial stabilizer.", }, "sumset_is_periodic": { "type": "bool", "description": "Whether A+A has nontrivial stabilizer.", }, "construction": { "type": "object", "description": "Optional label describing how the set was generated.", }, "construction_parameters": { "type": "object", "description": "Optional parameter dictionary describing the construction.", }, }
[docs] def additive_set_to_record( A: AdditiveSet, *, construction: str | None = None, construction_parameters: dict | None = None, ) -> dict: r""" Convert an additive set into a conjecturing-oriented record. Parameters ---------- A : AdditiveSet Input additive set. construction : str, optional Optional label describing how the set was generated. construction_parameters : dict, optional Optional parameter dictionary recording the construction choices. Returns ------- dict Dictionary containing ambient descriptors, numerical invariants, Boolean predicates, and optional provenance metadata. Notes ----- Quantities that are undefined on the empty set, such as :math:`|A+A|/|A|`, are stored as ``None`` instead of raising an exception. This makes the resulting records easier to place into a pandas dataframe. """ nonempty = not A.is_empty sumset_sz = A.sumset().size return { "ambient_moduli": A.group.moduli, "ambient_rank": A.group.rank, "ambient_order": A.group.order, "elements": A.elements, "cardinality": cardinality(A), "sumset_size": sumset_size(A), "diffset_size": diffset_size(A), "doubling_constant": doubling_constant(A) if nonempty else None, "difference_constant": difference_constant(A) if nonempty else None, "tripling_constant": tripling_constant(A) if nonempty else None, "sumset_defect": sumset_defect(A), "diffset_defect": diffset_defect(A), "doubling_minus_difference": doubling_minus_difference(A), "sumset_density": sumset_density(A), "diffset_density": diffset_density(A), "additive_energy": additive_energy(A), "normalized_additive_energy": normalized_additive_energy(A) if nonempty else None, "energy_over_sumset_square": energy_over_sumset_square(A) if sumset_sz > 0 else None, "max_sum_representation_count": max_sum_representation_count(A), "max_difference_representation_count": max_difference_representation_count(A), "stabilizer_size": stabilizer_size(A), "stabilizer_size_of_sumset": stabilizer_size_of_sumset(A), "stabilizer_index": stabilizer_index(A), "sumset_stabilizer_index": sumset_stabilizer_index(A), "contains_zero": contains_zero(A), "is_empty_set": is_empty_set(A), "is_trivial_set": is_trivial_set(A), "is_whole_group": is_whole_group(A), "is_symmetric": is_symmetric(A), "is_subgroup": is_subgroup(A), "is_coset": is_coset(A), "is_sum_free": is_sum_free(A), "is_sidon": is_sidon(A), "has_small_doubling_3_over_2": is_doubling_at_most_3_over_2(A), "has_small_doubling_2": is_doubling_at_most_2(A), "has_full_sumset": has_full_sumset(A), "has_full_diffset": has_full_diffset(A), "is_difference_larger_than_sumset": is_difference_larger_than_sumset(A), "is_periodic": is_periodic(A), "is_aperiodic": is_aperiodic(A), "sumset_is_periodic": sumset_is_periodic(A), "construction": construction, "construction_parameters": construction_parameters, }
[docs] def generate_additive_set_dataset( instances: Iterable[ AdditiveSet | tuple[AdditiveSet, str] | tuple[AdditiveSet, str, dict] ], ) -> list[dict]: r""" Convert a collection of additive sets into dataset rows. Parameters ---------- instances : iterable Iterable whose entries are one of: - an :class:`AdditiveSet`, - a pair ``(A, construction)``, or - a triple ``(A, construction, construction_parameters)``. Returns ------- list of dict Dataset rows obtained by applying :func:`additive_set_to_record`. Raises ------ TypeError If an entry is not in one of the accepted formats. """ rows: list[dict] = [] for item in instances: if isinstance(item, AdditiveSet): rows.append(additive_set_to_record(item)) elif isinstance(item, tuple) and len(item) == 2: A, construction = item if not isinstance(A, AdditiveSet): raise TypeError("Dataset tuple entries must begin with an AdditiveSet.") rows.append(additive_set_to_record(A, construction=construction)) elif isinstance(item, tuple) and len(item) == 3: A, construction, construction_parameters = item if not isinstance(A, AdditiveSet): raise TypeError("Dataset tuple entries must begin with an AdditiveSet.") rows.append( additive_set_to_record( A, construction=construction, construction_parameters=construction_parameters, ) ) else: raise TypeError( "Each dataset entry must be an AdditiveSet, " "(AdditiveSet, construction), or " "(AdditiveSet, construction, construction_parameters)." ) return rows
def _small_snapshot_instances() -> list[tuple[AdditiveSet, str, dict]]: r""" Build a small deterministic collection of additive-set examples. Returns ------- list of tuple Triples ``(A, construction, construction_parameters)`` suitable for :func:`generate_additive_set_dataset`. Notes ----- This helper is internal. It keeps the public snapshot function concise while making the construction logic easier to expand and test. """ instances: list[tuple[AdditiveSet, str, dict]] = [] for n in (4, 5, 6, 7, 8): G = FiniteAbelianGroup((n,)) instances.append((empty_set(G), "empty_set", {"moduli": (n,)})) instances.append((whole_group(G), "whole_group", {"moduli": (n,)})) instances.append((singleton(G, G.zero()), "singleton", {"moduli": (n,), "element": G.zero()})) instances.append( ( interval(n, min(3, n), start=0), "interval", {"modulus": n, "length": min(3, n), "start": 0}, ) ) instances.append( ( arithmetic_progression(G, start=(0,), step=(1,), length=min(4, n)), "arithmetic_progression", {"moduli": (n,), "start": (0,), "step": (1,), "length": min(4, n)}, ) ) trivial_subgroup = subgroup_from_generators(G, [(0,)]) instances.append( ( trivial_subgroup, "subgroup_from_generators", {"moduli": (n,), "generators": [(0,)]}, ) ) if n % 2 == 0: even_subgroup = subgroup_from_generators(G, [(2,)]) instances.append( ( even_subgroup, "subgroup_from_generators", {"moduli": (n,), "generators": [(2,)]}, ) ) instances.append( ( coset(even_subgroup, (1,)), "coset", {"moduli": (n,), "base_generators": [(2,)], "translate": (1,)}, ) ) instances.append( ( union_of_cosets(even_subgroup, [(0,), (1,)]), "union_of_cosets", {"moduli": (n,), "base_generators": [(2,)], "translates": [(0,), (1,)]}, ) ) rng_random = np.random.default_rng(1000 + n) instances.append( ( random_subset(G, size=min(3, n), rng=rng_random), "random_subset_size", {"moduli": (n,), "size": min(3, n), "seed": 1000 + n}, ) ) rng_sym = np.random.default_rng(6000 + n) instances.append( ( random_symmetric_subset(G, density=0.5, rng=rng_sym), "random_symmetric_subset", {"moduli": (n,), "density": 0.5, "seed": 6000 + n}, ) ) if G.order <= 5: for idx, subset in enumerate(all_subsets_of_group(G)): instances.append( ( subset, "all_subsets_of_group", {"moduli": (n,), "index": idx}, ) ) return instances
[docs] def small_additive_snapshot() -> list[dict]: r""" Return a small deterministic additive-combinatorics dataset snapshot. Returns ------- list of dict Reproducible dataset rows spanning several structured and random families in small cyclic groups. Notes ----- This snapshot is intended for quick experimentation, smoke tests, and documentation examples. """ return generate_additive_set_dataset(_small_snapshot_instances())
def _medium_snapshot_instances() -> list[tuple[AdditiveSet, str, dict]]: r""" Build a medium deterministic collection of additive-set examples. Returns ------- list of tuple Triples ``(A, construction, construction_parameters)`` suitable for :func:`generate_additive_set_dataset`. """ instances: list[tuple[AdditiveSet, str, dict]] = [] cyclic_moduli = (5, 6, 7, 8, 9, 10) for n in cyclic_moduli: G = FiniteAbelianGroup((n,)) instances.append((empty_set(G), "empty_set", {"moduli": (n,)})) instances.append((whole_group(G), "whole_group", {"moduli": (n,)})) instances.append((singleton(G, G.zero()), "singleton", {"moduli": (n,), "element": G.zero()})) instances.append((singleton(G, (1,)), "singleton", {"moduli": (n,), "element": (1,)})) for start in (0, 1): for length in (0, 1, min(3, n), min(4, n)): instances.append( ( interval(n, length, start=start), "interval", {"modulus": n, "length": length, "start": start}, ) ) for step in (1, 2): instances.append( ( arithmetic_progression(G, start=(0,), step=(step,), length=min(4, n)), "arithmetic_progression", {"moduli": (n,), "start": (0,), "step": (step,), "length": min(4, n)}, ) ) for gen in ((0,), (1,), (2,)): H = subgroup_from_generators(G, [gen]) instances.append( ( H, "subgroup_from_generators", {"moduli": (n,), "generators": [gen]}, ) ) instances.append( ( coset(H, (1,)), "coset", {"moduli": (n,), "base_generators": [gen], "translate": (1,)}, ) ) if n % 2 == 0: H = subgroup_from_generators(G, [(2,)]) instances.append( ( union_of_cosets(H, [(0,), (1,)]), "union_of_cosets", {"moduli": (n,), "base_generators": [(2,)], "translates": [(0,), (1,)]}, ) ) rng_size = np.random.default_rng(2000 + n) rng_density = np.random.default_rng(3000 + n) rng_sym = np.random.default_rng(3500 + n) instances.append( ( random_subset(G, size=min(4, n), rng=rng_size), "random_subset_size", {"moduli": (n,), "size": min(4, n), "seed": 2000 + n}, ) ) instances.append( ( random_subset(G, density=0.4, rng=rng_density), "random_subset_density", {"moduli": (n,), "density": 0.4, "seed": 3000 + n}, ) ) instances.append( ( random_symmetric_subset(G, density=0.5, rng=rng_sym), "random_symmetric_subset", {"moduli": (n,), "density": 0.5, "seed": 3500 + n}, ) ) for moduli in ((2, 2), (2, 3), (3, 3), (2, 2, 2)): G = FiniteAbelianGroup(moduli) instances.append((empty_set(G), "empty_set", {"moduli": moduli})) instances.append((whole_group(G), "whole_group", {"moduli": moduli})) instances.append((singleton(G, G.zero()), "singleton", {"moduli": moduli, "element": G.zero()})) instances.append( ( subgroup_from_generators(G, []), "subgroup_from_generators", {"moduli": moduli, "generators": []}, ) ) step = tuple(1 for _ in moduli) instances.append( ( arithmetic_progression(G, start=G.zero(), step=step, length=3), "arithmetic_progression", {"moduli": moduli, "start": G.zero(), "step": step, "length": 3}, ) ) gens = [tuple(1 if i == j else 0 for i in range(len(moduli))) for j in range(len(moduli))] H = subgroup_from_generators(G, gens[:1]) instances.append( ( H, "subgroup_from_generators", {"moduli": moduli, "generators": gens[:1]}, ) ) instances.append( ( coset(H, tuple(1 for _ in moduli)), "coset", {"moduli": moduli, "base_generators": gens[:1], "translate": tuple(1 for _ in moduli)}, ) ) instances.append( ( union_of_cosets(H, [G.zero(), tuple(1 for _ in moduli)]), "union_of_cosets", {"moduli": moduli, "base_generators": gens[:1], "translates": [G.zero(), tuple(1 for _ in moduli)]}, ) ) rng_size = np.random.default_rng(4000 + sum(moduli)) rng_density = np.random.default_rng(5000 + sum(moduli)) rng_sym = np.random.default_rng(5500 + sum(moduli)) instances.append( ( random_subset(G, size=min(4, G.order), rng=rng_size), "random_subset_size", {"moduli": moduli, "size": min(4, G.order), "seed": 4000 + sum(moduli)}, ) ) instances.append( ( random_subset(G, density=0.5, rng=rng_density), "random_subset_density", {"moduli": moduli, "density": 0.5, "seed": 5000 + sum(moduli)}, ) ) instances.append( ( random_symmetric_subset(G, density=0.5, rng=rng_sym), "random_symmetric_subset", {"moduli": moduli, "density": 0.5, "seed": 5500 + sum(moduli)}, ) ) return instances
[docs] def medium_additive_snapshot() -> list[dict]: r""" Return a medium deterministic additive-combinatorics dataset snapshot. Returns ------- list of dict Reproducible dataset rows spanning cyclic and product groups together with structured and seeded random families. Notes ----- This snapshot is intended for richer conjecturing experiments while remaining small enough for routine unit testing and documentation use. """ return generate_additive_set_dataset(_medium_snapshot_instances())
[docs] def large_additive_snapshot( *, cyclic_moduli: Sequence[int] = tuple(range(4, 21)), product_moduli: Sequence[tuple[int, ...]] = ((2, 2), (2, 3), (3, 3), (2, 2, 2)), random_samples_per_group: int = 25, ) -> list[dict]: r""" Return a larger deterministic additive-combinatorics dataset snapshot. Parameters ---------- cyclic_moduli : sequence of int, default=range(4, 21) Cyclic moduli to include as ambient groups :math:`\mathbb{Z}/n\mathbb{Z}`. product_moduli : sequence of tuple of int, optional Product-group ambient moduli to include. random_samples_per_group : int, default=25 Number of fixed-size and density-based random samples to generate per ambient group. Returns ------- list of dict Deterministic dataset rows with many examples and descriptors. Raises ------ ValueError If ``random_samples_per_group`` is negative. Notes ----- This snapshot is designed for conjecturing workflows that benefit from many examples. All randomness is seeded deterministically from the ambient-group parameters. """ if random_samples_per_group < 0: raise ValueError("random_samples_per_group must be nonnegative.") instances: list[tuple[AdditiveSet, str, dict]] = [] instances.extend(_medium_snapshot_instances()) for n in cyclic_moduli: G = FiniteAbelianGroup((n,)) if G.order <= 6: for idx, subset in enumerate(all_subsets_of_group(G, max_order=6)): instances.append( ( subset, "all_subsets_of_group", {"moduli": (n,), "index": idx}, ) ) for step in (1, 2, 3): instances.append( ( arithmetic_progression(G, start=(0,), step=(step,), length=min(5, n)), "arithmetic_progression", {"moduli": (n,), "start": (0,), "step": (step,), "length": min(5, n)}, ) ) H = subgroup_from_generators(G, [(2,)]) instances.append( ( H, "subgroup_from_generators", {"moduli": (n,), "generators": [(2,)]}, ) ) instances.append( ( coset(H, (1,)), "coset", {"moduli": (n,), "base_generators": [(2,)], "translate": (1,)}, ) ) instances.append( ( union_of_cosets(H, [(0,), (1,)]), "union_of_cosets", {"moduli": (n,), "base_generators": [(2,)], "translates": [(0,), (1,)]}, ) ) random_by_size = sample_random_subsets( G, num_samples=random_samples_per_group, size=min(4, n), seed=7000 + n, ) for idx, A in enumerate(random_by_size): instances.append( ( A, "sample_random_subsets_size", {"moduli": (n,), "size": min(4, n), "seed": 7000 + n, "index": idx}, ) ) random_by_density = sample_random_subsets( G, num_samples=random_samples_per_group, density=0.4, seed=8000 + n, ) for idx, A in enumerate(random_by_density): instances.append( ( A, "sample_random_subsets_density", {"moduli": (n,), "density": 0.4, "seed": 8000 + n, "index": idx}, ) ) for idx in range(random_samples_per_group): rng_sym = np.random.default_rng(9000 + 100 * n + idx) A = random_symmetric_subset(G, density=0.5, rng=rng_sym) instances.append( ( A, "random_symmetric_subset", {"moduli": (n,), "density": 0.5, "seed": 9000 + 100 * n + idx}, ) ) for moduli in product_moduli: G = FiniteAbelianGroup(moduli) if G.order <= 6: for idx, subset in enumerate(all_subsets_of_group(G, max_order=6)): instances.append( ( subset, "all_subsets_of_group", {"moduli": moduli, "index": idx}, ) ) basis = [tuple(1 if i == j else 0 for i in range(len(moduli))) for j in range(len(moduli))] for gens in ([], basis[:1], basis[:2]): H = subgroup_from_generators(G, gens) instances.append( ( H, "subgroup_from_generators", {"moduli": moduli, "generators": gens}, ) ) instances.append( ( coset(H, tuple(1 for _ in moduli)), "coset", {"moduli": moduli, "base_generators": gens, "translate": tuple(1 for _ in moduli)}, ) ) random_by_size = sample_random_subsets( G, num_samples=random_samples_per_group, size=min(4, G.order), seed=10000 + sum(moduli), ) for idx, A in enumerate(random_by_size): instances.append( ( A, "sample_random_subsets_size", {"moduli": moduli, "size": min(4, G.order), "seed": 10000 + sum(moduli), "index": idx}, ) ) random_by_density = sample_random_subsets( G, num_samples=random_samples_per_group, density=0.5, seed=11000 + sum(moduli), ) for idx, A in enumerate(random_by_density): instances.append( ( A, "sample_random_subsets_density", {"moduli": moduli, "density": 0.5, "seed": 11000 + sum(moduli), "index": idx}, ) ) for idx in range(random_samples_per_group): rng_sym = np.random.default_rng(12000 + 100 * sum(moduli) + idx) A = random_symmetric_subset(G, density=0.5, rng=rng_sym) instances.append( ( A, "random_symmetric_subset", {"moduli": moduli, "density": 0.5, "seed": 12000 + 100 * sum(moduli) + idx}, ) ) return generate_additive_set_dataset(instances)