# Source code for graphcalc.graphs.data.data_generation

import networkx as nx
import graphcalc.graphs as gc
import pandas as pd

# Public API of this module: the names exported by a star-import.
# Every entry corresponds to a function or constant defined below.
__all__ = [
    "compute_graph_properties",
    "expand_list_columns",
    "compute_knowledge_table",
    "GRAPHCALC_PROPERTY_LIST",
    "all_properties",
    "append_graph_row",
]


[docs]
def compute_graph_properties(function_names, graph, return_as_dict=True):
    r"""
    Compute graph properties based on a list of function names.

    Each name is looked up first in `graphcalc.graphs` and, if it is not found
    there, in `networkx`. The resolved function is called with `graph` as its
    only argument, and the results are returned either as a dictionary
    (default) or as a list.

    Parameters
    ----------
    function_names : list of str
        Names of functions defined in the `graphcalc` or `networkx` packages.
    graph : networkx.Graph
        The input graph on which the functions will be evaluated.
    return_as_dict : bool, optional
        If True (default), returns a dictionary mapping function names to their
        computed values. If False, returns a list with exactly one value per
        entry of `function_names`, in the same order.

    Returns
    -------
    dict or list
        By default, a dictionary where keys are function names and values are
        the computed values (a repeated name appears once). If
        `return_as_dict=False`, a list aligned with `function_names`; a name
        that occurs several times contributes its value at every occurrence.

    Raises
    ------
    AttributeError
        If a function name in `function_names` does not exist in either
        `graphcalc` or `networkx`.
    Exception
        If any function in `function_names` raises an error during execution.
        The original error is attached as ``__cause__``.

    Examples
    --------
    >>> import graphcalc.graphs as gc
    >>> from graphcalc.graphs.generators import cycle_graph
    >>> G = cycle_graph(6)  # A cycle graph with 6 nodes
    >>> function_names = ["spectral_radius", "number_of_nodes"]
    >>> dictionary_solution = gc.compute_graph_properties(function_names, G)
    >>> list_solution = gc.compute_graph_properties(function_names, G, return_as_dict=False)
    """
    results = {}
    # Kept separately from ``results`` so that the list form stays aligned with
    # ``function_names`` even when a name is repeated (dict keys would collapse).
    ordered_values = []

    for func_name in function_names:
        # A repeated name reuses the value already computed for this graph.
        if func_name not in results:
            # graphcalc takes precedence over networkx when both define the name.
            if hasattr(gc, func_name):
                func = getattr(gc, func_name)
            elif hasattr(nx, func_name):
                func = getattr(nx, func_name)
            else:
                raise AttributeError(
                    f"Function '{func_name}' does not exist in either 'graphcalc' or 'networkx'."
                )

            try:
                results[func_name] = func(graph)
            except Exception as e:
                # Chain the cause so the original traceback is not lost.
                raise Exception(
                    f"Error while executing function '{func_name}': {e}"
                ) from e

        ordered_values.append(results[func_name])

    if return_as_dict:
        return results
    return ordered_values




[docs]
def expand_list_columns(df: pd.DataFrame):
    r"""
    Replace every list-valued column by one scalar column per list position.

    A column is treated as list-valued when at least one of its entries is a
    Python ``list``. For such a column:

    1. The length N of its longest list is determined.
    2. N new columns named "<column_name>[i]" (i = 0, ..., N-1) are appended.
    3. Entries that are not lists, or lists shorter than i+1, contribute 0.
    4. The original column is removed.

    The input dataframe is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        The input dataframe, possibly containing list-valued columns.

    Returns
    -------
    pandas.DataFrame
        A copy of `df` in which list-valued columns are replaced by their
        expanded scalar columns.

    Examples
    --------
    >>> data = {'graph_id': [1, 2, 3],
    ...         'p_vector': [[3, 0, 1], [2, 1], []]}
    >>> df = pd.DataFrame(data)
    >>> new_df = expand_list_columns(df)
    """

    def _is_list(cell):
        return isinstance(cell, list)

    def _list_length(cell):
        return len(cell) if isinstance(cell, list) else 0

    expanded = df.copy()

    for name in df.columns:
        cells = df[name]

        # Columns without a single list entry are carried over unchanged.
        if not cells.apply(_is_list).any():
            continue

        width = cells.apply(_list_length).max()

        for position in range(width):

            def _item_at(cell, position=position):
                # Pad with 0 where the entry has no element at this position.
                if isinstance(cell, list) and position < len(cell):
                    return cell[position]
                return 0

            expanded[f"{name}[{position}]"] = cells.apply(_item_at)

        # The list column itself is superseded by its expanded columns.
        expanded = expanded.drop(columns=[name])

    return expanded




[docs]
def compute_knowledge_table(function_names: list, graphs: list) -> pd.DataFrame:
    r"""
    Compute graph properties for a collection of NetworkX graphs and return a pandas DataFrame.

    Every name in `function_names` is evaluated on every graph via
    :func:`compute_graph_properties` (which resolves names in `graphcalc`
    first and falls back to `networkx`). The per-graph results are stacked
    into a DataFrame with one row per graph, and list-valued columns are then
    expanded by :func:`expand_list_columns`.

    Parameters
    ----------
    function_names : list of str
        A list of function names (as strings) to evaluate on each graph.
    graphs : list of networkx.Graph
        A collection of NetworkX graphs.

    Returns
    -------
    pandas.DataFrame
        A DataFrame where each row represents a graph and each column represents a computed
        graph property.

    Raises
    ------
    AttributeError
        If a function name in `function_names` cannot be resolved. It is
        propagated unchanged from :func:`compute_graph_properties`.
    Exception
        If any function in `function_names` raises an error during execution for any graph.
        The underlying error is attached as ``__cause__``.

    Examples
    --------
    >>> import graphcalc.graphs as gc
    >>> from graphcalc.graphs.generators import path_graph, cycle_graph
    >>> G1 = cycle_graph(6)
    >>> G2 = path_graph(5)
    >>> function_names = ["spectral_radius", "algebraic_connectivity"]
    >>> graphs = [G1, G2]
    >>> df = gc.compute_knowledge_table(function_names, graphs)
    """
    rows = []
    for graph in graphs:
        try:
            rows.append(compute_graph_properties(function_names, graph))
        except AttributeError:
            # An unknown function name is a caller error; keep the documented
            # exception type instead of hiding it behind a generic Exception.
            raise
        except Exception as e:
            # Chain the cause so the failing property's traceback is preserved.
            raise Exception(f"Error while processing a graph: {e}") from e

    # One row per graph; list-valued properties become "<name>[i]" columns.
    return expand_list_columns(pd.DataFrame(rows))


# Names of the properties and invariants evaluated by ``all_properties`` and
# ``append_graph_row``. Each name is resolved by ``compute_graph_properties``.
# The order of this list determines the column order of the resulting table,
# and every name appears exactly once so no property is computed twice.
GRAPHCALC_PROPERTY_LIST = [
    "order",
    "size",
    "connected",
    "diameter",
    "radius",
    "average_shortest_path_length",
    "bipartite",
    "chordal",
    "cubic",
    "eulerian",
    "planar",
    "regular",
    "subcubic",
    "tree",
    "K_4_free",
    "triangle_free",
    "claw_free",
    "cograph",
    "nontrivial",
    "independence_number",
    "clique_number",
    "chromatic_number",
    "vertex_cover_number",
    "edge_cover_number",
    "matching_number",
    "triameter",
    "average_degree",
    "maximum_degree",
    "minimum_degree",
    "slater",
    "sub_total_domination_number",
    "annihilation_number",
    "residue",
    "harmonic_index",
    "domination_number",
    "total_domination_number",
    "independent_domination_number",
    "outer_connected_domination_number",
    "roman_domination_number",
    "double_roman_domination_number",
    "two_rainbow_domination_number",
    "three_rainbow_domination_number",
    "min_maximal_matching_number",
    "restrained_domination_number",
    "algebraic_connectivity",
    "spectral_radius",
    "largest_laplacian_eigenvalue",
    "zero_adjacency_eigenvalues_count",
    "second_largest_adjacency_eigenvalue",
    "smallest_adjacency_eigenvalue",
    "zero_forcing_number",
    "two_forcing_number",
    "total_zero_forcing_number",
    "connected_zero_forcing_number",
    "positive_semidefinite_zero_forcing_number",
    "power_domination_number",
    "well_splitting_number",
    "burning_number",
    "vertex_clique_cover_number",
]


[docs]
def all_properties(graphs: list) -> pd.DataFrame:
    """
    Build the knowledge table for every entry of ``GRAPHCALC_PROPERTY_LIST``.

    Each name listed in ``GRAPHCALC_PROPERTY_LIST`` is evaluated on each graph
    of the input collection by delegating to :func:`compute_knowledge_table`.
    The outcome is a pandas DataFrame with one row per graph and one column
    per property or invariant.

    Parameters
    ----------
    graphs : list of networkx.Graph
        A collection of NetworkX graphs.

    Returns
    -------
    pandas.DataFrame
        A DataFrame where:
          - Rows correspond to the input graphs.
          - Columns correspond to the names in ``GRAPHCALC_PROPERTY_LIST``.

    Raises
    ------
    Exception
        If any property function raises an error during execution for any graph.

    Notes
    -----
    This is a convenience wrapper: it is equivalent to calling
    :func:`compute_knowledge_table` with ``GRAPHCALC_PROPERTY_LIST`` as the
    list of function names. Use it when a comprehensive "fingerprint" of each
    graph in a dataset is wanted.

    Examples
    --------
    >>> import graphcalc.graphs as gc
    >>> from graphcalc.graphs.generators import cycle_graph, path_graph
    >>> G1 = cycle_graph(6)
    >>> G2 = path_graph(5)
    >>> df = gc.all_properties([G1, G2])
    >>> df.columns[:5]  # show a few property names
    Index(['order', 'size', 'connected', 'diameter', 'radius'], dtype='object')
    """
    knowledge_table = compute_knowledge_table(
        function_names=GRAPHCALC_PROPERTY_LIST,
        graphs=graphs,
    )
    return knowledge_table



[docs]
def append_graph_row(df: pd.DataFrame, G, function_names=None) -> pd.DataFrame:
    """
    Append a new row to an existing knowledge table with the properties
    of a new graph.

    The row is built the same way as the rows of a knowledge table: the
    properties are computed with :func:`compute_graph_properties` and any
    list-valued property is expanded into "<name>[i]" columns by
    :func:`expand_list_columns` before concatenation.

    Parameters
    ----------
    df : pandas.DataFrame
        Existing knowledge table (as returned by `all_properties`
        or `compute_knowledge_table`).
    G : networkx.Graph
        A new graph to analyze.
    function_names : list of str, optional
        Names of the properties to compute for `G`. Defaults to
        ``GRAPHCALC_PROPERTY_LIST``. Pass the same names that were used to
        build `df` when the table was created with
        `compute_knowledge_table` on a subset of properties.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame with the additional row for G. `df` itself is not
        modified. Columns present on only one side of the concatenation are
        filled with missing values by pandas.

    Raises
    ------
    AttributeError
        If a name in `function_names` cannot be resolved.
    Exception
        If a property function raises an error while being evaluated on `G`.

    Examples
    --------
    >>> import graphcalc.graphs as gc
    >>> from graphcalc.graphs.generators import cycle_graph, path_graph
    >>> df = gc.all_properties([cycle_graph(5)])
    >>> df.shape[0]
    1
    >>> df = gc.append_graph_row(df, path_graph(4))
    >>> df.shape[0]
    2
    """
    if function_names is None:
        function_names = GRAPHCALC_PROPERTY_LIST

    row = compute_graph_properties(function_names, G)
    # Build a one-row table and expand list-valued entries so its columns
    # follow the same "<name>[i]" layout as the existing knowledge table.
    new_row = expand_list_columns(pd.DataFrame([row]))
    return pd.concat([df, new_row], ignore_index=True)