Source code for xgi.stats.nodestats

"""Node statistics.

This module is part of the stats package, and it defines node-level statistics.  That
is, each function defined in this module is assumed to define a node-quantity mapping.
Each callable defined here is accessible via a `Network` object, or a
:class:`~xgi.core.reportviews.NodeView` object.  For more details, see the `tutorial
<https://xgi.readthedocs.io/en/stable/api/tutorials/Tutorial%206%20-%20Statistics.html>`_.

Examples
--------

>>> import xgi
>>> H = xgi.Hypergraph([[1, 2, 3], [2, 3, 4, 5], [3, 4, 5]])
>>> H.degree()
{1: 1, 2: 2, 3: 3, 4: 2, 5: 2}
>>> H.nodes.degree.asdict()
{1: 1, 2: 2, 3: 3, 4: 2, 5: 2}

"""

import numpy as np

import xgi

__all__ = [
    "attrs",
    "degree",
    "average_neighbor_degree",
    "local_clustering_coefficient",
    "clustering_coefficient",
    "two_node_clustering_coefficient",
    "clique_eigenvector_centrality",
    "h_eigenvector_centrality",
    "node_edge_centrality",
    "katz_centrality",
]


[docs]def attrs(net, bunch, attr=None, missing=None):
    """Access node attributes.

    Parameters
    ----------
    net : xgi.Hypergraph
        The network.
    bunch : Iterable
        Nodes in `net`.
    attr : str | None (default)
        If None, return all attributes.  Otherwise, return a single attribute with name
        `attr`.
    missing : Any
        Value to impute in case a node does not have an attribute with name `attr`.
        Default is None.

    Returns
    -------
    dict
        If attr is None, return a nested dict of the form `{node: {"attr": val}}`.
        Otherwise, return a simple dict of the form `{node: val}`.

    Notes
    -----
    When requesting all attributes (i.e. when `attr` is None), no value is imputed.

    Examples
    --------
    >>> import xgi
    >>> H = xgi.Hypergraph([[1, 2, 3], [2, 3, 4, 5], [3, 4, 5]])
    >>> H.add_nodes_from([
    ...         (1, {"color": "red", "name": "horse"}),
    ...         (2, {"color": "blue", "name": "pony"}),
    ...         (3, {"color": "yellow", "name": "zebra"}),
    ...         (4, {"color": "red", "name": "orangutan", "age": 20}),
    ...         (5, {"color": "blue", "name": "fish", "age": 2}),
    ...     ])

    Access all attributes as different types.

    >>> H.nodes.attrs.asdict() # doctest: +NORMALIZE_WHITESPACE
    {1: {'color': 'red', 'name': 'horse'},
     2: {'color': 'blue', 'name': 'pony'},
     3: {'color': 'yellow', 'name': 'zebra'},
     4: {'color': 'red', 'name': 'orangutan', 'age': 20},
     5: {'color': 'blue', 'name': 'fish', 'age': 2}}
    >>> H.nodes.attrs.asnumpy() # doctest: +NORMALIZE_WHITESPACE
    array([{'color': 'red', 'name': 'horse'},
           {'color': 'blue', 'name': 'pony'},
           {'color': 'yellow', 'name': 'zebra'},
           {'color': 'red', 'name': 'orangutan', 'age': 20},
           {'color': 'blue', 'name': 'fish', 'age': 2}],
          dtype=object)

    Access a single attribute as different types.

    >>> H.nodes.attrs('color').asdict()
    {1: 'red', 2: 'blue', 3: 'yellow', 4: 'red', 5: 'blue'}
    >>> H.nodes.attrs('color').aslist()
    ['red', 'blue', 'yellow', 'red', 'blue']

    By default, None is imputed when a node does not have the requested attribute.

    >>> H.nodes.attrs('age').asdict()
    {1: None, 2: None, 3: None, 4: 20, 5: 2}

    Use `missing` to change the imputed value.

    >>> H.nodes.attrs('age', missing=100).asdict()
    {1: 100, 2: 100, 3: 100, 4: 20, 5: 2}

    """
    if isinstance(attr, str):
        return {n: net._node_attr[n].get(attr, missing) for n in bunch}
    elif attr is None:
        return {n: net._node_attr[n] for n in bunch}
    else:
        raise ValueError('"attr" must be str or None')


[docs]def degree(net, bunch, order=None, weight=None):
    """Node degree.

    The degree of a node is the number of edges it belongs to.

    Parameters
    ----------
    net : xgi.Hypergraph
        The network.
    bunch : Iterable
        Nodes in `net`.
    order : int | None
        If not None (default), only count the edges of the given order.
    weight : str | None
        If not None, specifies the name of the edge attribute that determines the weight
        of each edge.

    Returns
    -------
    dict

    """
    if order is None and weight is None:
        return {n: len(net._node[n]) for n in bunch}
    if order is None and weight:
        return {
            n: sum(net._edge_attr[e].get(weight, 1) for e in net._node[n])
            for n in bunch
        }
    if order is not None and weight is None:
        return {
            n: len([e for e in net._node[n] if len(net._edge[e]) == order + 1])
            for n in bunch
        }
    if order is not None and weight:
        return {
            n: sum(
                net._edge_attr[e].get(weight, 1)
                for e in net._node[n]
                if len(net._edge[e]) == order + 1
            )
            for n in bunch
        }


[docs]def average_neighbor_degree(net, bunch):
    """Average neighbor degree.

    Parameters
    ----------
    net : xgi.Hypergraph
        The network.
    bunch : Iterable
        Nodes in `net`.

    Returns
    -------
    dict

    Examples
    --------
    >>> import xgi, numpy as np
    >>> H = xgi.Hypergraph([[1, 2, 3], [2, 3, 4, 5], [3, 4, 5]])
    >>> np.round(H.nodes.average_neighbor_degree.asnumpy(), 3)
    array([2.5  , 2.   , 1.75 , 2.333, 2.333])

    """
    result = {}
    for n in bunch:
        neighbors = net.nodes.neighbors(n)
        result[n] = sum(len(net._node[nbr]) for nbr in neighbors)
        result[n] = result[n] / len(neighbors) if neighbors else 0
    return result


[docs]def clustering_coefficient(net, bunch):
    """Local clustering coefficient.

    This clustering coefficient is defined as the
    clustering coefficient of the unweighted pairwise
    projection of the hypergraph, i.e., `num / denom`,
    where `num` equals `A^3[n, n]` and `denom` equals
    `nu*(nu-1)/2`.  Here `A` is the adjacency matrix
    of the network and `nu` is the number of pairwise
    neighbors of `n`.

    Parameters
    ----------
    net : xgi.Hypergraph
        The network.
    bunch : Iterable
        Nodes in `net`.

    Returns
    -------
    dict

    Notes
    -----
    This is a direct generalization of the definition of local clustering coefficient
    for graphs.  It has not been tested on hypergraphs.

    Examples
    --------
    >>> import xgi, numpy as np
    >>> H = xgi.Hypergraph([[1, 2, 3], [2, 3, 4, 5], [3, 4, 5]])
    >>> H.nodes.two_node_clustering_coefficient.asnumpy()
    array([0.41666667, 0.45833333, 0.58333333, 0.66666667, 0.66666667])

    """
    cc = xgi.clustering_coefficient(net)
    return {n: cc[n] for n in cc if n in bunch}


[docs]def local_clustering_coefficient(net, bunch):
    """Compute the local clustering coefficient.

    This clustering coefficient is based on the
    overlap of the edges connected to a given node,
    normalized by the size of the node's neighborhood.

    Parameters
    ----------
    net : xgi.Hypergraph
        The network.
    bunch : Iterable
        Nodes in `net`.

    Returns
    -------
    dict
        keys are node IDs and values are the
        clustering coefficients.

    References
    ----------
    "Properties of metabolic graphs: biological organization or representation
    artifacts?"  by Wanding Zhou and Luay Nakhleh.
    https://doi.org/10.1186/1471-2105-12-132

    "Hypergraphs for predicting essential genes using multiprotein complex data"
    by Florian Klimm, Charlotte M. Deane, and Gesine Reinert.
    https://doi.org/10.1093/comnet/cnaa028

    Example
    -------
    >>> import xgi
    >>> H = xgi.random_hypergraph(3, [1, 1])
    >>> H.nodes.local_clustering_coefficient.asdict()
    {0: 1.0, 1: 1.0, 2: 1.0}

    """
    cc = xgi.local_clustering_coefficient(net)
    return {n: cc[n] for n in cc if n in bunch}


[docs]def two_node_clustering_coefficient(net, bunch, kind="union"):
    """Return the clustering coefficients for
    each node in a Hypergraph.

    This definition averages over all of the
    two-node clustering coefficients involving the node.

    Parameters
    ----------
    net : xgi.Hypergraph
        The network.
    bunch : Iterable
        Nodes in `net`.
    kind : str
        The type of two-node clustering coefficient.
        Types are:

        * "union"
        * "min"
        * "max"

    by default, "union".

    Returns
    -------
    dict
        nodes are keys, clustering coefficients are values.

    References
    ----------
    "Clustering Coefficients in Protein Interaction Hypernetworks"
    by Suzanne Gallagher and Debra Goldberg.
    DOI: 10.1145/2506583.2506635

    Example
    -------
    >>> import xgi
    >>> H = xgi.random_hypergraph(3, [1, 1])
    >>> H.nodes.two_node_clustering_coefficient.asdict()
    {0: 0.5, 1: 0.5, 2: 0.5}
    """
    cc = xgi.two_node_clustering_coefficient(net, kind=kind)
    return {n: cc[n] for n in cc if n in bunch}


[docs]def clique_eigenvector_centrality(net, bunch, tol=1e-6):
    """Compute the clique motif eigenvector centrality of a hypergraph.

    Parameters
    ----------
    net : xgi.Hypergraph
        The hypergraph of interest.
    bunch : Iterable
        Nodes in `net`.
    tol : float > 0, default: 1e-6
        The desired L2 error in the centrality vector.

    Returns
    -------
    dict
        Centrality, where keys are node IDs and values are centralities.

    References
    ----------
    Three Hypergraph Eigenvector Centralities,
    Austin R. Benson,
    https://doi.org/10.1137/18M1203031
    """
    c = xgi.clique_eigenvector_centrality(net, tol)
    return {n: c[n] for n in c if n in bunch}


[docs]def h_eigenvector_centrality(net, bunch, max_iter=10, tol=1e-6):
    """Compute the H-eigenvector centrality of a hypergraph.

    Parameters
    ----------
    net : xgi.Hypergraph
        The hypergraph of interest.
    bunch : Iterable
        Nodes in `net`.
    max_iter : int, default: 10
        The maximum number of iterations before the algorithm terminates.
    tol : float > 0, default: 1e-6
        The desired L2 error in the centrality vector.

    Returns
    -------
    dict
        Centrality, where keys are node IDs and values are centralities.

    References
    ----------
    Three Hypergraph Eigenvector Centralities,
    Austin R. Benson,
    https://doi.org/10.1137/18M1203031
    """
    c = xgi.h_eigenvector_centrality(net, max_iter, tol)
    return {n: c[n] for n in c if n in bunch}


[docs]def node_edge_centrality(
    net,
    bunch,
    f=lambda x: np.power(x, 2),
    g=lambda x: np.power(x, 0.5),
    phi=lambda x: np.power(x, 2),
    psi=lambda x: np.power(x, 0.5),
    max_iter=100,
    tol=1e-6,
):
    """Computes node centralities.

    Parameters
    ----------
    net : Hypergraph
        The hypergraph of interest
    bunch : Iterable
        Edges in `net`
    f : lambda function, default: x^2
        The function f as described in Tudisco and Higham.
        Must accept a numpy array.
    g : lambda function, default: x^0.5
        The function g as described in Tudisco and Higham.
        Must accept a numpy array.
    phi : lambda function, default: x^2
        The function phi as described in Tudisco and Higham.
        Must accept a numpy array.
    psi : lambda function, default: x^0.5
        The function psi as described in Tudisco and Higham.
        Must accept a numpy array.
    max_iter : int, default: 100
        Number of iterations at which the algorithm terminates
        if convergence is not reached.
    tol : float > 0, default: 1e-6
        The total allowable error in the node and edge centralities.

    Returns
    -------
    dict, dict
        The node centrality where keys are node IDs and values are associated
        centralities and the edge centrality where keys are the edge IDs and
        values are associated centralities.

    Notes
    -----
    In the paper from which this was taken, it includes general functions
    for both nodes and edges, nodes and edges may be weighted, and one can
    choose different norms for normalization, all of which are not yet
    implemented.

    This method does not output the node centralities even though they are computed.

    References
    ----------
    Node and edge nonlinear eigenvector centrality for hypergraphs,
    Francesco Tudisco & Desmond J. Higham,
    https://doi.org/10.1038/s42005-021-00704-2
    """
    c, _ = xgi.node_edge_centrality(net, f, g, phi, psi, max_iter, tol)
    return {n: c[n] for n in c if n in bunch}


def katz_centrality(net, bunch, cutoff=100):
    """Compute the H-eigenvector centrality of a hypergraph.

    Parameters
    ----------
    net : xgi.Hypergraph
        The hypergraph of interest.
    bunch : Iterable
        Nodes in `net`.
    cutoff : int
        Power at which to stop the series :math:`A + \alpha A^2 + \alpha^2 A^3 + \dots`
        Default value is 100.

    Returns
    -------
    dict
        node IDs are keys and centrality values
        are values. The centralities are 1-normalized.

    Raises
    ------
    XGIError
        If the hypergraph is empty.

    Notes
    -----
    [1] The Katz-centrality is defined as

    .. math::
        c = [(I - \alpha A^{t})^{-1} - I]{\bf 1},

    where :math:`A` is the adjacency matrix of the the (hyper)graph.
    Since :math:`A^{t} = A` for undirected graphs (our case), we have:


    .. math::
        &[I + A + \alpha A^2 + \alpha^2 A^3 + \dots](I - \alpha A^{t})

        & = [I + A + \alpha A^2 + \alpha^2 A^3 + \dots](I - \alpha A)

        & = (I + A + \alpha A^2 + \alpha^2 A^3 + \dots) - A - \alpha A^2

        & - \alpha^2 A^3 - \alpha^3 A^4 - \dots

        & = I

    And :math:`(I - \alpha A^{t})^{-1} = I + A + \alpha A^2 + \alpha^2 A^3 + \dots`
    Thus we can use the power series to compute the Katz-centrality.
    [2] The Katz-centrality of isolated nodes (no hyperedges contains them) is
    zero. The Katz-centrality of an empty hypergraph is not defined.

    References
    ----------
    See https://en.wikipedia.org/wiki/Katz_centrality#Alpha_centrality (visited
    May 20 2023) for a clear definition of Katz centrality.
    """
    c = xgi.katz_centrality(net, cutoff=cutoff)
    return {n: c[n] for n in c if n in bunch}