Source code for xgi.algorithms.centrality

"""Algorithms for computing the centralities of nodes (and edges) in a hypergraph."""

from warnings import warn

import networkx as nx
import numpy as np
from numpy.linalg import norm
from scipy.sparse.linalg import eigsh

from ..convert import to_line_graph
from ..exception import XGIError
from ..linalg import clique_motif_matrix, incidence_matrix
from ..utils import convert_labels_to_integers
from .properties import is_uniform

__all__ = [
    "clique_eigenvector_centrality",
    "h_eigenvector_centrality",
    "node_edge_centrality",
    "line_vector_centrality",
    "katz_centrality",
]


[docs]def clique_eigenvector_centrality(H, tol=1e-6): """Compute the clique motif eigenvector centrality of a hypergraph. Parameters ---------- H : Hypergraph The hypergraph of interest. tol : float, optional The tolerance when computing the eigenvector. By default, 1e-6. Returns ------- dict Centrality, where keys are node IDs and values are centralities. The centralities are 1-normalized. See Also -------- h_eigenvector_centrality References ---------- Three Hypergraph Eigenvector Centralities, Austin R. Benson, https://doi.org/10.1137/18M1203031 """ from ..algorithms import is_connected # if there aren't any nodes, return an empty dict if H.num_nodes == 0: return dict() # if the hypergraph is not connected, # this metric doesn't make sense and should return nan. if not is_connected(H): return {n: np.nan for n in H.nodes} W, node_dict = clique_motif_matrix(H, index=True) _, v = eigsh(W.asfptype(), k=1, which="LM", tol=tol) # multiply by the sign to try and enforce positivity v = np.sign(v[0]) * v / norm(v, 1) return {node_dict[n]: v[n].item() for n in node_dict}
[docs]def h_eigenvector_centrality(H, max_iter=100, tol=1e-6): """Compute the H-eigenvector centrality of a uniform hypergraph. Parameters ---------- H : Hypergraph The hypergraph of interest. max_iter : int, optional The maximum number of iterations before the algorithm terminates. By default, 100. tol : float > 0, optional The desired L2 error in the centrality vector. By default, 1e-6. Returns ------- dict Centrality, where keys are node IDs and values are centralities. The centralities are 1-normalized. Raises ------ XGIError If the hypergraph is not uniform. See Also -------- clique_eigenvector_centrality References ---------- Three Hypergraph Eigenvector Centralities, Austin R. Benson, https://doi.org/10.1137/18M1203031 """ from ..algorithms import is_connected # if there aren't any nodes, return an empty dict if H.num_nodes == 0: return dict() # if the hypergraph is not connected, # this metric doesn't make sense and should return nan. if not is_connected(H): return {n: np.nan for n in H.nodes} m = is_uniform(H) if not m: raise XGIError("This method is not defined for non-uniform hypergraphs.") new_H = convert_labels_to_integers(H, "old-label") f = lambda v, m: np.power(v, 1.0 / m) # noqa: E731 g = lambda v, x: np.prod(v[list(x)]) # noqa: E731 x = np.random.uniform(size=(new_H.num_nodes)) x = x / norm(x, 1) for iter in range(max_iter): new_x = apply(new_H, x, g) new_x = f(new_x, m) # multiply by the sign to try and enforce positivity new_x = np.sign(new_x[0]) * new_x / norm(new_x, 1) if norm(x - new_x) <= tol: break x = new_x.copy() else: warn("Iteration did not converge!") return {new_H.nodes[n]["old-label"]: c for n, c in zip(new_H.nodes, new_x)}
def apply(H, x, g=lambda v, e: np.sum(v[list(e)])): """Apply a vector to the hypergraph given a function. Parameters ---------- H : Hypergraph Hypergraph of interest. x : 1D numpy array 1D vector g : lambda function, optional function to apply. By default, sum. Returns ------- 1D numpy array vector post application """ new_x = np.zeros(H.num_nodes) for edge in H.edges.members(): edge = list(edge) # ordered permutations for shift in range(len(edge)): new_x[edge[shift]] += g(x, edge[shift + 1 :] + edge[:shift]) return new_x
[docs]def node_edge_centrality( H, f=lambda x: np.power(x, 2), g=lambda x: np.power(x, 0.5), phi=lambda x: np.power(x, 2), psi=lambda x: np.power(x, 0.5), max_iter=100, tol=1e-6, ): """Computes the node and edge centralities Parameters ---------- H : Hypergraph The hypergraph of interest f : lambda function, optional The function f as described in Tudisco and Higham. Must accept a numpy array. By default, :math:`f(x) = x^2`. g : lambda function, optional The function g as described in Tudisco and Higham. Must accept a numpy array. By default, :math`g(x) = \sqrt{x}`. phi : lambda function, optional The function phi as described in Tudisco and Higham. Must accept a numpy array. By default :math:`\phi(x) = x^2`. psi : lambda function, optional The function psi as described in Tudisco and Higham. Must accept a numpy array. By default: :math:`\psi(x) = \sqrt{x}`. max_iter : int, optional Number of iterations at which the algorithm terminates if convergence is not reached. By default, 100. tol : float > 0, optional The total allowable error in the node and edge centralities. By default, 1e-6. Returns ------- dict, dict The node centrality where keys are node IDs and values are associated centralities and the edge centrality where keys are the edge IDs and values are associated centralities. The centralities of both the nodes and edges are 1-normalized. Notes ----- In the paper from which this was taken, it is more general in that it includes general functions for both nodes and edges, nodes and edges may be weighted, and one can choose different norms for normalization. References ---------- Node and edge nonlinear eigenvector centrality for hypergraphs, Francesco Tudisco & Desmond J. Higham, https://doi.org/10.1038/s42005-021-00704-2 """ from ..algorithms import is_connected # if there aren't any nodes or edges, return an empty dict if H.num_nodes == 0 or H.num_edges == 0 or not is_connected(H): return {n: np.nan for n in H.nodes}, {e: np.nan for e in H.edges} # if the hypergraph is not connected, # this metric doesn't make sense and should return nan. # if not is_connected(H): # return {n: np.nan for n in H.nodes}, {e: np.nan for e in H.edges} n = H.num_nodes m = H.num_edges x = np.ones(n) / n y = np.ones(m) / m I, node_dict, edge_dict = incidence_matrix(H, index=True) check = np.inf for iter in range(max_iter): u = np.multiply(x, g(I @ f(y))) v = np.multiply(y, psi(I.T @ phi(x))) # multiply by the sign to try and enforce positivity new_x = np.sign(u[0]) * u / norm(u, 1) new_y = np.sign(v[0]) * v / norm(v, 1) check = norm(new_x - x) + norm(new_y - y) if check < tol: break x = new_x.copy() y = new_y.copy() else: warn("Iteration did not converge!") return {node_dict[n]: new_x[n] for n in node_dict}, { edge_dict[e]: new_y[e] for e in edge_dict }
[docs]def line_vector_centrality(H): """The vector centrality of nodes in the line graph of the hypergraph. Parameters ---------- H : Hypergraph The hypergraph of interest Returns ------- dict Centrality, where keys are node IDs and values are lists of centralities. References ---------- "Vector centrality in hypergraphs", K. Kovalenko, M. Romance, E. Vasilyeva, D. Aleja, R. Criado, D. Musatov, A.M. Raigorodskii, J. Flores, I. Samoylenko, K. Alfaro-Bittner, M. Perc, S. Boccaletti, https://doi.org/10.1016/j.chaos.2022.112397 """ from ..algorithms import is_connected # If the hypergraph is empty, then return an empty dictionary if H.num_nodes == 0: return dict() if not is_connected(H): raise XGIError("This method is not defined for disconnected hypergraphs.") LG = to_line_graph(H) LGcent = nx.eigenvector_centrality(LG) vc = {node: [] for node in H.nodes} edge_label_dict = {tuple(edge): index for index, edge in H._edge.items()} hyperedge_dims = {tuple(edge): len(edge) for edge in H.edges.members()} D = H.edges.size.max() for k in range(2, D + 1): c_i = np.zeros(len(H.nodes)) for edge, _ in list(filter(lambda x: x[1] == k, hyperedge_dims.items())): for node in edge: try: c_i[node] += LGcent[edge_label_dict[edge]] except IndexError: raise Exception( "Nodes must be written with the Pythonic indexing (0,1,2...)" ) c_i *= 1 / k for node in H.nodes: vc[node].append(c_i[node]) return vc
[docs]def katz_centrality(H, cutoff=100): r"""Returns the Katz-centrality vector of a non-empty hypergraph H. The Katz-centrality measures the relative importance of a node by counting how many distinct walks start from it. The longer the walk is the smaller its contribution will be (attenuation factor :math:`\alpha`). Initially defined for graphs, the Katz-centrality is here generalized to hypergraphs using the most basic definition of neighbors: two nodes that share an hyperedge. Parameters ---------- H : xgi.Hypergraph Hypergraph on which to compute the Katz-centralities. cutoff : int Power at which to stop the series :math:`A + \alpha A^2 + \alpha^2 A^3 + \dots` Default value is 100. Returns ------- c : dict `c` is a dictionary with node IDs as keys and centrality values as values. The centralities are 1-normalized. Raises ------ XGIError If the hypergraph is empty. Notes ----- [1] The Katz-centrality is defined as .. math:: c = [(I - \alpha A^{t})^{-1} - I]{\bf 1}, where :math:`A` is the adjacency matrix of the the (hyper)graph. Since :math:`A^{t} = A` for undirected graphs (our case), we have: .. math:: &[I + A + \alpha A^2 + \alpha^2 A^3 + \dots](I - \alpha A^{t}) & = [I + A + \alpha A^2 + \alpha^2 A^3 + \dots](I - \alpha A) & = (I + A + \alpha A^2 + \alpha^2 A^3 + \dots) - A - \alpha A^2 & - \alpha^2 A^3 - \alpha^3 A^4 - \dots & = I And :math:`(I - \alpha A^{t})^{-1} = I + A + \alpha A^2 + \alpha^2 A^3 + \dots` Thus we can use the power series to compute the Katz-centrality. [2] The Katz-centrality of isolated nodes (no hyperedges contains them) is zero. The Katz-centrality of an empty hypergraph is not defined. References ---------- See https://en.wikipedia.org/wiki/Katz_centrality#Alpha_centrality (visited May 20 2023) for a clear definition of Katz centrality. """ n = H.num_nodes m = H.num_edges if n == 0: # no nodes raise XGIError("The Katz-centrality of an empty hypergraph is not defined.") elif m == 0: c = np.zeros(n) else: # there is at least one edge, both N and M are non-zero A = clique_motif_matrix(H) alpha = 1 / 2**n mat = A for power in range(1, cutoff): mat = alpha * mat.dot(A) + A u = 1 / n * np.ones(n) c = mat.dot(u) c *= 1 / norm(c, 1) nodedict = dict(zip(range(n), H.nodes)) return {nodedict[idx]: c[idx] for idx in nodedict}