"""Algorithms for computing nodal clustering coefficients."""
from itertools import combinations
import numpy as np
from ..exception import XGIError
from ..linalg import adjacency_matrix
__all__ = [
"clustering_coefficient",
"local_clustering_coefficient",
"two_node_clustering_coefficient",
]
[docs]def clustering_coefficient(H):
r"""Return the clustering coefficients for
each node in a Hypergraph.
This clustering coefficient is defined as the
clustering coefficient of the unweighted pairwise
projection of the hypergraph, i.e.,
:math:`c = A^3_{i,i}/\binom{k}{2},`
where :math:`A` is the adjacency matrix of the network
and :math:`k` is the pairwise degree of :math:`i`.
Parameters
----------
H : Hypergraph
Hypergraph
Returns
-------
dict
nodes are keys, clustering coefficients are values.
Notes
-----
The clustering coefficient is undefined when the number of
neighbors is 0 or 1, but we set the clustering coefficient
to 0 in these cases. For more discussion, see
https://arxiv.org/abs/0802.2512
See Also
--------
local_clustering_coefficient
two_node_clustering_coefficient
References
----------
"Clustering Coefficients in Protein Interaction Hypernetworks"
by Suzanne Gallagher and Debra Goldberg.
DOI: 10.1145/2506583.2506635
Example
-------
>>> import xgi
>>> H = xgi.random_hypergraph(3, [1, 1])
>>> cc = xgi.clustering_coefficient(H)
>>> cc
{0: 1.0, 1: 1.0, 2: 1.0}
"""
adj, index = adjacency_matrix(H, index=True)
ndict = {n: i for i, n in index.items()}
k = adj.sum(axis=1)
denom = k * (k - 1) / 2
mat = adj.dot(adj).dot(adj)
with np.errstate(divide="ignore", invalid="ignore"):
result = np.nan_to_num(0.5 * mat.diagonal() / denom)
return {n: float(result[ndict[n]]) if n in ndict else 0 for n in H.nodes}
[docs]def local_clustering_coefficient(H):
"""Compute the local clustering coefficient.
This clustering coefficient is based on the
overlap of the edges connected to a given node,
normalized by the size of the node's neighborhood.
Parameters
----------
H : Hypergraph
Hypergraph
Returns
-------
dict
keys are node IDs and values are the
clustering coefficients.
Notes
-----
The clustering coefficient is undefined when the number of
neighbors is 0 or 1, but we set the clustering coefficient
to 0 in these cases. For more discussion, see
https://arxiv.org/abs/0802.2512
See Also
--------
clustering_coefficient
two_node_clustering_coefficient
References
----------
"Properties of metabolic graphs: biological organization or representation
artifacts?" by Wanding Zhou and Luay Nakhleh.
https://doi.org/10.1186/1471-2105-12-132
"Hypergraphs for predicting essential genes using multiprotein complex data"
by Florian Klimm, Charlotte M. Deane, and Gesine Reinert.
https://doi.org/10.1093/comnet/cnaa028
Example
-------
>>> import xgi
>>> H = xgi.random_hypergraph(3, [1, 1])
>>> cc = xgi.local_clustering_coefficient(H)
>>> cc
{0: 0.3333333333333333, 1: 0.3333333333333333, 2: 0.3333333333333333}
"""
result = {}
memberships = H.nodes.memberships()
members = H.edges.members()
for n in H.nodes:
ev = list(memberships[n])
dv = len(ev)
if dv <= 1:
result[n] = 0
else:
total_eo = 0
# go over all pairs of edges pairwise
for e1, e2 in combinations(ev, 2):
edge1 = members[e1]
edge2 = members[e2]
# set differences for the hyperedges
D1 = set(edge1) - set(edge2)
D2 = set(edge2) - set(edge1)
# if edges are the same by definition the extra overlap is zero
if len(D1.union(D2)) == 0:
eo = 0
else:
# otherwise we have to look at their neighbours
# the neighbours of D1 and D2, respectively.
neighD1 = {i for d in D1 for i in H.nodes.neighbors(d)}
neighD2 = {i for d in D2 for i in H.nodes.neighbors(d)}
# compute extra overlap [len() is used for cardinality of edges]
eo = (
len(neighD1.intersection(D2)) + len(neighD2.intersection(D1))
) / len(
D1.union(D2)
) # add it up
# add it up
total_eo = total_eo + eo
# include normalisation by degree k*(k-1)/2
result[n] = 2 * total_eo / (dv * (dv - 1))
return result
[docs]def two_node_clustering_coefficient(H, kind="union"):
"""Return the clustering coefficients for
each node in a Hypergraph.
This definition averages over all of the
two-node clustering coefficients involving the node.
Parameters
----------
H : Hypergraph
Hypergraph
kind : string, optional
The type of two node clustering coefficient. Options
are "union", "max", and "min". By default, "union".
Returns
-------
dict
nodes are keys, clustering coefficients are values.
Notes
-----
The clustering coefficient is undefined when the number of
neighbors is 0 or 1, but we set the clustering coefficient
to 0 in these cases. For more discussion, see
https://arxiv.org/abs/0802.2512
See Also
--------
clustering_coefficient
local_clustering_coefficient
References
----------
"Clustering Coefficients in Protein Interaction Hypernetworks"
by Suzanne Gallagher and Debra Goldberg.
DOI: 10.1145/2506583.2506635
Example
-------
>>> import xgi
>>> H = xgi.random_hypergraph(3, [1, 1])
>>> cc = xgi.two_node_clustering_coefficient(H, kind="union")
>>> cc
{0: 0.5, 1: 0.5, 2: 0.5}
"""
result = {}
memberships = H.nodes.memberships()
for n in H.nodes:
neighbors = H.nodes.neighbors(n)
result[n] = 0.0
for v in neighbors:
result[n] += _uv_cc(n, v, memberships, kind=kind) / len(neighbors)
return result
def _uv_cc(u, v, memberships, kind="union"):
"""Helper function to compute the two-node
clustering coefficient.
Parameters
----------
u : hashable
First node
v : hashable
Second node
memberships : dict
node IDs are keys, edge IDs to which they belong
are values.
kind : str, optional
Type of clustering coefficient to compute, by default "union".
Options:
- "union"
- "max"
- "min"
Returns
-------
float
The clustering coefficient
Raises
------
XGIError
If an invalid clustering coefficient kind
is specified.
References
----------
"Clustering Coefficients in Protein Interaction Hypernetworks"
by Suzanne Gallagher and Debra Goldberg.
DOI: 10.1145/2506583.2506635
"""
m_u = memberships[u]
m_v = memberships[v]
num = len(m_u.intersection(m_v))
if kind == "union":
denom = len(m_u.union(m_v))
elif kind == "min":
denom = min(len(m_u), len(m_v))
elif kind == "max":
denom = max(len(m_u), len(m_v))
else:
raise XGIError("Invalid kind of clustering.")
if denom == 0:
return np.nan
return num / denom