Source code for xgi.algorithms.simpliciality

from itertools import chain, combinations

import numpy as np
from scipy.special import binom

from ..core import Hypergraph
from ..utils import Trie

__all__ = [
    "edit_simpliciality",
    "simplicial_edit_distance",
    "face_edit_simpliciality",
    "mean_face_edit_distance",
    "simplicial_fraction",
]


[docs]def edit_simpliciality(H, min_size=2, exclude_min_size=True): r"""Computes the edit simpliciality. The fraction of sub-edges contained when compared to a simplicial complex. Parameters ---------- H : xgi.Hypergraph The hypergraph of interest min_size: int, optional The minimum hyperedge size to include when calculating whether a hyperedge is a simplex by counting subfaces. For more details, see the Notes below. By default, 2. exclude_min_size : bool, optional Whether to exclude minimal simplices when counting simplices. For more detailed information, see the Notes below. By default, True. Returns ------- float The edit simpliciality See Also -------- simplicial_edit_distance Notes ----- 1. The formal definition of a simplicial complex can be unnecessarily strict when used to represent perfect inclusion structures. By definition, a simplex always contains singletons (edges comprising a single node) and the empty set. Several datasets will not include such interactions by construction. To circumvent this issue, we use a relaxed definition of downward closure that excludes edges of a certain size or smaller wherever it makes sense. By default, we set the minimum size to be 2 since some datasets do not contain singletons. 2. Hyperedges we call “minimal faces” may significantly skew the simpliciality of a dataset. The minimal faces of a hypergraph :math:`H` are the edges of the minimal size, i.e., :math:`|e| = \min(K)`, where :math:`K` is the set of sizes that we consider based on note 1. (In a traditional simplicial complex, the minimal faces are singletons). With the size restrictions in place, the minimal faces of a hypergraph are always simplices because, by definition, there are no smaller edges for these edges to include. When measuring the simpliciality of a dataset, it is most meaningful to focus on the faces for which inclusion is possible, and so, by default, we exclude these minimal faces when counting potential simplices. References ---------- "The simpliciality of higher-order order networks" by Nicholas Landry, Jean-Gabriel Young, and Nicole Eikmeier, *EPJ Data Science* **13**, 17 (2024). """ return 1 - simplicial_edit_distance( H, min_size=min_size, exclude_min_size=exclude_min_size )
[docs]def simplicial_edit_distance(H, min_size=2, exclude_min_size=True, normalize=True): r"""Computes the simplicial edit distance. The number (or fraction) of sub-edges needed to be added to a hypergraph to make it a simplicial complex. Parameters ---------- H : xgi.Hypergraph The hypergraph of interest min_size: int, optional The minimum hyperedge size to include when calculating whether a hyperedge is a simplex by counting subfaces. For more details, see the Notes below. By default, 2. exclude_min_size : bool, optional Whether to exclude minimal simplices when counting simplices. For more detailed information, see the Notes below. By default, True. normalize : bool, optional Whether to normalize by the total number of edges Returns ------- float The edit simpliciality See Also -------- edit_simpliciality Notes ----- 1. The formal definition of a simplicial complex can be unnecessarily strict when used to represent perfect inclusion structures. By definition, a simplex always contains singletons (edges comprising a single node) and the empty set. Several datasets will not include such interactions by construction. To circumvent this issue, we use a relaxed definition of downward closure that excludes edges of a certain size or smaller wherever it makes sense. By default, we set the minimum size to be 2 since some datasets do not contain singletons. 2. Hyperedges we call “minimal faces” may significantly skew the simpliciality of a dataset. The minimal faces of a hypergraph :math:`H` are the edges of the minimal size, i.e., :math:`|e| = \min(K)`, where :math:`K` is the set of sizes that we consider based on note 1. (In a traditional simplicial complex, the minimal faces are singletons). With the size restrictions in place, the minimal faces of a hypergraph are always simplices because, by definition, there are no smaller edges for these edges to include. When measuring the simpliciality of a dataset, it is most meaningful to focus on the faces for which inclusion is possible, and so, by default, we exclude these minimal faces when counting potential simplices. References ---------- "The simpliciality of higher-order order networks" by Nicholas Landry, Jean-Gabriel Young, and Nicole Eikmeier, *EPJ Data Science* **13**, 17 (2024). """ edges = H.edges.filterby("size", min_size, "geq").members() t = Trie() t.build_trie(edges) maxH = Hypergraph( H.edges.maximal() .filterby("size", min_size + exclude_min_size, "geq") .members(dtype=dict) ) if not maxH.edges: return np.nan id_to_num = dict(zip(maxH.edges, range(maxH.num_edges))) ms = 0 for id1, e in maxH.edges.members(dtype=dict).items(): redundant_missing_faces = set() for id2 in maxH.edges.neighbors(id1): if id_to_num[id2] < id_to_num[id1]: c = maxH._edge[id2].intersection(e) if len(c) >= min_size: redundant_missing_faces.update(_missing_subfaces(t, c, min_size)) # we don't have to worry about the intersection being a max face # because a) there are no multiedges and b) these are all maximal # faces so no inclusions. if not t.search(c): redundant_missing_faces.add(frozenset(c)) mf = _count_missing_subfaces(t, e, min_size) rmf = len(redundant_missing_faces) ms += mf - rmf if normalize: s = len(edges) mf = maxH.num_edges if s - mf + ms > 0: return ms / (s - mf + ms) else: return np.nan else: return ms
[docs]def face_edit_simpliciality(H, min_size=2, exclude_min_size=True): r"""Computes the face edit simpliciality. The average fraction of sub-edges contained in a hyperedge relative to a simplex. Parameters ---------- H : xgi.Hypergraph The hypergraph of interest min_size: int, optional The minimum hyperedge size to include when calculating whether a hyperedge is a simplex by counting subfaces. For more details, see the Notes below. By default, 2. exclude_min_size : bool, optional Whether to exclude minimal simplices when counting simplices. For more detailed information, see the Notes below. By default, True. Returns ------- float The face edit simpliciality See Also -------- mean_face_edit_distance Notes ----- 1. The formal definition of a simplicial complex can be unnecessarily strict when used to represent perfect inclusion structures. By definition, a simplex always contains singletons (edges comprising a single node) and the empty set. Several datasets will not include such interactions by construction. To circumvent this issue, we use a relaxed definition of downward closure that excludes edges of a certain size or smaller wherever it makes sense. By default, we set the minimum size to be 2 since some datasets do not contain singletons. 2. Hyperedges we call “minimal faces” may significantly skew the simpliciality of a dataset. The minimal faces of a hypergraph :math:`H` are the edges of the minimal size, i.e., :math:`|e| = \min(K)`, where :math:`K` is the set of sizes that we consider based on note 1. (In a traditional simplicial complex, the minimal faces are singletons). With the size restrictions in place, the minimal faces of a hypergraph are always simplices because, by definition, there are no smaller edges for these edges to include. When measuring the simpliciality of a dataset, it is most meaningful to focus on the faces for which inclusion is possible, and so, by default, we exclude these minimal faces when counting potential simplices. References ---------- "The simpliciality of higher-order order networks" by Nicholas Landry, Jean-Gabriel Young, and Nicole Eikmeier, *EPJ Data Science* **13**, 17 (2024). """ return 1 - mean_face_edit_distance( H, min_size=min_size, exclude_min_size=exclude_min_size )
[docs]def mean_face_edit_distance(H, min_size=2, exclude_min_size=True, normalize=True): r"""Computes the mean face edit distance The average number (or fraction) of sub-edges needed to be added to make a hyperedge a simplex. Parameters ---------- H : Hypergraph The hypergraph of interest min_size: int, optional The minimum hyperedge size to include when calculating whether a hyperedge is a simplex by counting subfaces. For more details, see the Notes below. By default, 2. exclude_min_size : bool, optional Whether to exclude minimal simplices when counting simplices. For more detailed information, see the Notes below. By default, True. normalize : bool, optional Whether to normalize the face edit distance, by default True Returns ------- float The mean face edit distance See Also -------- face_edit_simpliciality Notes ----- 1. The formal definition of a simplicial complex can be unnecessarily strict when used to represent perfect inclusion structures. By definition, a simplex always contains singletons (edges comprising a single node) and the empty set. Several datasets will not include such interactions by construction. To circumvent this issue, we use a relaxed definition of downward closure that excludes edges of a certain size or smaller wherever it makes sense. By default, we set the minimum size to be 2 since some datasets do not contain singletons. 2. Hyperedges we call “minimal faces” may significantly skew the simpliciality of a dataset. The minimal faces of a hypergraph :math:`H` are the edges of the minimal size, i.e., :math:`|e| = \min(K)`, where :math:`K` is the set of sizes that we consider based on note 1. (In a traditional simplicial complex, the minimal faces are singletons). With the size restrictions in place, the minimal faces of a hypergraph are always simplices because, by definition, there are no smaller edges for these edges to include. When measuring the simpliciality of a dataset, it is most meaningful to focus on the faces for which inclusion is possible, and so, by default, we exclude these minimal faces when counting potential simplices. References ---------- "The simpliciality of higher-order order networks" by Nicholas Landry, Jean-Gabriel Young, and Nicole Eikmeier, *EPJ Data Science* **13**, 17 (2024). """ t = Trie() t.build_trie(H.edges.filterby("size", min_size, "geq").members()) max_faces = ( H.edges.maximal().filterby("size", min_size + exclude_min_size, "geq").members() ) avg_d = 0 for e in max_faces: if len(e) >= min_size: d = _count_missing_subfaces(t, e, min_size=min_size) # missing subfaces m = _max_number_of_subfaces(min_size, len(e)) if normalize and m != 0: d *= 1.0 / m avg_d += d / len(max_faces) return avg_d
[docs]def simplicial_fraction(H, min_size=2, exclude_min_size=True): r"""Computing the simplicial fraction for a hypergraph. What fraction of the hyperedges are simplices? Parameters ---------- H : Hypergraph The hypergraph of interest min_size: int, optional The minimum hyperedge size to include when calculating whether a hyperedge is a simplex by counting subfaces. For more details, see the Notes below. By default, 2. exclude_min_size : bool, optional Whether to exclude minimal simplices when counting simplices. For more detailed information, see the Notes below. By default, True. Returns ------- float The simplicial fraction Notes ----- 1. The formal definition of a simplicial complex can be unnecessarily strict when used to represent perfect inclusion structures. By definition, a simplex always contains singletons (edges comprising a single node) and the empty set. Several datasets will not include such interactions by construction. To circumvent this issue, we use a relaxed definition of downward closure that excludes edges of a certain size or smaller wherever it makes sense. By default, we set the minimum size to be 2 since some datasets do not contain singletons. 2. Hyperedges we call “minimal faces” may significantly skew the simpliciality of a dataset. The minimal faces of a hypergraph :math:`H` are the edges of the minimal size, i.e., :math:`|e| = \min(K)`, where :math:`K` is the set of sizes that we consider based on note 1. (In a traditional simplicial complex, the minimal faces are singletons). With the size restrictions in place, the minimal faces of a hypergraph are always simplices because, by definition, there are no smaller edges for these edges to include. When measuring the simpliciality of a dataset, it is most meaningful to focus on the faces for which inclusion is possible, and so, by default, we exclude these minimal faces when counting potential simplices. References ---------- "The simpliciality of higher-order order networks" by Nicholas Landry, Jean-Gabriel Young, and Nicole Eikmeier, *EPJ Data Science* **13**, 17 (2024). """ try: ns = _count_simplices(H, min_size, exclude_min_size) ps = _potential_simplices(H, min_size, exclude_min_size) return ns / ps except ZeroDivisionError: return np.nan
#### Helper functions def _powerset(iterable, min_size=1, max_size=None): """Generates a modified powerset. User can specify the maximum and minimum size of the sets in the powerset. Parameters ---------- iterable : iterable The set for which to compute the powerset. min_size: int, default: 1 The minimum hyperedge size to include when calculating whether a hyperedge is a simplex by counting subfaces. max_size : int, default: None. The maximum size to include when computing the power set. When max_size=None, it generates the powerset including the edge itself. Returns ------- itertools.chain a generator of the sets in the powerset. """ s = iterable if max_size is None: max_size = len(s) return chain.from_iterable( combinations(s, r) for r in range(min_size, max_size + 1) ) def _count_missing_subfaces(t, face, min_size=1): """Computing the edit distance for a single face. Parameters ---------- t : Trie The trie representing the hypergraph face : iterable The edge for which to find the edit distance min_size: int, default: 1 The minimum hyperedge size to include when calculating whether a hyperedge is a simplex by counting subfaces. Returns ------- int The edit distance """ sub_edges = list(_powerset(face, min_size=min_size, max_size=len(face) - 1)) count = 0 for e in sub_edges: if not t.search(e): count += 1 return count def _missing_subfaces(t, face, min_size=1): """Computing the edit distance for a single face. Parameters ---------- t : Trie The trie representing the hypergraph face : iterable The edge for which to find the edit distance min_size: int, default: 1 The minimum hyperedge size to include when calculating whether a hyperedge is a simplex by counting subfaces. Returns ------- int The edit distance """ sub_edges = list(_powerset(face, min_size=min_size, max_size=len(face) - 1)) ms = set() for e in sub_edges: if not t.search(e): ms.add(frozenset(e)) return ms def _max_number_of_subfaces(min_size, max_size): d = 2**max_size - 2 # subtract 2 for the face itself and the empty set for i in range(1, min_size): d -= binom(max_size, i) return int(d) def _potential_simplices(H, min_size=2, exclude_min_size=True): # record total number of hyperedges that are potential simplices return len(H.edges.filterby("size", min_size + exclude_min_size, "geq")) def _count_simplices(H, min_size=2, exclude_min_size=True): # build trie data structure t = Trie() all_edges = H.edges.members() t.build_trie(all_edges) edges = H.edges.filterby("size", min_size + exclude_min_size, "geq").members() # for each hyperedge, determine if it's a simplex count = 0 # The following loop is embarassingly parallel, so parallelize to increase speed would be good for e in edges: if _is_simplex(t, e, min_size): count += 1 return count def _is_simplex(t, edge, min_size=2): for e in _powerset(edge, min_size): if not t.search(e): return False return True