Source code for xgi.generators.uniform

"""Generate random uniform hypergraphs."""

import itertools
import operator
import random
import warnings
from functools import reduce

import numpy as np
from scipy.special import comb

from ..exception import XGIError
from .classic import complete_hypergraph, empty_hypergraph

__all__ = [
    "uniform_hypergraph_configuration_model",
    "uniform_HSBM",
    "uniform_HPPM",
    "uniform_erdos_renyi_hypergraph",
]


[docs]def uniform_hypergraph_configuration_model(k, m, seed=None): """ A function to generate an m-uniform configuration model Parameters ---------- k : dictionary This is a dictionary where the keys are node ids and the values are node degrees. m : int specifies the hyperedge size seed : integer or None (default) The seed for the random number generator Returns ------- Hypergraph object The generated hypergraph Warns ----- warnings.warn If the sums of the degrees are not divisible by m, the algorithm still runs, but raises a warning and adds an additional connection to random nodes to satisfy this condition. Notes ----- This algorithm normally creates multi-edges and loopy hyperedges. We remove the loopy hyperedges. References ---------- "The effect of heterogeneity on hypergraph contagion models" by Nicholas W. Landry and Juan G. Restrepo https://doi.org/10.1063/5.0020034 Example ------- >>> import xgi >>> import random >>> n = 1000 >>> m = 3 >>> k = {1: 1, 2: 2, 3: 3, 4: 3} >>> H = xgi.uniform_hypergraph_configuration_model(k, m) """ if seed is not None: random.seed(seed) # Making sure we have the right number of stubs remainder = sum(k.values()) % m if remainder != 0: warnings.warn( "This degree sequence is not realizable. " "Increasing the degree of random nodes so that it is." ) random_ids = random.sample(list(k.keys()), int(round(m - remainder))) for idx in random_ids: k[idx] = k[idx] + 1 stubs = [] # Creating the list to index through for idx in k: stubs.extend([idx] * int(k[idx])) H = empty_hypergraph() H.add_nodes_from(k.keys()) while len(stubs) != 0: u = random.sample(range(len(stubs)), m) edge = set() for index in u: edge.add(stubs[index]) if len(edge) == m: H.add_edge(edge) for index in sorted(u, reverse=True): del stubs[index] return H
[docs]def uniform_HSBM(n, m, p, sizes, seed=None): r"""Create a uniform hypergraph stochastic block model (HSBM). This uses a fast method for generating hyperedges so that instead of the algorithm being of complexity :math:`\mathcal{O}(N^m)`, it can be as fast as :math:`\mathcal{O}(m(N + |E|))`. See the references for more details. Parameters ---------- n : int The number of nodes m : int The hyperedge size p : m-dimensional numpy array tensor of probabilities between communities sizes : list or 1D numpy array The sizes of the community blocks in order seed : integer or None (default) The seed for the random number generator Returns ------- Hypergraph The constructed SBM hypergraph Raises ------ XGIError - If the length of sizes and p do not match. - If p is not a tensor with every dimension equal - If p is not m-dimensional - If the entries of p are not in the range [0, 1] - If the sum of the vector of sizes does not equal the number of nodes. Exception If there is an integer overflow error See Also -------- uniform_HPPM Notes ----- Because XGI only stores edges as sets, when self-loops occur, they become smaller edges (for example, the edge (0, 0, 0) will be mapped to {0}). However, because this is explicitly a *uniform* method, we discard these edges so that this is the case. For sparse networks, this is a rare occurrence and this method offers an order of magnitude speedup. References ---------- Nicholas W. Landry and Juan G. Restrepo, "Opinion disparity in hypergraphs with community structure", Phys. Rev. E **108**, 034311 (2024). https://doi.org/10.1103/PhysRevE.108.034311 """ # Check if dimensions match if len(sizes) != np.size(p, axis=0): raise XGIError("'sizes' and 'p' do not match.") if len(np.shape(p)) != m: raise XGIError("The dimension of p does not match m") # Check that p has the same length over every dimension. if len(set(np.shape(p))) != 1: raise XGIError("'p' must be a square tensor.") if np.max(p) > 1 or np.min(p) < 0: raise XGIError("Entries of 'p' not in [0, 1].") if np.sum(sizes) != n: raise XGIError("Sum of sizes does not match n") if seed is not None: np.random.seed(seed) node_labels = range(n) H = empty_hypergraph() H.add_nodes_from(node_labels) block_range = range(len(sizes)) # Split node labels in a partition (list of sets). size_cumsum = [sum(sizes[0:x]) for x in range(0, len(sizes) + 1)] partition = [ list(node_labels[size_cumsum[x] : size_cumsum[x + 1]]) for x in range(0, len(size_cumsum) - 1) ] for block in itertools.product(block_range, repeat=m): if p[block] == 1: # Test edges cases p_ij = 0 or 1 edges = itertools.product((partition[i] for i in block_range)) for e in edges: H.add_edge(e) elif p[block] > 0: partition_sizes = [len(partition[i]) for i in block] max_index = reduce(operator.mul, partition_sizes, 1) if max_index < 0: raise Exception("Index overflow error!") index = np.random.geometric(p[block]) - 1 while index < max_index: indices = _index_to_edge_partition(index, partition_sizes, m) e = {partition[block[i]][indices[i]] for i in range(m)} # edge ids are not guaranteed to be unique # and when casting to a set, they will no # longer be of size m. # for instance (0, 0, 0) becomes {0} # if we accept these edges, the hypergraph # will not longer be uniform, so we discard them. if len(e) == m: H.add_edge(e) index += np.random.geometric(p[block]) return H
[docs]def uniform_HPPM(n, m, k, epsilon, rho=0.5, seed=None): r"""Construct the m-uniform hypergraph planted partition model (m-HPPM) This uses a fast method for generating hyperedges so that instead of the algorithm being of complexity :math:`\mathcal{O}(N^m)`, it can be as fast as :math:`\mathcal{O}(m(N + |E|))`. See the references for more details. Parameters ---------- n : int > 0 Number of nodes m : int > 0 Hyperedge size k : float > 0 Mean degree epsilon : float > 0 Imbalance parameter rho : float between 0 and 1, optional The fraction of nodes in community 1, default 0.5 seed : integer or None (default) The seed for the random number generator Returns ------- Hypergraph The constructed m-HPPM hypergraph. Raises ------ XGIError - If rho is not between 0 and 1 - If the mean degree is negative. - If epsilon is not between 0 and 1 See Also -------- uniform_HSBM Notes ----- Because XGI only stores edges as sets, when self-loops occur, they become smaller edges (for example, the edge (0, 0, 0) will be mapped to {0}). However, because this is explicitly a *uniform* method, we discard these edges so that this is the case. For sparse networks, this is a rare occurrence and this method offers an order of magnitude speedup. References ---------- Nicholas W. Landry and Juan G. Restrepo, "Opinion disparity in hypergraphs with community structure", Phys. Rev. E **108**, 034311 (2024). https://doi.org/10.1103/PhysRevE.108.034311 """ if rho < 0 or rho > 1: raise XGIError("The value of rho must be between 0 and 1") if k < 0: raise XGIError("The mean degree must be non-negative") if epsilon < 0 or epsilon > 1: raise XGIError("epsilon must be between 0 and 1") sizes = [int(rho * n), n - int(rho * n)] p = k / (m * n ** (m - 1)) # ratio of inter- to intra-community edges q = rho**m + (1 - rho) ** m r = 1 / q - 1 p_in = (1 + r * epsilon) * p p_out = (1 - epsilon) * p p = p_out * np.ones([2] * m) p[tuple([0] * m)] = p_in p[tuple([1] * m)] = p_in return uniform_HSBM(n, m, p, sizes, seed=seed)
[docs]def uniform_erdos_renyi_hypergraph(n, m, p, p_type="prob", multiedges=False, seed=None): r"""Generate an m-uniform Erdős–Rényi hypergraph This creates a hypergraph with `n` nodes where hyperedges of size `m` are created at random with probability (or to obtain a mean degree of) `p`. This uses a fast method for generating hyperedges so that instead of the algorithm being of complexity :math:`\mathcal{O}(N^m)`, it can be as fast as :math:`\mathcal{O}(m(N + |E|))`. See the references for more details. Parameters ---------- n : int > 0 Number of nodes m : int > 0 Hyperedge size p : float or int >= 0 Probability of an m-hyperedge if p_type="prob" and mean expected degree if p_type="degree" p_type : str, optional Determines the way p is interpreted (see p for detail). Valid options are "prob" or "degree", by default "prob" multiedges : bool, optional Whether or not to allow multiedges. If True, there can be significant speedups but at the cost of creating (potentially unwanted) artifacts. When multiedges=True, it treats each edge permutation as distinct, which can lead to multiedges, especially for dense hypergraphs. For sparse hypergraphs, however, this is unlikely to be the case. By default, False. seed : integer or None (default) The seed for the random number generator Returns ------- Hypergraph The Erdos Renyi hypergraph See Also -------- ~xgi.generators.random.random_hypergraph Notes ----- When `multiedges=True`, there is the possibility of generating (potentially unwanted) artifacts, like multiedges and loopy hyperedges which are not present in the original Erdos-Renyi model. Because hypergraphs in XGI only store edges as sets, when an edge such as (0, 0, 0) is generated will be mapped to {0}. However, because this is explicitly a *uniform* method, we discard these edges. For sparse networks, this is a rare occurrence and allowing these artifacts offers an order of magnitude speedup. Although allowing loopy hyperedges and multiedges is not the default behavior, this (as well as the associated performance boost) is enabled by setting `multiedges=True`. References ---------- Nicholas W. Landry and Juan G. Restrepo, "Opinion disparity in hypergraphs with community structure", Phys. Rev. E **108**, 034311 (2024). https://doi.org/10.1103/PhysRevE.108.034311 """ if seed is not None: np.random.seed(seed) if p_type == "degree": if multiedges: q = p / (m * n ** (m - 1)) # wiring probability else: q = p * n / (m * comb(n, m)) elif p_type == "prob": q = p else: raise XGIError("Invalid p_type!") if q > 1 or q < 0: raise XGIError("Probability not in [0, 1].") if q == 1 and not multiedges: return complete_hypergraph(n, order=m - 1) if q == 0: H = empty_hypergraph() H.add_nodes_from(range(n)) return H H = empty_hypergraph() H.add_nodes_from(range(n)) if multiedges: max_index = n**m f = _index_to_edge_prod else: max_index = comb(n, m, exact=True) f = _index_to_edge_comb index = np.random.geometric(q) - 1 # -1 b/c zero indexing while index <= max_index: e = set(f(index, n, m)) # if f corresponds to _index_to_edge_prod, # edge ids are not guaranteed to be unique # and when casting to a set, they will no # longer be of size m. # for instance (0, 0, 0) becomes {0} # if we accept these edges, the hypergraph # will not longer be uniform, so we discard them. if len(e) == m: H.add_edge(e) # We no longer subtract 1 because if we did, the minimum # value of the right-hand side would be zero, meaning that # we sample the same index multiple times. index += np.random.geometric(q) return H
def _index_to_edge_prod(index, n, m): """Generate a hyperedge from an index given the number of nodes and size of hyperedges. In this method, it treats each edge permutation as distinct, which can lead to multiedges and loopy edges, especially for dense hypergraphs. Imagine that there is a hypergraph with 4 nodes and an edge size of 3. We write out each edge (allowing duplicate entries) incrementing the last entry first, followed by the second-to-last entry and so on, with each edge corresponding to an index starting at zero. For example, (0, 0, 0) has index 0, (0, 0, 1) has index 1, (0, 0, 2) has index 2, (0, 0, 3) has index 3, (0, 1, 0) has index 4, and so on. This function will, for instance, return (0, 0, 3) for index 3, network size 4, and edge size 3. Because hypergraphs in XGI only store edges as sets, the edge (0, 0, 0), for example, generated by this function will be mapped to {0}. However, because this is explicitly a *uniform* method, we discard these edges. For sparse networks, this is a rare occurrence and this method offers an order of magnitude speedup. Parameters ---------- index : int > 0 The index of the hyperedge in the list of all possible hyperedges. n : int > 0 The number of nodes m : int > 0 The hyperedge size. Returns ------- list The hyperedge to which that index corresponds See Also -------- _index_to_edge_partition _index_to_edge_comb References ---------- https://stackoverflow.com/questions/53834707/element-at-index-in-itertools-product """ return [(index // (n**r) % n) for r in range(m - 1, -1, -1)] def _index_to_edge_comb(index, n, m): """Generate a hyperedge from an index given the number of nodes and size of hyperedges. Imagine that there is a hypergraph with 4 nodes and an edge size of 3. We write out each edge incrementing the last entry first, followed by the second-to-last entry and so on, with each edge corresponding to an index starting at zero. For example, (0, 1, 2) has index 0, (0, 1, 3) has index 0, (0, 2, 3) has index 2, and (1, 2, 3) has index 3. This function will, for instance, return (0, 2, 3) for index 2, network size 4, and edge size 3. In this function, we prohibit multiedges, so each edge corresponds to a unique index. Parameters ---------- index : int >= 0 The index of the hyperedge in the list of all possible hyperedges. n : int > 0 The number of nodes m : int > 0 The hyperedge size. Returns ------- list The hyperedge to which that index corresponds See Also -------- _index_to_edge_partition _index_to_edge_prod References ---------- https://math.stackexchange.com/questions/1227409/indexing-all-combinations-without-making-list """ c = [] r = index + 1 # makes it zero indexed j = -1 for s in range(1, m + 1): cs = j + 1 while r - comb(n - 1 - cs, m - s, exact=True) > 0: r -= comb(n - 1 - cs, m - s, exact=True) cs += 1 c.append(cs) j = cs return c def _index_to_edge_partition(index, partition_sizes, m): """Generate a hyperedge from an index given the number of nodes, size of hyperedges, and community sizes. Imagine that there is a hypergraph with 10 nodes, an edge size of 3, and two communities, the first of size 8 and the second of size 2. We start out by specifying which community each node belongs to and index into each community. For example, suppose the nodes belong to communities 1, 1, and 2. Thene write out each edge (allowing duplicate entries) incrementing the last entry first, followed by the second-to-last entry and so on, with each edge corresponding to an index starting at zero. For example, (0, 0, 0) has index 0, (0, 0, 1) has index 1, (0, 1, 0) has index 2, (0, 1, 1) has index 3, (0, 2, 0) has index 4, and so on. These are indices in each partition, however, and we need the original labels of each node in each partition to recover the nodes in each edge. Parameters ---------- index : int > 0 The index of the hyperedge in the list of all possible hyperedges. partition_sizes : list or numpy array The sizes of the partitions to which the nodes belong. m : int > 0 The hyperedge size. Returns ------- list The indices in each partition to which that index corresponds See Also -------- _index_to_edge_prod _index_to_edge_comb """ try: return [ int(index // np.prod(partition_sizes[r + 1 :]) % partition_sizes[r]) for r in range(m) ] except KeyError: raise Exception("Invalid parameters")