Source code for xgi.generators.uniform

"""Generate random uniform hypergraphs."""
import itertools
import operator
import random
import warnings
from functools import reduce

import numpy as np

from ..exception import XGIError
from .classic import empty_hypergraph

__all__ = [
    "uniform_hypergraph_configuration_model",
    "uniform_HSBM",
    "uniform_HPPM",
    "uniform_erdos_renyi_hypergraph",
]


[docs]def uniform_hypergraph_configuration_model(k, m, seed=None): """ A function to generate an m-uniform configuration model Parameters ---------- k : dictionary This is a dictionary where the keys are node ids and the values are node degrees. m : int specifies the hyperedge size seed : integer or None (default) The seed for the random number generator Returns ------- Hypergraph object The generated hypergraph Warns ----- warnings.warn If the sums of the degrees are not divisible by m, the algorithm still runs, but raises a warning and adds an additional connection to random nodes to satisfy this condition. Notes ----- This algorithm normally creates multi-edges and loopy hyperedges. We remove the loopy hyperedges. References ---------- "The effect of heterogeneity on hypergraph contagion models" by Nicholas W. Landry and Juan G. Restrepo https://doi.org/10.1063/5.0020034 Example ------- >>> import xgi >>> import random >>> n = 1000 >>> m = 3 >>> k = {1: 1, 2: 2, 3: 3, 4: 3} >>> H = xgi.uniform_hypergraph_configuration_model(k, m) """ if seed is not None: random.seed(seed) # Making sure we have the right number of stubs remainder = sum(k.values()) % m if remainder != 0: warnings.warn( "This degree sequence is not realizable. " "Increasing the degree of random nodes so that it is." ) random_ids = random.sample(list(k.keys()), int(round(m - remainder))) for id in random_ids: k[id] = k[id] + 1 stubs = [] # Creating the list to index through for id in k: stubs.extend([id] * int(k[id])) H = empty_hypergraph() H.add_nodes_from(k.keys()) while len(stubs) != 0: u = random.sample(range(len(stubs)), m) edge = set() for index in u: edge.add(stubs[index]) if len(edge) == m: H.add_edge(edge) for index in sorted(u, reverse=True): del stubs[index] return H
[docs]def uniform_HSBM(n, m, p, sizes, seed=None): """Create a uniform hypergraph stochastic block model (HSBM). Parameters ---------- n : int The number of nodes m : int The hyperedge size p : m-dimensional numpy array tensor of probabilities between communities sizes : list or 1D numpy array The sizes of the community blocks in order seed : integer or None (default) The seed for the random number generator Returns ------- Hypergraph The constructed SBM hypergraph Raises ------ XGIError - If the length of sizes and p do not match. - If p is not a tensor with every dimension equal - If p is not m-dimensional - If the entries of p are not in the range [0, 1] - If the sum of the vector of sizes does not equal the number of nodes. Exception If there is an integer overflow error See Also -------- uniform_HPPM References ---------- Nicholas W. Landry and Juan G. Restrepo. "Polarization in hypergraphs with community structure." Preprint, 2023. https://doi.org/10.48550/arXiv.2302.13967 """ # Check if dimensions match if len(sizes) != np.size(p, axis=0): raise XGIError("'sizes' and 'p' do not match.") if len(np.shape(p)) != m: raise XGIError("The dimension of p does not match m") # Check that p has the same length over every dimension. if len(set(np.shape(p))) != 1: raise XGIError("'p' must be a square tensor.") if np.max(p) > 1 or np.min(p) < 0: raise XGIError("Entries of 'p' not in [0,1].") if np.sum(sizes) != n: raise XGIError("Sum of sizes does not match n") if seed is not None: np.random.seed(seed) node_labels = range(n) H = empty_hypergraph() H.add_nodes_from(node_labels) block_range = range(len(sizes)) # Split node labels in a partition (list of sets). size_cumsum = [sum(sizes[0:x]) for x in range(0, len(sizes) + 1)] partition = [ list(node_labels[size_cumsum[x] : size_cumsum[x + 1]]) for x in range(0, len(size_cumsum) - 1) ] for block in itertools.product(block_range, repeat=m): if p[block] == 1: # Test edges cases p_ij = 0 or 1 edges = itertools.product((partition[i] for i in block_range)) for e in edges: H.add_edge(e) elif p[block] > 0: partition_sizes = [len(partition[i]) for i in block] max_index = reduce(operator.mul, partition_sizes, 1) if max_index < 0: raise Exception("Index overflow error!") index = np.random.geometric(p[block]) - 1 while index < max_index: indices = _index_to_edge_partition(index, partition_sizes, m) e = {partition[block[i]][indices[i]] for i in range(m)} if len(e) == m: H.add_edge(e) index += np.random.geometric(p[block]) return H
[docs]def uniform_HPPM(n, m, k, epsilon, rho=0.5, seed=None): """Construct the m-uniform hypergraph planted partition model (m-HPPM) Parameters ---------- n : int > 0 Number of nodes m : int > 0 Hyperedge size k : float > 0 Mean degree epsilon : float > 0 Imbalance parameter rho : float between 0 and 1, optional The fraction of nodes in community 1, default 0.5 seed : integer or None (default) The seed for the random number generator Returns ------- Hypergraph The constructed m-HPPM hypergraph. Raises ------ XGIError - If rho is not between 0 and 1 - If the mean degree is negative. - If epsilon is not between 0 and 1 See Also -------- uniform_HSBM References ---------- Nicholas W. Landry and Juan G. Restrepo. "Polarization in hypergraphs with community structure." Preprint, 2023. https://doi.org/10.48550/arXiv.2302.13967 """ if rho < 0 or rho > 1: raise XGIError("The value of rho must be between 0 and 1") if k < 0: raise XGIError("The mean degree must be non-negative") if epsilon < 0 or epsilon > 1: raise XGIError("epsilon must be between 0 and 1") sizes = [int(rho * n), n - int(rho * n)] p = k / (m * n ** (m - 1)) # ratio of inter- to intra-community edges q = rho**m + (1 - rho) ** m r = 1 / q - 1 p_in = (1 + r * epsilon) * p p_out = (1 - epsilon) * p p = p_out * np.ones([2] * m) p[tuple([0] * m)] = p_in p[tuple([1] * m)] = p_in return uniform_HSBM(n, m, p, sizes, seed=seed)
[docs]def uniform_erdos_renyi_hypergraph(n, m, p, p_type="degree", seed=None): """Generate an m-uniform Erdős–Rényi hypergraph This creates a hypergraph with `n` nodes where hyperedges of size `m` are created at random to obtain a mean degree of `k`. Parameters ---------- n : int > 0 Number of nodes m : int > 0 Hyperedge size p : float or int > 0 Mean expected degree if p_type="degree" and probability of an m-hyperedge if p_type="prob" p_type : str "degree" or "prob", by default "degree" seed : integer or None (default) The seed for the random number generator Returns ------- Hypergraph The Erdos Renyi hypergraph See Also -------- random_hypergraph """ if seed is not None: np.random.seed(seed) node_labels = range(n) H = empty_hypergraph() H.add_nodes_from(node_labels) if p_type == "degree": q = p / (m * n ** (m - 1)) # wiring probability elif p_type == "prob": q = p else: raise XGIError("Invalid p_type!") if q > 1 or q < 0: raise XGIError("Probability not in [0,1].") index = np.random.geometric(q) - 1 # -1 b/c zero indexing max_index = n**m while index < max_index: e = set(_index_to_edge(index, n, m)) if len(e) == m: H.add_edge(e) index += np.random.geometric(q) return H
def _index_to_edge(index, n, m): """Generate a hyperedge given an index in the list of possible edges. Parameters ---------- index : int > 0 The index of the hyperedge in the list of all possible hyperedges. n : int > 0 The number of nodes m : int > 0 The hyperedge size. Returns ------- list The reconstructed hyperedge See Also -------- _index_to_edge_partition References ---------- https://stackoverflow.com/questions/53834707/element-at-index-in-itertools-product """ return [(index // (n**r) % n) for r in range(m - 1, -1, -1)] def _index_to_edge_partition(index, partition_sizes, m): """Generate a hyperedge given an index in the list of possible edges and a partition of community labels. Parameters ---------- index : int > 0 The index of the hyperedge in the list of all possible hyperedges. n : int > 0 The number of nodes m : int > 0 The hyperedge size. Returns ------- list The reconstructed hyperedge See Also -------- _index_to_edge """ try: return [ int(index // np.prod(partition_sizes[r + 1 :]) % partition_sizes[r]) for r in range(m) ] except KeyError: raise Exception("Invalid parameters")