Source code for xgi.readwrite.bipartite

"""Read from and write to bipartite formats."""

from ..exception import XGIError
from ..generators import empty_hypergraph

# Names exported by ``from <this module> import *``.
# NOTE(review): ``generate_bipartite_edgelist`` is defined in this module but
# is not listed here -- presumably it is meant as an internal helper; confirm.
__all__ = [
    "read_bipartite_edgelist",
    "write_bipartite_edgelist",
    "parse_bipartite_edgelist",
]


def generate_bipartite_edgelist(H, delimiter=" "):
    """Generate the lines of a bipartite edge list from a Hypergraph object.

    One line is produced for every (node, edge) membership: the node ID
    first, then the edge ID, separated by `delimiter`.

    Parameters
    ----------
    H: Hypergraph object
        The hypergraph of interest
    delimiter: char, default: space (" ")
        Specifies the delimiter between the node ID and the edge ID

    Yields
    ------
    string
        A line to be written to the output file (without a trailing
        newline).
    """
    # `edge_id` rather than `id`, so the builtin `id` is not shadowed.
    for edge_id in H.edges:
        for node in H.edges.members(edge_id):
            yield delimiter.join((str(node), str(edge_id)))
def write_bipartite_edgelist(H, path, delimiter=" ", encoding="utf-8"):
    """Write a Hypergraph object to a file as a bipartite edgelist.

    Each line of the file holds a node ID and an edge ID separated by
    `delimiter` and is terminated by a single "\\n".

    Parameters
    ----------
    H: Hypergraph object
        The hypergraph of interest
    path: string
        The path of the file to write to
    delimiter: char, default: space (" ")
        Specifies the delimiter between the node ID and the edge ID
    encoding: string, default: "utf-8"
        Encoding of the file

    See Also
    --------
    read_bipartite_edgelist

    Example
    -------
    >>> import xgi
    >>> H = xgi.random_hypergraph(50, [0.01, 0.001])
    >>> # xgi.write_bipartite_edgelist(H, "test.csv", delimiter=",")

    """
    # Open in text mode so the codec encodes the stream as a whole: encoding
    # every line separately would repeat the byte-order mark on each line for
    # encodings such as "utf-16".  newline="\n" disables platform newline
    # translation, so the output is identical on every operating system.
    with open(path, "w", encoding=encoding, newline="\n") as file:
        file.writelines(
            line + "\n" for line in generate_bipartite_edgelist(H, delimiter)
        )
def read_bipartite_edgelist(
    path,
    comments="#",
    delimiter=None,
    create_using=None,
    nodetype=None,
    edgetype=None,
    dual=False,
    encoding="utf-8",
):
    """Read a file containing a bipartite edge list and convert it to a
    Hypergraph object.

    The file is decoded with `encoding` and its lines are handed to
    `parse_bipartite_edgelist`, which does the actual parsing.

    Parameters
    ----------
    path: string
        The path of the file to read from
    comments: string, default: "#"
        The token that denotes comments in the file
    delimiter: char, default: None
        Specifies the delimiter between the entries of a line. None splits
        on runs of whitespace.
    create_using : Hypergraph constructor, optional
        The hypergraph object to add the data to, by default None
    nodetype: type
        type that the node labels will be cast to
    edgetype: type
        type that the edge labels will be cast to
    dual: bool, default: False
        Specifies whether the node IDs are in the second column.
        If False, the node IDs are in the first column.
    encoding: string, default: "utf-8"
        Encoding of the file

    Returns
    -------
    A Hypergraph object
        The loaded hypergraph

    See Also
    --------
    write_bipartite_edgelist

    Example
    -------
    >>> import xgi
    >>> # H = xgi.read_bipartite_edgelist("test.csv", delimiter=",")

    """
    # Decode while reading instead of splitting raw bytes on 0x0A and decoding
    # each piece: byte-level splitting cuts multi-byte code units in half for
    # encodings such as UTF-16/UTF-32.  newline="\n" keeps "\n" as the only
    # line terminator, as it was when iterating the file in binary mode.
    with open(path, "r", encoding=encoding, newline="\n") as file:
        # Parsing must finish inside the `with` block because the file
        # object is consumed lazily, line by line.
        return parse_bipartite_edgelist(
            file,
            comments=comments,
            delimiter=delimiter,
            create_using=create_using,
            nodetype=nodetype,
            edgetype=edgetype,
            dual=dual,
        )
def _cast_label(raw, cast, kind):
    """Return `raw` converted with `cast`, or unchanged when `cast` is None.

    `kind` ("node" or "edge") only appears in the error message. A
    ValueError raised by `cast` is re-raised as a TypeError.
    """
    if cast is None:
        return raw
    try:
        return cast(raw)
    except ValueError as e:
        raise TypeError(
            f"Failed to convert the {kind} with ID {raw} to type {cast}."
        ) from e


def parse_bipartite_edgelist(
    lines,
    comments="#",
    delimiter=None,
    create_using=None,
    nodetype=None,
    edgetype=None,
    dual=False,
):
    """Parse an iterable of strings containing a bipartite edge list and
    convert it to a Hypergraph object.

    Only the first two entries of each line are read. By default the first
    entry is the node ID and the second entry is the edge ID; `dual=True`
    swaps the two columns. Everything from the comment token to the end of a
    line is ignored, and lines that are blank after removing comments and
    surrounding whitespace are skipped.

    Parameters
    ----------
    lines: iterable of strings
        Lines where each line is a bipartite edge
    comments: string, default: "#"
        The token that denotes comments to ignore. None disables comment
        handling.
    delimiter: char, default: None
        Specifies the delimiter between the entries of a line. None splits
        on runs of whitespace.
    create_using : Hypergraph constructor, optional
        The hypergraph object to add the data to, by default None
    nodetype: type
        type that the node labels will be cast to
    edgetype: type
        type that the edge labels will be cast to
    dual: bool, default: False
        Specifies whether the node IDs are in the second column.
        If False, the node IDs are in the first column.

    Raises
    ------
    XGIError
        If a non-blank line contains fewer than two entries
    TypeError
        If a node or edge label fails to be converted to the requested type

    Returns
    -------
    Hypergraph
        The loaded hypergraph.
    """
    H = empty_hypergraph(create_using)

    node_index, edge_index = (1, 0) if dual else (0, 1)

    for line in lines:
        if comments is not None:
            p = line.find(comments)
            if p >= 0:
                line = line[:p]

        # Strip before testing for emptiness so that blank lines and lines
        # holding only an indented comment are skipped rather than reported
        # as malformed.
        line = line.strip()
        if not line:
            continue

        s = line.split(delimiter)
        if len(s) < 2:
            raise XGIError("Each line must contain at least two entries!")

        node = _cast_label(s[node_index], nodetype, "node")
        edge = _cast_label(s[edge_index], edgetype, "edge")

        H.add_node_to_edge(edge, node)
    return H