Source code for xgi.convert.pandas
"""Methods for converting to and from a Pandas dataframe."""
from collections import defaultdict
import pandas as pd
from ..core import SimplicialComplex
from ..exception import XGIError
from ..generators import empty_hypergraph
__all__ = ["from_bipartite_pandas_dataframe", "to_bipartite_pandas_dataframe"]
def from_bipartite_pandas_dataframe(
    df, create_using=None, node_column=0, edge_column=1
):
    """Create a hypergraph from a pandas dataframe given
    specified node and edge columns.

    Every row of the two selected columns is read as one
    (node ID, edge ID) membership pair.

    Parameters
    ----------
    df : Pandas dataframe
        A dataframe where specified columns list the node IDs
        and the associated edge IDs
    create_using : Hypergraph constructor, optional
        Forwarded to ``empty_hypergraph`` to build the object that
        the data is added to, by default None
    node_column : hashable, optional
        The column with the node IDs, by default 0
        Can specify names or indices
    edge_column : hashable, optional
        The column with the edge IDs, by default 1
        Can specify names or indices

    Returns
    -------
    Hypergraph object
        The object produced by ``empty_hypergraph(create_using)``,
        populated with the memberships found in the dataframe

    Raises
    ------
    XGIError
        If the columns can be resolved neither as column labels nor
        as positions in ``df.columns`` (this includes integer
        positions that are out of range)
    """
    H = empty_hypergraph(create_using)

    # Interpret the arguments as column labels first ...
    try:
        d = df[[node_column, edge_column]]
    except KeyError:
        # ... and fall back to treating them as positions in the column list.
        try:
            columns = list(df.columns)
            d = df[[columns[node_column], columns[edge_column]]]
        except (KeyError, TypeError, IndexError) as err:
            # IndexError: an integer position beyond the last column.
            # TypeError: a non-integer value that is not a label either.
            raise XGIError("Invalid columns specified") from err

    if isinstance(H, SimplicialComplex):
        # Group the nodes by edge ID, keeping first-seen order and
        # skipping repeated (node, edge) rows, then add every group
        # as one simplex.
        simplex_list = defaultdict(list)
        for row in d.itertuples(index=False, name=None):
            node, edge = row[0], row[1]
            members = simplex_list[edge]
            if node not in members:
                members.append(node)
        H.add_simplices_from(list(simplex_list.values()))
    else:
        # Add the memberships one row at a time.
        for row in d.itertuples(index=False, name=None):
            node, edge = row[0], row[1]
            H.add_node_to_edge(edge, node)

    return H
def to_bipartite_pandas_dataframe(H):
    """Create a two column dataframe from a hypergraph.

    Parameters
    ----------
    H : Hypergraph or Simplicial Complex
        The object to convert. Its ``_node`` mapping is read: each key
        is written to the "Node ID" column, paired in turn with every
        ID contained in the value stored under that key.

    Returns
    -------
    Pandas Dataframe object
        A two column dataframe with columns "Node ID" and "Edge ID",
        holding one row per (node, edge) membership
    """
    # Flatten the node -> edge IDs mapping into [node, edge] rows.
    rows = [
        [node_id, edge_id]
        for node_id, edge_ids in H._node.items()
        for edge_id in edge_ids
    ]
    return pd.DataFrame(rows, columns=["Node ID", "Edge ID"])