# Source code for xgi.readwrite.xgi_data

"""Load a data set from the xgi-data repository or a local file."""

from os.path import dirname, exists, join
from warnings import warn

from ..convert import cut_to_order, from_hif_dict, from_hypergraph_dict
from ..exception import XGIError
from ..utils import request_json_from_url, request_json_from_url_cached

# Public API of this module; `_request_from_xgi_data` is an internal helper.
__all__ = ["load_xgi_data", "download_xgi_data"]

def load_xgi_data(
    dataset=None,
    cache=True,
    read=False,
    path="",
    nodetype=None,
    edgetype=None,
    max_order=None,
):
    """Load a data set from the xgi-data repository or a local file.

    Parameters
    ----------
    dataset : str, optional
        Dataset name. Valid options are the top-level tags of the
        index.json file in the xgi-data repository. If None (default),
        prints the list of available datasets.
    cache : bool, optional
        Whether to cache the input data, by default True.
    read : bool, optional
        If read==True, search for a local copy of the data set.
        Use the local copy if it exists, otherwise use the xgi-data
        repository. By default, False. Ignored when `dataset` is None.
    path : str, optional
        Path to the directory holding a local copy of the data set.
    nodetype : type, optional
        Type to cast the node ID to, by default None.
    edgetype : type, optional
        Type to cast the edge ID to, by default None.
    max_order : int, optional
        Maximum order of edges to add to the hypergraph, by default None.
        Only forwarded when the data is requested from the repository;
        it is not passed to the reader of a local copy.

    Returns
    -------
    Hypergraph
        The loaded hypergraph. If the dataset chosen is a collection,
        returns a dictionary of Hypergraph objects. Returns None when
        `dataset` is None (the available names are only printed).

    Raises
    ------
    XGIError
        The specified dataset does not exist.
    """
    # NOTE(review): the original literal was empty, which cannot be requested.
    # Restored to the xgi-data index location -- confirm against upstream.
    index_url = "https://raw.githubusercontent.com/xgi-org/xgi-data/main/index.json"

    # A local lookup needs a name: with dataset=None fall through to the
    # listing branch below instead of failing on `None + ".json"`.
    if read and dataset is not None:
        cfp = join(path, dataset + ".json")
        # `download_xgi_data` saves files under the lower-cased dataset name,
        # so also look there when the caller used a different casing.
        lowered_cfp = join(path, dataset.lower() + ".json")
        local_copy = next((p for p in (cfp, lowered_cfp) if exists(p)), None)

        if local_copy is not None:
            from ..readwrite import read_json

            return read_json(local_copy, nodetype=nodetype, edgetype=edgetype)

        warn(
            f"No local copy was found at {cfp}. The data is requested "
            "from the xgi-data repository instead. To download a local "
            "copy, use `download_xgi_data`."
        )

    index_data = request_json_from_url(index_url)

    # If no dataset is specified, print a list of the available datasets.
    if dataset is None:
        print("Available datasets are the following:")
        print(*index_data, sep="\n")
        return

    key = dataset.lower()
    if key not in index_data:
        print("Valid dataset names:")
        print(*index_data, sep="\n")
        raise XGIError("Must choose a valid dataset name!")

    url = index_data[key]["url"]

    return _request_from_xgi_data(
        url, nodetype=nodetype, edgetype=edgetype, max_order=max_order, cache=cache
    )
def download_xgi_data(dataset, path="", collection_name=""):
    """Make a local copy of a dataset in the xgi-data repository.

    If the dataset is a collection, makes local copies of all the
    datasets in the collection and a main file pointing to all of
    the datasets.

    Parameters
    ----------
    dataset : str
        Dataset name. Valid options are the top-level tags of the
        index.json file in the xgi-data repository. The name is
        lower-cased before it is looked up and before it is used as
        the file name.
    path : str, optional
        Directory where the local copy should be saved. If none is given,
        save file to local directory.
    collection_name : str, optional
        The name of the collection of data (if any). If `dataset` is not a
        collection, this argument is unused.

    Raises
    ------
    XGIError
        The specified dataset does not exist.
    """
    from ..readwrite import write_json

    # NOTE(review): the original literal was empty, which cannot be requested.
    # Restored to the xgi-data index location -- confirm against upstream.
    index_url = "https://raw.githubusercontent.com/xgi-org/xgi-data/main/index.json"
    index_data = request_json_from_url(index_url)

    key = dataset.lower()
    if key not in index_data:
        print("Valid dataset names:")
        print(*index_data, sep="\n")
        raise XGIError("Must choose a valid dataset name!")

    url = index_data[key]["url"]

    # Fetch the data as-is: no ID casting and no order cut-off, so the
    # local copy holds everything the repository provides.
    H = _request_from_xgi_data(
        url, nodetype=None, edgetype=None, max_order=None, cache=True
    )

    if isinstance(H, dict):
        # A collection comes back as a dict of hypergraphs; it is written
        # relative to the target directory rather than to a single file.
        write_json(H, path, collection_name=collection_name)
    else:
        filename = join(path, key + ".json")
        write_json(H, filename)
def _request_from_xgi_data(
    url, nodetype=None, edgetype=None, max_order=None, cache=True
):
    """Request a dataset from xgi-data.

    Parameters
    ----------
    url : str
        Location of the JSON file to request.
    nodetype : type, optional
        Type to cast the node ID to, by default None.
    edgetype : type, optional
        Type to cast the edge ID to, by default None.
    max_order : int, optional
        Maximum order of edges to keep, by default None (keep all).
    cache : bool, optional
        Whether or not to cache the output

    Returns
    -------
    Data
        The requested data loaded from a json file. If the JSON describes
        a collection, a dictionary mapping each dataset name to its
        loaded data.

    Raises
    ------
    XGIError
        If the HTTP request is not successful or the dataset does not exist.

    See also
    ---------
    load_xgi_data
    """
    # URLs always use "/" separators; os.path.join would insert "\\" on
    # Windows, so resolve relative locations with URL semantics instead.
    from urllib.parse import urljoin

    if cache:
        jsondata = request_json_from_url_cached(url)
    else:
        jsondata = request_json_from_url(url)

    # The "incidences" key selects the HIF converter.
    if "incidences" in jsondata:
        H = from_hif_dict(jsondata, nodetype=nodetype, edgetype=edgetype)
        # Compare against None so that max_order=0 is honoured too.
        if max_order is not None:
            H = cut_to_order(H, order=max_order)
        return H

    # A collection lists its member datasets by path relative to this file;
    # each member is requested recursively with the same options.
    if jsondata.get("type") == "collection":
        collection = {}
        for name, data in jsondata["datasets"].items():
            member_url = urljoin(url, data["relative-path"])
            collection[name] = _request_from_xgi_data(
                member_url,
                nodetype=nodetype,
                edgetype=edgetype,
                max_order=max_order,
                cache=cache,
            )
        return collection

    # Anything else is passed to the hypergraph-dict converter.
    return from_hypergraph_dict(
        jsondata, nodetype=nodetype, edgetype=edgetype, max_order=max_order
    )