Source code for xgi.readwrite.xgi_data

"""Load a data set from the xgi-data repository or a local file."""
import json
import os
from warnings import warn

from .. import convert
from ..exception import XGIError
from ..utils import request_json_from_url, request_json_from_url_cached

# Names exported by a star-import of this module. The helper
# `_request_from_xgi_data` is private and is not listed here.
__all__ = ["load_xgi_data", "download_xgi_data"]


def load_xgi_data(
    dataset=None,
    cache=True,
    read=False,
    path="",
    nodetype=None,
    edgetype=None,
    max_order=None,
):
    """Load a data set from the xgi-data repository or a local file.

    Parameters
    ----------
    dataset : str, optional
        Dataset name. Valid options are the top-level tags of the
        index.json file in the xgi-data repository. If None (default),
        prints the list of available datasets.
    cache : bool, optional
        Whether to cache the input data, by default True. Only used when
        the data is requested from the xgi-data repository.
    read : bool, optional
        If read==True, search for a local copy of the data set. Use the
        local copy if it exists, otherwise use the xgi-data repository.
        By default, False.
    path : str, optional
        Path to the directory holding a local copy of the data set. The
        file looked up is ``<path>/<dataset>.json``. By default, "".
    nodetype : type, optional
        Type to cast the node ID to, by default None.
    edgetype : type, optional
        Type to cast the edge ID to, by default None.
    max_order : int, optional
        Maximum order of edges to add to the hypergraph, by default None.

    Returns
    -------
    Hypergraph or None
        The loaded hypergraph. None is returned when `dataset` is None,
        in which case the available datasets are only printed.

    Raises
    ------
    XGIError
        The specified dataset does not exist.
    """
    index_url = "https://raw.githubusercontent.com/xgi-org/xgi-data/main/index.json"

    # If no dataset is specified, print a list of the available datasets.
    if dataset is None:
        index_data = request_json_from_url(index_url)
        print("Available datasets are the following:")
        print(*index_data, sep="\n")
        return None

    if read:
        cfp = os.path.join(path, dataset + ".json")
        if os.path.exists(cfp):
            # Use a context manager so the file handle is closed even if
            # parsing fails; JSON text is read as UTF-8 regardless of the
            # platform's default encoding.
            with open(cfp, "r", encoding="utf-8") as file:
                data = json.load(file)
            return convert.dict_to_hypergraph(
                data, nodetype=nodetype, edgetype=edgetype, max_order=max_order
            )

        # No local copy: tell the user and fall through to the remote request.
        warn(
            f"No local copy was found at {cfp}. The data is requested "
            "from the xgi-data repository instead. To download a local "
            "copy, use `download_xgi_data`."
        )

    data = _request_from_xgi_data(index_url, dataset, cache=cache)
    return convert.dict_to_hypergraph(
        data, nodetype=nodetype, edgetype=edgetype, max_order=max_order
    )
def download_xgi_data(dataset, path=""):
    """Make a local copy of a dataset in the xgi-data repository.

    The dataset is requested from the repository and written as JSON to
    ``<path>/<dataset>.json``.

    Parameters
    ----------
    dataset : str
        Dataset name. Valid options are the top-level tags of the
        index.json file in the xgi-data repository.
    path : str, optional
        Path to where the local copy should be saved. If none is given,
        save file to local directory.
    """
    index_url = "https://raw.githubusercontent.com/xgi-org/xgi-data/main/index.json"

    jsondata = _request_from_xgi_data(index_url, dataset)

    # The context manager guarantees the file is flushed and closed even if
    # serialization raises part-way through.
    target = os.path.join(path, dataset + ".json")
    with open(target, "w", encoding="utf-8") as jsonfile:
        json.dump(jsondata, jsonfile)
def _request_from_xgi_data(index_url, dataset=None, cache=True):
    """Request a dataset from xgi-data.

    Parameters
    ----------
    index_url : str
        URL of the index.json file that maps dataset names to entries
        containing the dataset's "url".
    dataset : str, optional
        Dataset name. Valid options are the top-level tags of the
        index.json file in the xgi-data repository. The name is
        lower-cased before it is looked up. If None, prints the list of
        available datasets.
    cache : bool, optional
        Whether or not to cache the output

    Returns
    -------
    Data
        The requested data loaded from a json file, or None when
        `dataset` is None.

    Raises
    ------
    XGIError
        If the HTTP request is not successful or the dataset does not exist.

    See Also
    --------
    load_xgi_data
    """
    index_data = request_json_from_url(index_url)

    # Honor the documented behavior for a missing name instead of failing
    # with an AttributeError on `None.lower()`.
    if dataset is None:
        print("Available datasets are the following:")
        print(*index_data, sep="\n")
        return None

    key = dataset.lower()
    if key not in index_data:
        print("Valid dataset names:")
        print(*index_data, sep="\n")
        raise XGIError("Must choose a valid dataset name!")

    url = index_data[key]["url"]
    if cache:
        return request_json_from_url_cached(url)
    return request_json_from_url(url)