# Source code for xgi.readwrite.bigg_data

"""Load a metabolic network from the BiGG models database."""

from warnings import warn

from ..utils import request_json_from_url, request_json_from_url_cached

__all__ = ["load_bigg_data"]


def load_bigg_data(
    dataset=None,
    cache=True,
):
    """Load a metabolic network from the BiGG models database.

    The Biochemical, Genetic and Genomic (BiGG) knowledge base
    is hosted at http://bigg.ucsd.edu/. It contains metabolic
    reaction networks at the genome scale.

    We represent metabolites as nodes and metabolic reactions
    as directed edges where reactants are the tail of the directed
    edge and the products are the head of the directed edge.

    Parameters
    ----------
    dataset : str, default: None
        Dataset name. Valid options are the "bigg_id" tags in
        http://bigg.ucsd.edu/api/v2/models. If None, prints
        the list of available datasets.
    cache : bool, default: True
        If True, the model file is fetched with
        `request_json_from_url_cached`; otherwise with
        `request_json_from_url`. The index of available models is
        always fetched with `request_json_from_url`.

    Returns
    -------
    DiHypergraph or None
        The loaded dihypergraph, or None when `dataset` is None
        (in that case the available dataset names are printed).

    Raises
    ------
    XGIError
       The specified dataset does not exist.

    References
    ----------
    Zachary A. King, Justin Lu, Andreas Dräger,
    Philip Miller, Stephen Federowicz, Joshua A. Lerman,
    Ali Ebrahim, Bernhard O. Palsson, Nathan E. Lewis
    Nucleic Acids Research, Volume 44, Issue D1,
    4 January 2016, Pages D515–D522,
    https://doi.org/10.1093/nar/gkv1049
    """

    index_url = "http://bigg.ucsd.edu/api/v2/models"
    base_url = "http://bigg.ucsd.edu/static/models/"

    # The index is needed on both paths: to list the available datasets
    # and, later, as the first argument of `_bigg_to_dihypergraph`.
    index_data = request_json_from_url(index_url)

    # If no dataset is specified, print a list of the available datasets.
    if dataset is None:
        ids = [entry["bigg_id"] for entry in index_data["results"]]
        print("Available datasets are the following:")
        print(*ids, sep="\n")
        return None

    # NOTE(review): `dataset` is not checked against the index here, so the
    # XGIError documented above is not raised by this function itself;
    # presumably it comes from the request helper -- confirm.
    model_url = base_url + dataset + ".json"
    fetch = request_json_from_url_cached if cache else request_json_from_url
    model_data = fetch(model_url)

    return _bigg_to_dihypergraph(index_data, model_data)


def _bigg_to_dihypergraph(d_index, d_model):
    """Convert a BIGG-formatted dict to dihypergraph.

    Parameters
    ----------
    d_index : dict
        The BIGG index of models. Its "results" entry is searched for
        the item whose "bigg_id" equals the model's "id"; that item's
        "organism" is stored on the dihypergraph.
    d_model : dict
        A BIGG-formatted model dict with the keys "id", "metabolites"
        (items with "id" and "name") and "reactions" (items with "id",
        "name", "lower_bound", "upper_bound" and "metabolites", a
        mapping from metabolite id to a signed numeric coefficient).

    Returns
    -------
    DiHypergraph
        The dihypergraph from the selected BIGG model.

    Warns
    -----
    UserWarning
        If the model is not listed in the index (the "organism"
        attribute is then left unset), or if a reaction has neither
        reactants nor products (the reaction is skipped).

    Notes
    -----
    The code for parsing a metabolic reaction is rewritten
    from a function by @pietrotraversa.

    We use the `lower_bound` and `upper_bound` variables to
    determine whether a reaction is a forward, reverse,
    or reversible reaction. A reversible reaction is added as two
    edges, the second one with "_reverse" appended to its id.
    Reactions with `lower_bound >= 0` and `upper_bound <= 0`
    match none of these cases and are not added.
    """
    from .. import DiHypergraph

    DH = DiHypergraph()

    model_id = d_model["id"]
    DH["name"] = model_id

    info = next(
        (item for item in d_index["results"] if item["bigg_id"] == model_id), None
    )
    if info is None:
        # Without this guard, a model missing from the index fails with an
        # opaque TypeError on `None["organism"]`.
        warn(f"{model_id} was not found in the index; organism is not set!")
    else:
        DH["organism"] = info["organism"]

    for metabolite in d_model["metabolites"]:
        DH.add_node(metabolite["id"], name=metabolite["name"])

    for rxn in d_model["reactions"]:
        lower = rxn["lower_bound"]
        upper = rxn["upper_bound"]
        reversible = lower < 0 and upper > 0

        # The three cases are mutually exclusive. Note that a zero
        # coefficient counts as a reactant for one-directional reactions
        # and is ignored for reversible ones.
        if lower >= 0 and upper > 0:
            # forward direction
            coeffs = rxn["metabolites"]
            reactants = {m for m, val in coeffs.items() if val <= 0}
            products = {m for m, val in coeffs.items() if val > 0}
        elif lower < 0 and upper <= 0:
            # reverse direction: the roles implied by the signs are swapped
            coeffs = rxn["metabolites"]
            reactants = {m for m, val in coeffs.items() if val >= 0}
            products = {m for m, val in coeffs.items() if val < 0}
        elif reversible:
            coeffs = rxn["metabolites"]
            reactants = {m for m, val in coeffs.items() if val < 0}
            products = {m for m, val in coeffs.items() if val > 0}
        else:
            # lower >= 0 and upper <= 0: no direction applies, nothing to add
            continue

        if not reactants and not products:
            warn(f"{rxn['id']} is an empty reaction!")
            continue

        DH.add_edge((reactants, products), id=rxn["id"], name=rxn["name"])
        if reversible:
            # add reverse reaction
            DH.add_edge(
                (products, reactants),
                id=str(rxn["id"]) + "_reverse",
                name=rxn["name"],
            )

    return DH