# Source code for xgi.readwrite.bigg_data
"""Load a metabolic network from the BiGG models database."""
from warnings import warn
from ..utils import request_json_from_url, request_json_from_url_cached
__all__ = ["load_bigg_data"]
def load_bigg_data(
    dataset=None,
    cache=True,
):
    """Fetch a metabolic model from the BiGG database as a dihypergraph.

    The Biochemical, Genetic and Genomic (BiGG) knowledge base
    (http://bigg.ucsd.edu/) hosts genome-scale metabolic reaction
    networks.

    Metabolites become nodes and each metabolic reaction becomes a
    directed edge whose tail holds the reactants and whose head holds
    the products.

    Parameters
    ----------
    dataset : str, default: None
        Dataset name. Valid options are the "bigg_id" tags listed at
        http://bigg.ucsd.edu/api/v2/models. If None, the available
        dataset names are printed and nothing is loaded.
    cache : bool, optional
        If true (the default), the model file is requested through
        `request_json_from_url_cached`; otherwise through
        `request_json_from_url`. The model index itself is always
        requested through `request_json_from_url`.

    Returns
    -------
    DiHypergraph or None
        The loaded dihypergraph, or None when `dataset` is None.

    Raises
    ------
    XGIError
        The specified dataset does not exist. This function does not
        raise it directly; presumably it originates in the request
        helpers (confirm against `xgi.utils`).

    References
    ----------
    Zachary A. King, Justin Lu, Andreas Dräger,
    Philip Miller, Stephen Federowicz, Joshua A. Lerman,
    Ali Ebrahim, Bernhard O. Palsson, Nathan E. Lewis
    Nucleic Acids Research, Volume 44, Issue D1,
    4 January 2016, Pages D515–D522,
    https://doi.org/10.1093/nar/gkv1049

    """
    models_index_url = "http://bigg.ucsd.edu/api/v2/models"
    models_base_url = "http://bigg.ucsd.edu/static/models/"

    # The index is needed in both modes: to list the dataset names and,
    # later, to look up metadata for the selected model.
    index_data = request_json_from_url(models_index_url)

    # Without a dataset name, just list what is available.
    if dataset is None:
        available = [entry["bigg_id"] for entry in index_data["results"]]
        print("Available datasets are the following:")
        print(*available, sep="\n")
        return None

    fetch = request_json_from_url_cached if cache else request_json_from_url
    model_data = fetch(models_base_url + dataset + ".json")

    return _bigg_to_dihypergraph(index_data, model_data)
def _bigg_to_dihypergraph(d_index, d_model):
    """Convert a BiGG-formatted model dict to a dihypergraph.

    Parameters
    ----------
    d_index : dict
        The BiGG model index. Its "results" entry is searched for the
        item whose "bigg_id" equals the model id, and that item's
        "organism" value is stored on the dihypergraph.
    d_model : dict
        A BiGG-formatted model with the keys "id", "metabolites"
        (items with "id" and "name") and "reactions" (items with "id",
        "name", "lower_bound", "upper_bound" and "metabolites").

    Returns
    -------
    DiHypergraph
        The dihypergraph from the selected BiGG model. Its "name"
        attribute is the model id. Its "organism" attribute is set only
        when the model is listed in `d_index`; otherwise a warning is
        emitted and the attribute is left unset.

    Notes
    -----
    The code for parsing a metabolic reaction is rewritten
    from a function by @pietrotraversa.

    We use the `lower_bound` and `upper_bound` variables to
    determine whether a reaction is a forward, reverse,
    or reversible reaction:

    * forward (``lower >= 0`` and ``upper > 0``): metabolites with a
      positive coefficient form the head, all others the tail;
    * reverse (``lower < 0`` and ``upper <= 0``): metabolites with a
      negative coefficient form the head, all others the tail;
    * reversible (``lower < 0`` and ``upper > 0``): positive
      coefficients form the head and negative ones the tail
      (zero coefficients are ignored); a second edge with the suffix
      "_reverse" and swapped tail/head is added as well.

    Reactions whose bounds match none of these cases (for example both
    bounds equal to zero) are skipped. Reactions for which both the
    tail and the head end up empty trigger a warning and are skipped.

    """
    from .. import DiHypergraph

    DH = DiHypergraph()

    model_id = d_model["id"]
    DH["name"] = model_id

    # The index may not list the model; guard against that instead of
    # failing with an opaque TypeError on `None["organism"]`.
    info = next(
        (item for item in d_index["results"] if item["bigg_id"] == model_id),
        None,
    )
    if info is None:
        warn(f"{model_id} is not listed in the BiGG index; organism not set.")
    else:
        DH["organism"] = info["organism"]

    for m in d_model["metabolites"]:
        DH.add_node(m["id"], name=m["name"])

    for r in d_model["reactions"]:
        lower = r["lower_bound"]
        upper = r["upper_bound"]

        forward = lower >= 0 and upper > 0
        reverse = lower < 0 and upper <= 0
        reversible = lower < 0 and upper > 0

        # The three cases are mutually exclusive; anything else
        # (e.g. lower >= 0 and upper <= 0) carries no usable direction.
        if not (forward or reverse or reversible):
            continue

        stoichiometry = r["metabolites"]

        if forward:
            products = {m for m, val in stoichiometry.items() if val > 0}
            reactants = {m for m, val in stoichiometry.items() if val <= 0}
        elif reverse:
            # The reaction only runs right-to-left, so the roles of the
            # stoichiometric signs are swapped.
            reactants = {m for m, val in stoichiometry.items() if val >= 0}
            products = {m for m, val in stoichiometry.items() if val < 0}
        else:
            products = {m for m, val in stoichiometry.items() if val > 0}
            reactants = {m for m, val in stoichiometry.items() if val < 0}

        if not reactants and not products:
            warn(f"{r['id']} is an empty reaction!")
            continue

        DH.add_edge((reactants, products), idx=r["id"], name=r["name"])

        if reversible:
            # A reversible reaction is represented by a pair of
            # oppositely oriented edges.
            DH.add_edge(
                (products, reactants), idx=str(r["id"]) + "_reverse", name=r["name"]
            )

    return DH