import os
import re
import logging
from urllib.parse import urlparse
from urllib.request import HTTPError
from xml.dom import *
from xml.dom.minidom import parse
from bs4 import BeautifulSoup
from colomoto_jupyter import *
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Recognises http(s) model URLs hosted on a cellcollective.org domain.
# Group 1 holds the digits following '#'; group 3 holds the optional digits
# following ':' (None when that part is absent).
urlidentifier = re.compile(
    r"https?://[^/]*\bcellcollective\.org/"
    r"[^/]*#(\d+)"
    r"(:(\d+))?\b"
)
def id_from_url(url):
    """
    Extract a CellCollective model identifier and version from `url`.

    Two forms are recognised:

    - ``cellcollective://{model_id}`` or
      ``cellcollective://{model_id}:{model_version}``
    - an http(s) URL accepted by the module-level ``urlidentifier`` pattern,
      searched after every ``"module/"`` substring has been removed

    :param str url: candidate URL
    :return: a pair ``(model_id, model_version)``, or ``None`` when `url` is
        not recognised. ``model_id`` is a string; ``model_version`` is a
        string when present in `url` and the integer ``1`` otherwise.
    :rtype: tuple or None
    """
    uri = urlparse(url)
    if not uri.netloc:
        return None
    if uri.scheme == "cellcollective":
        # Split on the first ':' only so the result is always a pair (callers
        # unpack it into two names); an absent or empty version defaults to 1,
        # like the regex branch below.
        model_id, _, model_version = uri.netloc.partition(":")
        return model_id, model_version or 1
    urlmatch = urlidentifier.search(url.replace("module/", ""))
    if urlmatch:
        return urlmatch.group(1), urlmatch.group(3) or 1
    return None
def url_matches(url):
    """
    Tell whether `url` designates a CellCollective model.

    :param str url: candidate URL
    :return: ``True`` when :func:`id_from_url` returns an identifier for
        `url`, ``False`` when it returns ``None``
    :rtype: bool
    """
    return id_from_url(url) is not None
class CellCollectiveConnector(object):
    """
    Reference to one version of a CellCollective model, giving the URLs and
    local file name used for its SBML export.
    """

    def __init__(self, identifier, version=1):
        """
        :param identifier: a URL understood by ``id_from_url``, or a bare
            model identifier
        :param version: model version, used only when `identifier` is not
            recognised by ``id_from_url``; otherwise the version returned by
            ``id_from_url`` takes precedence
        """
        idv = id_from_url(identifier) or identifier
        if not isinstance(idv, tuple):
            # Bare identifier: pair it with the requested version.
            idv = idv, version
        self.id, self.version = idv

    @property
    def sbml_urls(self):
        """
        Candidate download URLs for the SBML export, in the order they should
        be tried; they differ only in the case of the ``type`` value.

        :rtype: list
        """
        base = (f"https://research.cellcollective.org/api/model/{self.id}"
                f"/export/version/{self.version}")
        return [f"{base}?type=sbml", f"{base}?type=SBML"]

    @property
    def sbml_basename(self):
        """
        File name under which the SBML export of this model version is stored.

        :rtype: str
        """
        return f"cellcollective-{self.id}-{self.version}.sbml"
def connect(identifier):
    """
    Build a :class:`CellCollectiveConnector` for `identifier`.

    :param identifier: model URL or bare model identifier, passed unchanged
        to the connector constructor
    :rtype: CellCollectiveConnector
    """
    return CellCollectiveConnector(identifier)
# XML namespace of the SBML Level 3 "qual" package, used to look up
# qualitative species elements and their attributes.
QUALNS = "http://www.sbml.org/sbml/level3/version1/qual/version1"

# Names of the species metadata fields.
METADATA_UNITPROTID = "UniProtID"
METADATA_GENENAME = "GeneName"
METADATA_NCBIGENEID = "NCBIGeneID"
class CellCollectiveSBMLModel(object):
    """
    View of an SBML file exported from CellCollective, indexing its
    qualitative species by identifier and by name.
    """

    # Maps lowercase key spellings to the METADATA_* field names.
    # NOTE(review): not read by any method visible in this class body;
    # presumably consumed by `species_metadata` - confirm.
    _key2metadata = {
        "uniprotid": METADATA_UNITPROTID,
        "uniprotaccessionid": METADATA_UNITPROTID,
        "genename": METADATA_GENENAME,
        "ncbigeneid": METADATA_NCBIGENEID,
    }

    def __init__(self, localfile):
        """
        Parse `localfile` and index its ``qualitativeSpecies`` elements.

        :param localfile: SBML file, as accepted by
            ``xml.dom.minidom.parse``
        """
        self.localfile = localfile
        self.dom = parse(localfile)
        self.root = self.dom.documentElement
        species_elts = self.root.getElementsByTagNameNS(QUALNS, "qualitativeSpecies")
        # species identifier -> DOM element
        self.id2elt = {elt.getAttributeNS(QUALNS, "id"): elt
                       for elt in species_elts}
        # species name -> species identifier
        self.name2id = {elt.getAttributeNS(QUALNS, "name"): species_id
                        for species_id, elt in self.id2elt.items()}

    @property
    def species(self):
        """
        Returns the set of defined species

        :rtype: set
        """
        return set(self.name2id)

    # NOTE(review): the two methods below call `self.species_metadata`, which
    # is not defined in this class body as shown; it must be provided
    # elsewhere for them to work.

    def species_uniprotkb(self, name):
        """
        Return a link to the UniProt entry of species `name`.

        :param name: species name
        :return: a ``URL`` object, or ``None`` when the species metadata has
            no ``METADATA_UNITPROTID`` value
        """
        uniprotid = self.species_metadata(name).get(METADATA_UNITPROTID)
        if not uniprotid:
            return None
        # URL is not defined in this module; presumably provided by the
        # `colomoto_jupyter` star import - TODO confirm.
        return URL(f"https://www.uniprot.org/uniprot/{uniprotid}")

    def species_ncbi_gene(self, name):
        """
        Return a link to the NCBI Gene entry of species `name`.

        :param name: species name
        :return: a ``URL`` object, or ``None`` when the species metadata has
            no ``METADATA_NCBIGENEID`` value
        """
        gene_id = self.species_metadata(name).get(METADATA_NCBIGENEID)
        if not gene_id:
            return None
        return URL(f"https://www.ncbi.nlm.nih.gov/gene/{gene_id}")
def load(identifier, auto_persistent=True):
    """
    Load a CellCollective model from its URL or SBML export.

    `identifier` can be:

    - a URL of the form ``"cellcollective://{model_id}:{model_version}"``
    - the model URL from `research.cellcollective.org`
    - the location of the SBML file exported from CellCollective
    - a :class:`CellCollectiveConnector` instance

    Whenever `identifier` designates an online model, the module relies on
    the online API of CellCollective to download the SBML file.
    As the API may change over time, it is strongly recommended to rely
    instead on downloaded SBML files and attach them with the notebook to
    ensure its repeatability over time.

    With the option ``auto_persistent=True``, the module first looks for an
    existing downloaded SBML file. If it does not exist, it uses the online
    API to download it and move it alongside the notebook.
    NOTE(review): within this function `auto_persistent` only controls
    whether the warning is emitted; the file lookup and download are
    presumably handled by ``colomoto_jupyter.io.auto_download`` - confirm.

    :param identifier: model URL, connector, or SBML file location
    :param bool auto_persistent: see above
    :rtype: CellCollectiveSBMLModel
    :raises HTTPError: when the download fails for every candidate URL
    """
    conn = None
    if isinstance(identifier, CellCollectiveConnector):
        conn = identifier
    elif url_matches(identifier):
        conn = CellCollectiveConnector(identifier)
    else:
        from colomoto_jupyter.io import ensure_localfile
        sbmlfile = ensure_localfile(identifier)

    if conn is not None:
        from colomoto_jupyter.io import auto_download
        urls = conn.sbml_urls
        bname = conn.sbml_basename
        if not os.path.isfile(bname) and not auto_persistent:
            logger.warning(f"""This command relies on the online CellCollective API which may change over time!
To improve the repeatibility of this notebook, consider using the command
cellcollective.load("{identifier}", auto_persistent=True)
and attach the "{bname}" file along with your notebook.""")
        # Try each candidate URL in turn; only the failure of the last one
        # is propagated to the caller.
        last_index = len(urls) - 1
        for i, url in enumerate(urls):
            try:
                sbmlfile = auto_download(url, bname)
            except HTTPError:
                if i == last_index:
                    raise
            else:
                break
    return CellCollectiveSBMLModel(sbmlfile)
def to_biolqm(model):
    """
    Convert `model` to a bioLQM model.

    The ``biolqm`` module is obtained through ``import_colomoto_tool``; the
    SBML file backing `model` is loaded with it and the result is passed
    through ``biolqm.sanitize``.

    :param model: object exposing the SBML file location as ``localfile``
        (e.g. a :class:`CellCollectiveSBMLModel`)
    :return: the value returned by ``biolqm.sanitize``
    """
    biolqm = import_colomoto_tool("biolqm")
    lqm = biolqm.load(model.localfile)
    return biolqm.sanitize(lqm)