# Source code for cellcollective


import os
import re

import logging

from urllib.parse import urlparse
from urllib.request import HTTPError

from xml.dom import *
from xml.dom.minidom import parse

from bs4 import BeautifulSoup

from colomoto_jupyter import *

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Matches a CellCollective web URL whose fragment carries the model reference:
#   http(s)://<anything>cellcollective.org/<one path segment>#<id>[:<version>]
# Group 1 captures the model id; group 3 captures the optional version.
# Written as a raw string so the regex escapes read as they are interpreted.
urlidentifier = re.compile(
    r"https?://[^/]*\bcellcollective\.org/[^/]*#(\d+)(:(\d+))?\b"
)

def id_from_url(url):
    """
    Extract the CellCollective model reference encoded in `url`.

    Two forms are recognised:

    - ``cellcollective://{model_id}[:{model_version}]``
    - a web URL matched by the module-level ``urlidentifier`` pattern (any
      ``module/`` substring is removed from the URL before matching)

    :param str url: candidate URL
    :returns: a ``(model_id, model_version)`` pair, or ``None`` when `url`
        has no network location or is not recognised. ``model_id`` is a
        string; ``model_version`` is a string when `url` specifies one and
        the integer ``1`` otherwise.
    :rtype: tuple or None
    """
    uri = urlparse(url)
    if not uri.netloc:
        return None
    if uri.scheme == "cellcollective":
        # partition() always yields exactly one id and one version, so the
        # result is a pair whatever the number of ":" in the identifier; an
        # absent or empty version falls back to 1 like the web URL form.
        model_id, _, version = uri.netloc.partition(":")
        return model_id, version or 1
    match = urlidentifier.search(url.replace("module/", ""))
    if match is None:
        return None
    return match.group(1), match.group(3) or 1

def url_matches(url):
    """
    Tell whether `url` designates a CellCollective model.

    :param str url: candidate URL
    :returns: ``True`` if :func:`id_from_url` extracts a model reference
        from `url`, ``False`` otherwise
    :rtype: bool
    """
    return id_from_url(url) is not None

class CellCollectiveConnector(object):
    """
    Reference to one version of a CellCollective model, giving the URLs and
    the local file name used for its SBML export.
    """

    def __init__(self, identifier, version=1):
        """
        :param identifier: a URL understood by :func:`id_from_url`, an
            ``(id, version)`` tuple, or a bare model id
        :param version: model version, used only when `identifier` does not
            carry one itself
        """
        # Only strings can be URLs; any other value (int id, tuple) is taken
        # as-is instead of being handed to the URL parser.
        idv = id_from_url(identifier) if isinstance(identifier, str) else None
        if not idv:
            idv = identifier
        if not isinstance(idv, tuple):
            idv = idv, version
        self.id, self.version = idv

    @property
    def sbml_urls(self):
        """
        Candidate download URLs for the SBML export, in the order they
        should be tried. The two URLs differ only by the case of the
        ``type`` query value.

        :rtype: list
        """
        url = f"https://research.cellcollective.org/api/model/{self.id}/export/version/{self.version}?type=sbml"
        return [url, url.replace("type=sbml", "type=SBML")]

    @property
    def sbml_basename(self):
        """
        File name under which the SBML export of this model version is stored.

        :rtype: str
        """
        return f"cellcollective-{self.id}-{self.version}.sbml"

def connect(identifier):
    """
    Build a :class:`CellCollectiveConnector` for `identifier`.

    :param identifier: any value accepted as first argument of
        :class:`CellCollectiveConnector`
    :rtype: CellCollectiveConnector
    """
    return CellCollectiveConnector(identifier)


# Keys of the dictionary returned by CellCollectiveSBMLModel.species_metadata.
# NOTE(review): "UNITPROT" looks like a misspelling of "UNIPROT"; the name is
# left untouched because it is visible to importers of this module.
METADATA_UNITPROTID = "UniProtID"
METADATA_GENENAME = "GeneName"
METADATA_NCBIGENEID = "NCBIGeneID"

# XML namespace used to look up the qualitativeSpecies elements and their
# id/name attributes in the SBML document.
QUALNS = "http://www.sbml.org/sbml/level3/version1/qual/version1"

class CellCollectiveSBMLModel(object):
    """
    View on the qualitative species declared in an SBML file exported from
    CellCollective.

    Attributes set by the constructor:

    - ``localfile``: the value given to the constructor
    - ``dom`` / ``root``: the parsed XML document and its document element
    - ``id2elt``: maps the qual ``id`` of each ``qualitativeSpecies``
      element to the element itself
    - ``name2id``: maps the qual ``name`` of each species to its qual ``id``
    """

    def __init__(self, localfile):
        """
        Parse `localfile` and index its ``qualitativeSpecies`` elements.

        :param localfile: file name or file object, as accepted by
            :func:`xml.dom.minidom.parse`
        """
        self.localfile = localfile
        self.dom = parse(localfile)
        self.root = self.dom.documentElement
        species_elts = self.root.getElementsByTagNameNS(QUALNS, "qualitativeSpecies")
        self.id2elt = {elt.getAttributeNS(QUALNS, "id"): elt
                       for elt in species_elts}
        self.name2id = {elt.getAttributeNS(QUALNS, "name"): species_id
                        for species_id, elt in self.id2elt.items()}

    @property
    def species(self):
        """
        Returns the set of defined species

        :rtype: set
        """
        return set(self.name2id)

    # Normalised statement key (lower case, spaces removed) -> metadata key.
    _key2metadata = {
        "uniprotid": METADATA_UNITPROTID,
        "uniprotaccessionid": METADATA_UNITPROTID,
        "genename": METADATA_GENENAME,
        "ncbigeneid": METADATA_NCBIGENEID,
    }

    def species_metadata(self, name):
        """
        Returns the metadata found in the SBML notes of the species `name`.

        Only the ``<p>`` elements of the first ``<body>`` of the first
        ``<notes>`` of the species are considered. The text of the first
        ``<p>`` is parsed as HTML and each ``<div>`` found in it is read as
        a ``key: value`` statement. When there is no such ``<div>``, the
        text of each ``<p>`` is read as a statement instead. Statements
        which do not contain exactly one ``:`` or whose key is unknown are
        ignored, as are ``<p>`` elements that do not start with text.

        :param str name: name of the species (one of :attr:`species`)
        :returns: the recognised metadata, keyed by the ``METADATA_*``
            constants of this module; empty if nothing is found
        :rtype: dict
        :raises KeyError: if `name` is not a species of the model
        """
        metadata = {}
        elt = self.id2elt[self.name2id[name]]
        notes = elt.getElementsByTagName("notes")
        bodies = notes[0].getElementsByTagName("body") if notes else None
        paragraphs = bodies[0].getElementsByTagName("p") if bodies else None
        if not paragraphs:
            return metadata

        def leading_text(node):
            # An empty element has no first child, and a first child which is
            # not a text node has no ``wholeText``: report None in both cases.
            return getattr(node.firstChild, "wholeText", None)

        def parse_statement(data):
            fields = data.split(":")
            if len(fields) != 2:
                return
            key = fields[0].strip().replace(" ", "").lower()
            if key in self._key2metadata:
                metadata[self._key2metadata[key]] = fields[1].strip()

        first_text = leading_text(paragraphs[0])
        if first_text:
            divs = BeautifulSoup(first_text, "html.parser").find_all("div")
        else:
            divs = []
        for div in divs:
            parse_statement(div.getText())
        if not divs:
            # No HTML structure: each paragraph holds one plain statement.
            for paragraph in paragraphs:
                text = leading_text(paragraph)
                if text is not None:
                    parse_statement(text)

        return metadata

    def species_uniprotkb(self, name):
        """
        Returns the link to the UniProt entry of the species `name`.

        :param str name: name of the species (one of :attr:`species`)
        :returns: a ``URL`` object pointing to ``www.uniprot.org``, or
            ``None`` if the species metadata hold no UniProt identifier
        :raises KeyError: if `name` is not a species of the model
        """
        uniprotid = self.species_metadata(name).get(METADATA_UNITPROTID)
        if not uniprotid:
            return None
        return URL(f"https://www.uniprot.org/uniprot/{uniprotid}")

    def species_ncbi_gene(self, name):
        """
        Returns the link to the NCBI Gene entry of the species `name`.

        :param str name: name of the species (one of :attr:`species`)
        :returns: a ``URL`` object pointing to ``www.ncbi.nlm.nih.gov``, or
            ``None`` if the species metadata hold no NCBI gene identifier
        :raises KeyError: if `name` is not a species of the model
        """
        gene_id = self.species_metadata(name).get(METADATA_NCBIGENEID)
        if not gene_id:
            return None
        return URL(f"https://www.ncbi.nlm.nih.gov/gene/{gene_id}")



def load(identifier, auto_persistent=True):
    """
    Load a CellCollective model from its URL or SBML export.

    `identifier` can be:
    - a URL of the form ``"cellcollective://{model_id}:{model_version}"``
    - the model URL from `research.cellcollective.org`
    - the location of the SBML file exported from CellCollective
    - a :class:`CellCollectiveConnector` object

    Whenever `identifier` refers to an online model, the module relies on
    the online API of CellCollective to download the SBML file.
    As the API may change over time, it is strongly recommended to rely
    instead on downloaded SBML files and attach them to the notebook to
    ensure its repeatability over time.

    With the option ``auto_persistent=True``, the module first looks for an
    existing downloaded SBML file. If it does not exist, it uses the online
    API to download it and move it alongside the notebook.

    NOTE(review): within this function `auto_persistent` only decides
    whether the repeatability warning is logged; the lookup of an existing
    file is presumably done by ``colomoto_jupyter.io.auto_download`` --
    to be confirmed.

    :returns: the loaded model
    :rtype: CellCollectiveSBMLModel
    :raises urllib.error.HTTPError: if none of the download URLs works
    """
    if isinstance(identifier, CellCollectiveConnector):
        conn = identifier
    elif url_matches(identifier):
        conn = CellCollectiveConnector(identifier)
    else:
        conn = None

    if conn is None:
        # Not an online model reference: treat it as the SBML file location.
        from colomoto_jupyter.io import ensure_localfile
        return CellCollectiveSBMLModel(ensure_localfile(identifier))

    from colomoto_jupyter.io import auto_download
    urls = conn.sbml_urls
    bname = conn.sbml_basename
    if not auto_persistent and not os.path.isfile(bname):
        logger.warning(f"""This command relies on the online CellCollective API which may change over time!
To improve the repeatibility of this notebook, consider using the command

    cellcollective.load("{identifier}", auto_persistent=True)

and attach the "{bname}" file along with your notebook.""")
    # Try each candidate URL in turn; only the failure of the last one is
    # reported to the caller.
    last = len(urls) - 1
    for i, url in enumerate(urls):
        try:
            sbmlfile = auto_download(url, bname)
            break
        except HTTPError:
            if i == last:
                raise
    return CellCollectiveSBMLModel(sbmlfile)

def to_biolqm(model):
    """
    Convert `model` to a bioLQM model.

    The SBML file backing `model` (``model.localfile``) is loaded with the
    ``biolqm`` module and the result is passed through ``biolqm.sanitize``.

    :param CellCollectiveSBMLModel model: model to convert
    :returns: the value returned by ``biolqm.sanitize``
    """
    biolqm = import_colomoto_tool("biolqm")
    lqm = biolqm.load(model.localfile)
    return biolqm.sanitize(lqm)