From d7c8cefd15dd03b2e487a06544a8e1f32e8588ba Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 14 Nov 2022 14:27:28 +0100 Subject: [PATCH] API Improvements (#8) * Lazy load converter from Bioregistry Depends on https://github.com/biopragmatics/bioregistry/pull/652, helps with #7 * Add function for guessing the version of parse results * Use robot merge * Add function for loading local obo graph json * Better access to properties * Inline upgrading Related to #7 * Update data.tsv * Update setup.cfg * Flake --- setup.cfg | 2 +- src/bioontologies/__init__.py | 8 ++++- src/bioontologies/obograph.py | 23 +++++++++----- src/bioontologies/robot.py | 51 ++++++++++++++++++++++++------ src/bioontologies/upgrade/data.tsv | 1 + 5 files changed, 66 insertions(+), 19 deletions(-) diff --git a/setup.cfg b/setup.cfg index 6054720..146e947 100644 --- a/setup.cfg +++ b/setup.cfg @@ -54,7 +54,7 @@ keywords = [options] install_requires = - bioregistry>=0.5.86 + bioregistry>=0.6.13 curies>=0.3.0 requests pydantic diff --git a/src/bioontologies/__init__.py b/src/bioontologies/__init__.py index d24e621..0de08ae 100644 --- a/src/bioontologies/__init__.py +++ b/src/bioontologies/__init__.py @@ -2,10 +2,16 @@ """Tools for biomedical ontologies.""" -from .robot import convert_to_obograph, get_obograph_by_iri, get_obograph_by_prefix +from .robot import ( + convert_to_obograph, + get_obograph_by_iri, + get_obograph_by_path, + get_obograph_by_prefix, +) __all__ = [ "convert_to_obograph", "get_obograph_by_prefix", "get_obograph_by_iri", + "get_obograph_by_path", ] diff --git a/src/bioontologies/obograph.py b/src/bioontologies/obograph.py index fa98cb9..cb66413 100644 --- a/src/bioontologies/obograph.py +++ b/src/bioontologies/obograph.py @@ -9,13 +9,11 @@ from operator import attrgetter from typing import Any, Iterable, List, Mapping, Optional, Set, Tuple, Union -from bioregistry import curie_to_str, manager -from curies import Converter +from bioregistry import curie_to_str, 
get_default_converter, manager from pydantic import BaseModel, Field from tqdm.auto import tqdm from typing_extensions import Literal -from . import upgrade from .relations import ground_relation __all__ = [ @@ -40,8 +38,6 @@ MaybeCURIE = Union[Tuple[str, str], Tuple[None, None]] -converter = Converter.from_reverse_prefix_map(manager.get_reverse_prefix_map(include_prefixes=True)) - class StandardizeMixin: """A mixin for classes representing standardizable data.""" @@ -85,6 +81,7 @@ def val_curie(self) -> str: def standardize(self): """Standardize this property.""" + self.val = self.val.replace("\n", " ") self.pred_prefix, self.pred_identifier = _parse_uri_or_curie_or_str(self.pred) self.val_prefix, self.val_identifier = _parse_uri_or_curie_or_str(self.val) self.standardized = True @@ -273,6 +270,14 @@ def xrefs(self) -> List[Xref]: return self.meta.xrefs return [] + @property + def properties(self) -> List[Property]: + """Get the properties for this node.""" + if not self.meta or self.meta.basicPropertyValues is None: + return [] + # TODO filter out ones grabbed by other getters + return self.meta.basicPropertyValues + @property def replaced_by(self) -> Optional[str]: """Get the identifier that this node was replaced by.""" @@ -565,13 +570,15 @@ def _parse_obo_rel(s: str, identifier: str) -> Union[Tuple[str, str], Tuple[None def _compress_uri_or_curie_or_str( s: str, *, debug: bool = False ) -> Union[Tuple[str, str], Tuple[None, str]]: + from .upgrade import insert, upgrade + s = s.replace(" ", "") - cv = upgrade.upgrade(s) + cv = upgrade(s) if cv: return cv - prefix, identifier = converter.parse_uri(s) + prefix, identifier = get_default_converter().parse_uri(s) if prefix and identifier: if prefix == "obo" and "#" in identifier: return _parse_obo_rel(s, identifier) @@ -587,7 +594,7 @@ def _compress_uri_or_curie_or_str( if s.startswith(x): prefix, identifier = ground_relation(s[len(x) :]) if prefix and identifier: - upgrade.insert(s, prefix, identifier) + insert(s, 
prefix, identifier) return prefix, identifier elif s not in WARNED: tqdm.write(f"could not parse legacy RO: {s}") diff --git a/src/bioontologies/robot.py b/src/bioontologies/robot.py index c86aa9c..3f0ae0e 100644 --- a/src/bioontologies/robot.py +++ b/src/bioontologies/robot.py @@ -35,6 +35,7 @@ # Processors "get_obograph_by_prefix", "get_obograph_by_iri", + "get_obograph_by_path", ] logger = logging.getLogger(__name__) @@ -92,6 +93,15 @@ def guess(self, prefix: str) -> Graph: return id_to_graph[CANONICAL[prefix]] raise ValueError(f"Several graphs in {prefix}: {sorted(id_to_graph)}") + def guess_version(self, prefix: str) -> Optional[str]: + """Guess the version.""" + try: + graph = self.guess(prefix) + except ValueError: + return None + else: + return graph.version or graph.version_iri + def get_obograph_by_iri( iri: str, @@ -102,6 +112,16 @@ def get_obograph_by_iri( return ParseResults(graph_document=graph_document, iri=iri) +def get_obograph_by_path(path: Union[str, Path], *, iri: Optional[str] = None) -> ParseResults: + """Get an ontology by its OBO Graph JSON file path.""" + res_json = json.loads(Path(path).resolve().read_text()) + graph_document = GraphDocument(**res_json) + if iri is None: + if graph_document.graphs and len(graph_document.graphs) == 1: + iri = graph_document.graphs[0].id + return ParseResults(graph_document=graph_document, iri=iri) + + def get_obograph_by_prefix( prefix: str, *, @@ -205,6 +225,7 @@ def convert_to_obograph( input_is_iri: bool = False, extra_args: Optional[List[str]] = None, from_iri: Optional[str] = None, + merge: bool = True, ) -> ParseResults: """Convert a local OWL file to a JSON file. 
@@ -223,6 +244,7 @@ def convert_to_obograph( :param extra_args: Extra positional arguments to pass in the command line :param from_iri: Use this parameter to say what IRI the graph came from + :param merge: Use ROBOT's merge command to squash all graphs together :returns: An object with the parsed OBO Graph JSON and text output from the ROBOT conversion program @@ -243,6 +265,7 @@ def convert_to_obograph( output_path=path, fmt="json", extra_args=extra_args, + merge=merge, ) messages = ret.strip().splitlines() graph_document_raw = json.loads(path.read_text()) @@ -301,21 +324,31 @@ def convert( output_path: Union[str, Path], input_flag: Optional[Literal["-i", "-I"]] = None, *, + merge: bool = True, fmt: Optional[str] = None, extra_args: Optional[List[str]] = None, ) -> str: """Convert an OBO file to an OWL file with ROBOT.""" if input_flag is None: input_flag = "-I" if _is_remote(input_path) else "-i" - args = [ - "robot", - "convert", - input_flag, - str(input_path), - "-o", - str(output_path), - *(extra_args or []), - ] + if merge: + args = [ + "robot", + "merge", + input_flag, + str(input_path), + "convert", + ] + else: + args = [ + "robot", + "convert", + input_flag, + str(input_path), + ] + args.extend(("-o", str(output_path))) + if extra_args: + args.extend(extra_args) if fmt: args.extend(("--format", fmt)) logger.debug("Running shell command: %s", args) diff --git a/src/bioontologies/upgrade/data.tsv b/src/bioontologies/upgrade/data.tsv index 08b80ee..df84566 100644 --- a/src/bioontologies/upgrade/data.tsv +++ b/src/bioontologies/upgrade/data.tsv @@ -60,6 +60,7 @@ http://purl.obolibrary.org/obo/nbo#has_participant ro 0000057 http://purl.obolibrary.org/obo/ncbitaxon#has_rank debio 0000023 http://purl.obolibrary.org/obo/ncbitaxon/subsets/taxslim#has_rank debio 0000023 http://purl.obolibrary.org/obo/obo#_PATO_0000047 pato 0000047 +http://purl.obolibrary.org/obo/pato#seeAlso rdf seeAlso http://purl.obolibrary.org/obo/so#has_origin debio 0000025 
http://purl.obolibrary.org/obo/uberon/core#conduit_for ro 0002570 http://purl.obolibrary.org/obo/uberon/core#existence_starts_and_ends_during ro 0002491