Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend Gilda's grounder #9

Merged
merged 3 commits into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lexica/phenotype/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@
# TODO should there be others?
],
),
biolexica.Input(source="efo", processor="pyobo"), # TODO find subset of EFO
biolexica.Input(source="efo", processor="pyobo", ancestors=["EFO:0000408"]),
biolexica.Input(source="ncit", processor="pyobo", ancestors=["ncit:C2991"]),
# biolexica.Input(source="umls", processor="pyobo"), # TODO find subset of UMLS
# biolexica.Input(source="ncit", processor="pyobo"), # TODO find subset of NCIT
],
excludes=["doid:4"],
)
Expand Down
82 changes: 75 additions & 7 deletions src/biolexica/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import biosynonyms
import gilda
import pyobo
from curies import Reference
from gilda.grounder import load_entries_from_terms_file
from gilda.process import normalize
from pydantic import BaseModel, Field
Expand All @@ -30,6 +31,7 @@
logger = logging.getLogger(__name__)

HERE = Path(__file__).parent.resolve()
LEXICA = HERE.parent.parent.joinpath("lexica")
Processor = Literal["pyobo", "bioontologies", "biosynonyms", "gilda"]

GrounderHint = Union[gilda.Grounder, str, Path]
Expand All @@ -56,22 +58,88 @@
URL_FMT = "https://github.com/biopragmatics/biolexica/raw/main/lexica/{key}/terms.tsv.gz"


def load_grounder(grounder: GrounderHint) -> gilda.Grounder:
class Match(BaseModel):
"""Model a scored match from Gilda."""

reference: Reference
name: str
score: float

@property
def curie(self) -> str:
"""Get the reference's curie."""
return self.reference.curie

Check warning on line 71 in src/biolexica/api.py

View check run for this annotation

Codecov / codecov/patch

src/biolexica/api.py#L71

Added line #L71 was not covered by tests

@classmethod
def from_gilda(cls, scored_match: gilda.ScoredMatch):
"""Construct a match from a Gilda object."""
return cls(

Check warning on line 76 in src/biolexica/api.py

View check run for this annotation

Codecov / codecov/patch

src/biolexica/api.py#L76

Added line #L76 was not covered by tests
reference=Reference(prefix=scored_match.term.db, identifier=scored_match.term.id),
name=scored_match.term.entry_name,
score=scored_match.score,
)


class Grounder(gilda.Grounder):
"""Wrap a Gilda grounder with additional functionality."""

def get_matches(
self,
s: str,
context: Optional[str] = None,
organisms: Optional[List[str]] = None,
namespaces: Optional[List[str]] = None,
) -> List[Match]:
"""Get matches in Biolexica's format."""
return [
Match.from_gilda(scored_match)
for scored_match in super().ground(
s, context=context, organisms=organisms, namespaces=namespaces
)
]

def get_best_match(
self,
s: str,
context: Optional[str] = None,
organisms: Optional[List[str]] = None,
namespaces: Optional[List[str]] = None,
) -> Optional[Match]:
"""Get the best match in Biolexica's format."""
scored_matches = super().ground(

Check warning on line 109 in src/biolexica/api.py

View check run for this annotation

Codecov / codecov/patch

src/biolexica/api.py#L109

Added line #L109 was not covered by tests
s, context=context, organisms=organisms, namespaces=namespaces
)
if not scored_matches:
return None
return Match.from_gilda(scored_matches[0])

Check warning on line 114 in src/biolexica/api.py

View check run for this annotation

Codecov / codecov/patch

src/biolexica/api.py#L113-L114

Added lines #L113 - L114 were not covered by tests


def load_grounder(grounder: GrounderHint) -> Grounder:
"""Load a gilda grounder, potentially from a remote location."""
if isinstance(grounder, str):
if grounder in PREDEFINED:
grounder = URL_FMT.format(key=grounder)
if LEXICA.is_dir():
# If biolexica is installed in editable mode, try looking for
# the directory outside the package root and load the predefined
# index directly
grounder = LEXICA.joinpath(grounder, "terms.tsv.gz").as_posix()

Check warning on line 125 in src/biolexica/api.py

View check run for this annotation

Codecov / codecov/patch

src/biolexica/api.py#L125

Added line #L125 was not covered by tests
else:
grounder = URL_FMT.format(key=grounder)

Check warning on line 127 in src/biolexica/api.py

View check run for this annotation

Codecov / codecov/patch

src/biolexica/api.py#L127

Added line #L127 was not covered by tests
if grounder.startswith("http"):
with tempfile.TemporaryDirectory() as directory:
path = Path(directory).joinpath("terms.tsv.gz")
urlretrieve(grounder, path) # noqa:S310
return gilda.Grounder(path)
return Grounder(path)

Check warning on line 132 in src/biolexica/api.py

View check run for this annotation

Codecov / codecov/patch

src/biolexica/api.py#L132

Added line #L132 was not covered by tests
if isinstance(grounder, (str, Path)):
path = Path(grounder).resolve()
if not path.is_file():
raise FileNotFoundError(path)
return gilda.Grounder(grounder)
return grounder
return Grounder(grounder)
if isinstance(grounder, Grounder):
return grounder

Check warning on line 139 in src/biolexica/api.py

View check run for this annotation

Codecov / codecov/patch

src/biolexica/api.py#L139

Added line #L139 was not covered by tests
if isinstance(grounder, gilda.Grounder):
return Grounder(grounder.entries)
raise TypeError

Check warning on line 142 in src/biolexica/api.py

View check run for this annotation

Codecov / codecov/patch

src/biolexica/api.py#L141-L142

Added lines #L141 - L142 were not covered by tests


def assemble_grounder(
Expand All @@ -80,15 +148,15 @@
*,
extra_terms: Optional[List["gilda.Term"]] = None,
include_biosynonyms: bool = True,
) -> gilda.Grounder:
) -> Grounder:
"""Assemble terms from multiple resources and load into a grounder."""
terms = assemble_terms(
configuration=configuration,
mappings=mappings,
include_biosynonyms=include_biosynonyms,
extra_terms=extra_terms,
)
grounder = gilda.Grounder(list(terms))
grounder = Grounder(list(terms))

Check warning on line 159 in src/biolexica/api.py

View check run for this annotation

Codecov / codecov/patch

src/biolexica/api.py#L159

Added line #L159 was not covered by tests
return grounder


Expand Down
26 changes: 4 additions & 22 deletions src/biolexica/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,9 @@
from typing import List

import fastapi
import gilda
from curies import Reference
from fastapi import FastAPI, Request
from pydantic import BaseModel

from biolexica.api import GrounderHint, load_grounder
from biolexica.api import Grounder, GrounderHint, Match, load_grounder

Check warning on line 8 in src/biolexica/web.py

View check run for this annotation

Codecov / codecov/patch

src/biolexica/web.py#L8

Added line #L8 was not covered by tests

__all__ = [
"run_app",
Expand All @@ -18,16 +15,8 @@
api_router = fastapi.APIRouter()


class Match(BaseModel):
"""Model a scored match from Gilda."""

reference: Reference
name: str
score: float


def run_app(grounder: GrounderHint):
"""Costruct a FastAPI app from a Gilda grounder and run with :mod:`uvicorn`."""
"""Construct a FastAPI app from a Gilda grounder and run with :mod:`uvicorn`."""
import uvicorn

uvicorn.run(get_app(grounder))
Expand All @@ -41,19 +30,12 @@
return app


def _get_grounder(request: Request) -> gilda.Grounder:
def _get_grounder(request: Request) -> Grounder:

Check warning on line 33 in src/biolexica/web.py

View check run for this annotation

Codecov / codecov/patch

src/biolexica/web.py#L33

Added line #L33 was not covered by tests
return request.app.state


def _ground(request: Request, text: str) -> List[Match]:
return [
Match(
reference=Reference(prefix=scored_match.term.db, identifier=scored_match.term.id),
name=scored_match.term.entry_name,
score=scored_match.score,
)
for scored_match in _get_grounder(request).ground(text)
]
return _get_grounder(request).get_matches(text)

Check warning on line 38 in src/biolexica/web.py

View check run for this annotation

Codecov / codecov/patch

src/biolexica/web.py#L38

Added line #L38 was not covered by tests


@api_router.get("/summarize")
Expand Down
Loading