Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve federated SPARQL query testing to use docker and add virtuoso test #789

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: Tests

on:
workflow_dispatch:
push:
branches: [ main ]
pull_request:
Expand Down Expand Up @@ -70,6 +71,12 @@ jobs:
run: |
pip install --upgrade pip setuptools wheel
pip install "tox<4.0.0"
- name: Start blazegraph and virtuoso triplestores with docker
  if: matrix.os == 'ubuntu-latest'
  run: |
    # Use the `docker compose` plugin for every command in this step
    # (previously `up` went through the legacy `docker-compose` binary).
    docker compose up -d
    # Fixed wait before running isql against the virtuoso service;
    # presumably to let the containers finish starting.
    sleep 20
    docker compose exec virtuoso isql -U dba -P dba exec='GRANT "SPARQL_SELECT_FED" TO "SPARQL";'
- name: Test with pytest
run:
tox -e py
Expand Down
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@ prune tests/.pytest_cache
exclude docs/*.md
exclude docs/_config.yml

exclude .appveyor.yml .bumpversion.cfg .coveragerc .flake8 .travis.yml .readthedocs.yml tox.ini .pre-commit-config.yaml Dockerfile CITATION.cff
exclude .appveyor.yml .bumpversion.cfg .coveragerc .flake8 .travis.yml .readthedocs.yml tox.ini .pre-commit-config.yaml Dockerfile CITATION.cff docker-compose.yml
include LICENSE *.md *.rst
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,31 @@ $ pip install --editable .
Build the docs locally with `tox -e ldocs` then view by opening
`docs/build/html/index.html`.

<details>
<summary>See the instructions to run the tests</summary>

Having [`tox`](https://tox.wiki/en/latest/installation.html) installed is required to run the tests. Optionally, to run all tests, you will need to start the Bioregistry, a Virtuoso endpoint, and a Blazegraph endpoint using `docker compose`:

```shell
$ docker compose up
```

The first time you deploy the docker compose stack, you will need to run an additional command from another terminal, in this repository's root folder, to enable federated queries in Virtuoso:

```shell
$ docker compose exec virtuoso isql -U dba -P dba exec='GRANT "SPARQL_SELECT_FED" TO "SPARQL";'
```

After cloning the repository and installing the dependencies, the unit tests in the `tests/` folder can be run reproducibly with:

```shell
$ tox
```

Additionally, these tests are automatically re-run with each commit in a [GitHub Action](https://github.com/biopragmatics/bioregistry/actions/workflows/tests.yml).

</details>

## 💪 Usage

### Normalizing Prefixes
Expand Down
19 changes: 19 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Services used by the federated SPARQL tests: the Bioregistry itself plus
# the Blazegraph and Virtuoso triplestores that federate queries to it.
version: "3"
services:

# Built from the build context in this directory; container port 8766 is
# published on host port 8888.
bioregistry:
build: .
ports:
- 8888:8766

# Container port 8080 is published on host port 8889.
blazegraph:
image: metaphacts/blazegraph-basic:2.2.0-20160908.003514-6-jetty9.4.44-jre8-45dbfff
ports:
- 8889:8080

# Container port 8890 is published on host port 8890.
virtuoso:
image: openlink/virtuoso-opensource-7:latest
ports:
- 8890:8890
environment:
# Falls back to "dba" when VIRTUOSO_PASSWORD is unset or empty.
# NOTE(review): the documented isql GRANT command passes `-P dba`, so it
# presumably only works with this default password - confirm.
- DBA_PASSWORD=${VIRTUOSO_PASSWORD:-dba}
63 changes: 45 additions & 18 deletions tests/test_sparql.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
"""Tests for the local SPARQL endpoint."""

import csv
import unittest
from typing import Set, Tuple
from xml import etree

import requests

PING_SPARQL = 'SELECT ?s ?o WHERE { BIND("hello" as ?s) . BIND("there" as ?o) . }'
LOCAL_BIOREGISTRY = "http://localhost:5000/sparql"
LOCAL_BLAZEGRAPH = "http://192.168.2.30:9999/blazegraph/sparql"
# NOTE: federated queries need to use docker internal URL
DOCKER_BIOREGISTRY = "http://bioregistry:8766/sparql"
LOCAL_BIOREGISTRY = "http://localhost:8888/sparql"
LOCAL_BLAZEGRAPH = "http://localhost:8889/blazegraph/namespace/kb/sparql"
LOCAL_VIRTUOSO = "http://localhost:8890/sparql"


def _handle_res_xml(res: requests.Response) -> Set[Tuple[str, str]]:
Expand All @@ -32,14 +36,13 @@ def _handle_res_json(res: requests.Response) -> Set[Tuple[str, str]]:


def _handle_res_csv(res: requests.Response) -> Set[Tuple[str, str]]:
header, *lines = (line.strip().split(",") for line in res.text.splitlines())
records = (dict(zip(header, line)) for line in lines)
return {(record["s"], record["o"]) for record in records}
reader = csv.DictReader(res.text.splitlines())
return {(record["s"], record["o"]) for record in reader}


HANDLERS = {
"application/json": _handle_res_json,
"application/xml": _handle_res_xml,
"application/sparql-results+xml": _handle_res_xml,
"text/csv": _handle_res_csv,
}

Expand All @@ -64,12 +67,22 @@ def sparql_service_available(endpoint: str) -> bool:
return list(records) == [("hello", "there")]


SPARQL = f"""\
SPARQL_VALUES = f"""\
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT DISTINCT ?s ?o WHERE {{
SERVICE <{LOCAL_BIOREGISTRY}> {{
VALUES ?s {{ <http://purl.obolibrary.org/obo/CHEBI_24867> }}
?s owl:sameAs ?o
SERVICE <{DOCKER_BIOREGISTRY}> {{
VALUES ?s {{ <http://purl.obolibrary.org/obo/CHEBI_24867> }} .
?s owl:sameAs ?o .
}}
}}
""".rstrip()

SPARQL_SIMPLE = f"""\
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT DISTINCT ?s ?o WHERE {{
SERVICE <{DOCKER_BIOREGISTRY}> {{
<http://purl.obolibrary.org/obo/CHEBI_24867> owl:sameAs ?o .
?s owl:sameAs ?o .
}}
}}
""".rstrip()
Expand All @@ -79,11 +92,11 @@ def sparql_service_available(endpoint: str) -> bool:
sparql_service_available(LOCAL_BIOREGISTRY), reason="No local Bioregistry is running"
)
class TestSPARQL(unittest.TestCase):
"""Tests for SPARQL."""
"""Tests for federated SPARQL queries to the Bioregistry mapping service."""

def assert_endpoint(self, endpoint: str, *, accept: str):
def assert_endpoint(self, endpoint: str, query: str, *, accept: str):
"""Assert the endpoint returns favorable results."""
records = get(endpoint, SPARQL, accept=accept)
records = get(endpoint, query, accept=accept)
self.assertIn(
("http://purl.obolibrary.org/obo/CHEBI_24867", "https://bioregistry.io/chebi:24867"),
records,
Expand All @@ -93,13 +106,27 @@ def assert_endpoint(self, endpoint: str, *, accept: str):
sparql_service_available(LOCAL_BLAZEGRAPH), reason="No local BlazeGraph is running"
)
def test_federate_blazegraph(self):
"""Test federating on a blazegraph.
"""Test federating on a Blazegraph triplestore.

How to run blazegraph locally:
To run blazegraph locally: docker compose up
"""
for mimetype in HANDLERS:
with self.subTest(mimetype=mimetype):
self.assert_endpoint(LOCAL_BLAZEGRAPH, SPARQL_SIMPLE, accept=mimetype)
self.assert_endpoint(LOCAL_BLAZEGRAPH, SPARQL_VALUES, accept=mimetype)

@unittest.skipUnless(
sparql_service_available(LOCAL_VIRTUOSO), reason="No local Virtuoso is running"
)
def test_federate_virtuoso(self):
"""Test federating on an OpenLink Virtuoso triplestore.

1. Get: https://github.com/blazegraph/database/releases/download/BLAZEGRAPH_2_1_6_RC/blazegraph.jar
2. Run: java -jar blazegraph.jar
To run Virtuoso locally:
1. docker compose up
2. docker compose exec virtuoso isql -U dba -P dba exec='GRANT "SPARQL_SELECT_FED" TO "SPARQL";'
"""
for mimetype in HANDLERS:
with self.subTest(mimetype=mimetype):
self.assert_endpoint(LOCAL_BLAZEGRAPH, accept=mimetype)
self.assert_endpoint(LOCAL_VIRTUOSO, SPARQL_SIMPLE, accept=mimetype)
# TODO: Virtuoso fails to resolve VALUES in federated query
# self.assert_endpoint(LOCAL_VIRTUOSO, SPARQL_VALUES, accept=mimetype)