diff --git a/.circleci/config.yml b/.circleci/config.yml
index 8d3667af6f..858d03901a 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,7 +1,7 @@
-version: 2
+version: 2.1
 orbs:
-  python: circleci/python@2.0.3
+  python: circleci/python@2.1.1
 jobs:
diff --git a/.dockerignore b/.dockerignore
index fc93d25fe7..9da5b99503 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -7,3 +7,6 @@
 !LICENSE
 !README.md
 !.git
+!tests
+!requirements_test.txt
+!.gitmodules
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
new file mode 100644
index 0000000000..ab34859c55
--- /dev/null
+++ b/.github/workflows/integration-test.yml
@@ -0,0 +1,58 @@
+name: Run ocrd network integration tests
+
+on:
+  push:
+    branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
+
+jobs:
+  build:
+
+    runs-on: ${{ matrix.os }}
+
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version:
+          - '3.7'
+          - '3.8'
+          - '3.9'
+          - '3.10'
+          - '3.11'
+        os:
+          - ubuntu-22.04
+          # - macos-latest
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Homebrew
+        id: set-up-homebrew
+        uses: Homebrew/actions/setup-homebrew@master
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          if [[ "${{ matrix.os }}" == "ubuntu"* ]];then
+            sudo apt-get -y update
+            sudo make deps-ubuntu
+          else
+            HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 \
+            HOMEBREW_NO_AUTO_UPDATE=1 \
+            brew install imagemagick geos bash # opencv
+          fi
+          make install deps-test
+      - name: Install Docker on macOS
+        if: runner.os == 'macos'
+        run: |
+          brew install docker docker-compose
+          colima start
+      - name: Test network integration with pytest
+        run: |
+          if [[ "${{ matrix.os }}" == "macos"* ]];then
+            make integration-test DOCKER_COMPOSE=docker-compose
+          else
+            make integration-test
+          fi
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1888ac1a62..fed68492e7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,19 @@ Versioned according to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+Added:
+
+  * Basic integration test for `ocrd_network`, #1164
+
+Fixed:
+
+  * METS Server: UDS sockets are removed on process exit, #117
+
+Changed:
+
+  * replace license-incompatible sparkline library with a simpler implementation, #1176
+  * replace all pkg_resources calls with modern alternatives, no more run-time setuptools dependency, #1174
+
 ## [2.61.2] - 2024-01-24
 
 Fixed:
diff --git a/Dockerfile b/Dockerfile
index 9cdb648372..95130fdd4a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 ARG BASE_IMAGE
-FROM $BASE_IMAGE
+FROM $BASE_IMAGE as ocrd_core_base
 ARG FIXUP=echo
 MAINTAINER OCR-D
 ENV DEBIAN_FRONTEND noninteractive
@@ -33,13 +33,25 @@ RUN apt-get update && apt-get -y install software-properties-common \
     curl \
     sudo \
     git \
-    && make deps-ubuntu \
-    && python3 -m venv /usr/local \
+    && make deps-ubuntu
+RUN python3 -m venv /usr/local \
     && hash -r \
     && make install \
-    && eval $FIXUP \
-    && rm -rf /build-ocrd
+    && eval $FIXUP
 
 WORKDIR /data
 
 CMD ["/usr/local/bin/ocrd", "--help"]
+
+FROM ocrd_core_base as ocrd_core_test
+WORKDIR /build-ocrd
+COPY Makefile .
+RUN make assets
+COPY tests ./tests
+COPY .gitmodules .
+COPY requirements_test.txt .
+RUN pip install -r requirements_test.txt
+RUN mkdir /ocrd-data && chmod 777 /ocrd-data
+
+CMD ["yes"]
+# CMD ["make", "test", "integration-test"]
diff --git a/Dockerfile.cuda b/Dockerfile.cuda
index 52d7a27619..89db9603cf 100644
--- a/Dockerfile.cuda
+++ b/Dockerfile.cuda
@@ -1,5 +1,5 @@
 ARG BASE_IMAGE
-FROM $BASE_IMAGE
+FROM $BASE_IMAGE AS ocrd_core_base
 
 ENV MAMBA_EXE=/usr/local/bin/conda
 ENV MAMBA_ROOT_PREFIX=/conda
diff --git a/Makefile b/Makefile
index f863684d0d..5df74e18ea 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,8 @@ TESTDIR = $(CURDIR)/tests
 PYTEST_ARGS = --continue-on-collection-errors
 VERSION = $(shell cat VERSION)
 
+DOCKER_COMPOSE = docker compose
+
 SPHINX_APIDOC =
 
 BUILD_ORDER = ocrd_utils ocrd_models ocrd_modelfactory ocrd_validators ocrd_network ocrd
@@ -129,7 +131,7 @@ build:
 # (Re)install the tool
 install: #build # not stricttly necessary but a precaution against outdated python build tools, https://github.com/OCR-D/core/pull/1166
-	$(PIP) install -U pip wheel setuptools
+	$(PIP) install -U pip wheel
 	$(PIP_INSTALL) . $(PIP_INSTALL_CONFIG_OPTION)
 	@# workaround for shapely#1598
 	$(PIP) config set global.no-binary shapely
@@ -213,9 +215,16 @@ test: assets
 	$(PYTHON) \
 		-m pytest $(PYTEST_ARGS) --durations=10 \
 		--ignore-glob="$(TESTDIR)/**/*bench*.py" \
+		--ignore-glob="$(TESTDIR)/network/*.py" \
 		$(TESTDIR)
 	cd ocrd_utils ; $(PYTHON) -m pytest --continue-on-collection-errors -k TestLogging -k TestDecorators $(TESTDIR)
 
+INTEGRATION_TEST_IN_DOCKER = docker exec core_test
+integration-test:
+	$(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d
+	-$(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_rmq or test_db or test_processing_server' -v
+	$(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans
+
 benchmark:
 	$(PYTHON) -m pytest $(TESTDIR)/model/test_ocrd_mets_bench.py
 
@@ -271,6 +280,7 @@ pyclean:
 	rm -rf ./dist
 	rm -rf htmlcov
 	rm -rf .benchmarks
+	rm -rf **/*.egg-info
 	rm -f **/*.pyc
 	-find . -name '__pycache__' -exec rm -rf '{}' \;
 	rm -rf .pytest_cache
@@ -296,7 +306,7 @@ docker-cuda: DOCKER_FILE = Dockerfile.cuda
 docker-cuda: docker
 
 docker docker-cuda:
-	docker build --progress=plain -f $(DOCKER_FILE) -t $(DOCKER_TAG) --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) .
+	docker build --progress=plain -f $(DOCKER_FILE) -t $(DOCKER_TAG) --target ocrd_core_base --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) .
 
 # Build wheels and source dist and twine upload them
 pypi: build
diff --git a/requirements.txt b/requirements.txt
index 9a9b217218..2190ec648d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,21 +1,24 @@
 atomicwrites >= 1.3.0
-bagit >= 1.7.0
-bagit_profile >= 1.3.0
 beanie~=1.7
 click >=7
 Deprecated == 1.2.0
 docker
-fastapi
 fastapi>=0.78.0
 filetype
 Flask
 frozendict>=2.3.4
 gdown
 httpx>=0.22.0
+importlib_metadata ; python_version < '3.8'
+importlib_resources ; python_version < '3.9'
 jsonschema
 lxml
 memory-profiler >= 0.58.0
+# XXX explicitly do not restrict the numpy version because different
+# tensorflow versions might require different versions
 numpy
+ocrd-fork-bagit >= 1.8.1.post2
+ocrd-fork-bagit_profile >= 1.3.0.post1
 opencv-python-headless
 paramiko
 pika>=1.2.0
@@ -27,8 +30,6 @@ pyyaml
 requests < 2.30
 requests_unixsocket
 shapely
-sparklines >= 0.4.2
-# tensorflow versions might require different versions
 uvicorn
 uvicorn>=0.17.6
-# XXX explicitly do not restrict the numpy version because different
+
diff --git a/src/ocrd/cli/workspace.py b/src/ocrd/cli/workspace.py
index 318c1e971d..d77e981dd4 100644
--- a/src/ocrd/cli/workspace.py
+++ b/src/ocrd/cli/workspace.py
@@ -286,14 +286,14 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
     \b
     Examples:
         ocrd workspace bulk-add \\
-            --regex '(?P<fileGrp>[^/]+)/page_(?P<pageid>.*)\.[^.]+' \\
+            --regex '(?P<fileGrp>[^/]+)/page_(?P<pageid>.*)\\.[^.]+' \\
             --page-id 'PHYS_{{ pageid }}' \\
             --file-grp "{{ fileGrp }}" \\
             path/to/files/*/*.*
 
     \b
         echo "path/to/src/file.xml SEG/page_p0001.xml" \\
         | ocrd workspace bulk-add \\
-            --regex '(?P<src>.*?) (?P<fileGrp>.+?)/page_(?P<pageid>.*)\.(?P<ext>[^\.]*)' \\
+            --regex '(?P<src>.*?) (?P<fileGrp>.+?)/page_(?P<pageid>.*)\\.(?P<ext>[^\\.]*)' \\
             --file-id 'FILE_{{ fileGrp }}_{{ pageid }}' \\
             --page-id 'PHYS_{{ pageid }}' \\
             --file-grp "{{ fileGrp }}" \\
diff --git a/src/ocrd/constants.py b/src/ocrd/constants.py
index 1d436a7fa9..796aff1895 100644
--- a/src/ocrd/constants.py
+++ b/src/ocrd/constants.py
@@ -1,7 +1,7 @@
 """
 Constants for ocrd.
 """
-from pkg_resources import resource_filename
+from ocrd_utils import resource_filename
 
 __all__ = [
     'TMP_PREFIX',
@@ -18,7 +18,7 @@ DEFAULT_UPLOAD_FOLDER = '/tmp/uploads-ocrd-core'
 DOWNLOAD_DIR = '/tmp/ocrd-core-downloads'
 DEFAULT_REPOSITORY_URL = 'http://localhost:5000/'
 
-BASHLIB_FILENAME = resource_filename(__name__, 'lib.bash')
-RESOURCE_LIST_FILENAME = resource_filename(__name__, 'resource_list.yml')
+BASHLIB_FILENAME = resource_filename(__package__, 'lib.bash')
+RESOURCE_LIST_FILENAME = resource_filename(__package__, 'resource_list.yml')
 
 RESOURCE_USER_LIST_COMMENT = "# OCR-D private resource list (consider sending a PR with your own resources to OCR-D/core)"
 
 BACKUP_DIR = '.backup'
diff --git a/src/ocrd/mets_server.py b/src/ocrd/mets_server.py
index 08011557b5..be2c1333e0 100644
--- a/src/ocrd/mets_server.py
+++ b/src/ocrd/mets_server.py
@@ -2,24 +2,24 @@
 # METS server functionality
 """
 import re
-from os import environ, _exit, chmod
-from io import BytesIO
-from typing import Any, Dict, Optional, Union, List, Tuple
+from os import _exit, chmod
+from typing import Dict, Optional, Union, List, Tuple
 from pathlib import Path
 from urllib.parse import urlparse
 import socket
+import atexit
 
-from fastapi import FastAPI, Request, File, Form, Response
+from fastapi import FastAPI, Request, Form, Response
 from fastapi.responses import JSONResponse
-from requests import request, Session as requests_session
+from requests import Session as requests_session
 from requests.exceptions import ConnectionError
 from requests_unixsocket import Session as requests_unixsocket_session
 from pydantic import BaseModel, Field, ValidationError
 import uvicorn
 
-from ocrd_models import OcrdMets, OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent
-from ocrd_utils import initLogging, getLogger, deprecated_alias
+from ocrd_models import OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent
+from ocrd_utils import getLogger, deprecated_alias
 
 #
 # Models
@@ -197,9 +197,10 @@ def __init__(self, workspace, url):
         self.log = getLogger(f'ocrd.mets_server[{self.url}]')
 
     def shutdown(self):
-        self.log.info("Shutting down METS server")
         if self.is_uds:
-            Path(self.url).unlink()
+            if Path(self.url).exists():
+                self.log.warning(f'UDS socket {self.url} still exists, removing it')
+                Path(self.url).unlink()
         # os._exit because uvicorn catches SystemExit raised by sys.exit
         _exit(0)
 
@@ -296,7 +297,7 @@ async def stop():
             """
             Stop the server
             """
-            getLogger('ocrd.models.ocrd_mets').info('Shutting down')
+            getLogger('ocrd.models.ocrd_mets').info(f'Shutting down METS Server {self.url}')
             workspace.save_mets()
             self.shutdown()
 
@@ -308,6 +309,7 @@ async def stop():
             self.log.debug(f"chmod 0o677 {self.url}")
             server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
             server.bind(self.url)  # creates the socket file
+            atexit.register(self.shutdown)
             server.close()
             chmod(self.url, 0o666)
             uvicorn_kwargs = {'uds': self.url}
diff --git a/src/ocrd/processor/base.py b/src/ocrd/processor/base.py
index 38b7848a03..6107688bc2 100644
--- a/src/ocrd/processor/base.py
+++ b/src/ocrd/processor/base.py
@@ -9,8 +9,6 @@
     'run_processor'
 ]
 
-from warnings import warn
-from pkg_resources import resource_filename
 from os.path import exists
 from shutil import copyfileobj
 import json
@@ -30,7 +28,8 @@
     list_resource_candidates,
     pushd_popd,
     list_all_resources,
-    get_processor_resource_types
+    get_processor_resource_types,
+    resource_filename,
 )
 from ocrd_validators import ParameterValidator
 from ocrd_models.ocrd_page import MetadataItemType, LabelType, LabelsType
@@ -266,7 +265,7 @@ def moduledir(self):
         """
         The filesystem path of the module directory.
         """
-        return resource_filename(self.module, '')
+        return resource_filename(self.module, '.')
 
     @property
     def input_files(self):
diff --git a/src/ocrd/processor/builtin/dummy_processor.py b/src/ocrd/processor/builtin/dummy_processor.py
index 74b2dafb86..0fb00af533 100644
--- a/src/ocrd/processor/builtin/dummy_processor.py
+++ b/src/ocrd/processor/builtin/dummy_processor.py
@@ -1,6 +1,5 @@
 # pylint: disable=missing-module-docstring,invalid-name
 from os.path import join, basename
-from pkg_resources import resource_string
 
 import click
 
@@ -13,11 +12,12 @@
     make_file_id,
     MIME_TO_EXT,
     MIMETYPE_PAGE,
-    parse_json_string_with_comments
+    parse_json_string_with_comments,
+    resource_string
 )
 from ocrd_modelfactory import page_from_file
 
-OCRD_TOOL = parse_json_string_with_comments(resource_string(__name__, 'dummy/ocrd-tool.json').decode('utf8'))
+OCRD_TOOL = parse_json_string_with_comments(resource_string(__package__ + '.dummy', 'ocrd-tool.json'))
 
 class DummyProcessor(Processor):
     """
diff --git a/src/ocrd/processor/helpers.py b/src/ocrd/processor/helpers.py
index 209fa5f67d..f5b6010636 100644
--- a/src/ocrd/processor/helpers.py
+++ b/src/ocrd/processor/helpers.py
@@ -11,7 +11,7 @@
 from click import wrap_text
 
 from ocrd.workspace import Workspace
-from ocrd_utils import freeze_args, getLogger, config, setOverrideLogLevel, getLevelName
+from ocrd_utils import freeze_args, getLogger, config, setOverrideLogLevel, getLevelName, sparkline
 
 
 __all__ = [
@@ -106,7 +106,6 @@ def run_processor(
         if any(x in config.OCRD_PROFILE for x in ['RSS', 'PSS']):
             backend = 'psutil_pss' if 'PSS' in config.OCRD_PROFILE else 'psutil'
         from memory_profiler import memory_usage
-        from sparklines import sparklines
         try:
             mem_usage = memory_usage(proc=processor.process,
                 # only run process once
@@ -123,7 +122,7 @@
             chdir(old_cwd)
         mem_usage_values = [mem for mem, _ in mem_usage]
         mem_output = 'memory consumption: '
-        mem_output += ''.join(sparklines(mem_usage_values))
+        mem_output += sparkline(mem_usage_values)
         mem_output += ' max: %.2f MiB min: %.2f MiB' % (max(mem_usage_values), min(mem_usage_values))
         logProfile.info(mem_output)
     else:
diff --git a/src/ocrd/workspace.py b/src/ocrd/workspace.py
index 7772c54d79..633e45acf5 100644
--- a/src/ocrd/workspace.py
+++ b/src/ocrd/workspace.py
@@ -82,7 +82,7 @@ def __init__(self, resolver, directory, mets=None, mets_basename=DEFAULT_METS_BA
             mets = ClientSideOcrdMets(mets_server_url)
             if mets.workspace_path != self.directory:
                 raise ValueError(f"METS server {mets_server_url} workspace directory {mets.workspace_path} differs "
-                                 "from local workspace directory {self.directory}. These are not the same workspaces.")
+                                 f"from local workspace directory {self.directory}. These are not the same workspaces.")
         else:
             mets = OcrdMets(filename=self.mets_target)
         self.mets = mets
diff --git a/src/ocrd/workspace_bagger.py b/src/ocrd/workspace_bagger.py
index f838a65894..5c10103ce5 100644
--- a/src/ocrd/workspace_bagger.py
+++ b/src/ocrd/workspace_bagger.py
@@ -9,7 +9,6 @@
 import sys
 from bagit import Bag, make_manifests, _load_tag_file, _make_tag_file, _make_tagmanifest_file  # pylint: disable=no-name-in-module
 from distutils.dir_util import copy_tree
-from pkg_resources import get_distribution
 
 from ocrd_utils import (
     pushd_popd,
@@ -19,6 +18,7 @@
     DEFAULT_METS_BASENAME,
     MIMETYPE_PAGE,
     VERSION,
+    dist_version,
 )
 from ocrd_validators.constants import BAGIT_TXT, TMP_BAGIT_PREFIX, OCRD_BAGIT_PROFILE_URL
 from ocrd_modelfactory import page_from_file
@@ -117,8 +117,8 @@ def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_bas
         bag.info['BagIt-Profile-Identifier'] = OCRD_BAGIT_PROFILE_URL
         bag.info['Bag-Software-Agent'] = 'ocrd/core %s (bagit.py %s, bagit_profile %s) [cmdline: "%s"]' % (
             VERSION,  # TODO
-            get_distribution('bagit').version,
-            get_distribution('bagit_profile').version,
+            dist_version('ocrd-fork-bagit'),
+            dist_version('ocrd-fork-bagit_profile'),
             ' '.join(sys.argv))
 
         bag.info['Ocrd-Identifier'] = ocrd_identifier
diff --git a/src/ocrd_models/constants.py b/src/ocrd_models/constants.py
index 6c8b0e1017..01068b7f34 100644
--- a/src/ocrd_models/constants.py
+++ b/src/ocrd_models/constants.py
@@ -1,8 +1,7 @@
 """
 Constants for ocrd_models.
 """
-from pkg_resources import resource_string
-import re
+from ocrd_utils import resource_string
 
 __all__ = [
     'IDENTIFIER_PRIORITY',
@@ -32,7 +31,7 @@
 
 IDENTIFIER_PRIORITY = ['purl', 'urn', 'doi', 'url']
 
-METS_XML_EMPTY = resource_string(__name__, 'mets-empty.xml')
+METS_XML_EMPTY = resource_string(__package__, 'mets-empty.xml')
 
 NAMESPACES = {
     'mets': "http://www.loc.gov/METS/",
diff --git a/src/ocrd_models/ocrd_mets.py b/src/ocrd_models/ocrd_mets.py
index 3319f8f6ff..ae4d75e290 100644
--- a/src/ocrd_models/ocrd_mets.py
+++ b/src/ocrd_models/ocrd_mets.py
@@ -52,7 +52,7 @@ def empty_mets(now=None, cache_flag=False):
         """
         if not now:
             now = datetime.now().isoformat()
-        tpl = METS_XML_EMPTY.decode('utf-8')
+        tpl = METS_XML_EMPTY
         tpl = tpl.replace('{{ VERSION }}', VERSION)
         tpl = tpl.replace('{{ NOW }}', '%s' % now)
         return OcrdMets(content=tpl.encode('utf-8'), cache_flag=cache_flag)
diff --git a/src/ocrd_network/database.py b/src/ocrd_network/database.py
index 4945c13353..946ed95a5b 100644
--- a/src/ocrd_network/database.py
+++ b/src/ocrd_network/database.py
@@ -28,17 +28,17 @@
 from .utils import call_sync
 
 
-async def initiate_database(db_url: str):
+async def initiate_database(db_url: str, db_name: str = 'ocrd'):
     client = AsyncIOMotorClient(db_url)
     await init_beanie(
-        database=client.get_default_database(default='ocrd'),
+        database=client.get_default_database(default=db_name),
         document_models=[DBProcessorJob, DBWorkflowJob, DBWorkspace, DBWorkflowScript]
     )
 
 
 @call_sync
-async def sync_initiate_database(db_url: str):
-    await initiate_database(db_url)
+async def sync_initiate_database(db_url: str, db_name: str = 'ocrd'):
+    await initiate_database(db_url, db_name)
 
 
 async def db_create_workspace(mets_path: str) -> DBWorkspace:
@@ -60,6 +60,11 @@ async def db_create_workspace(mets_path: str) -> DBWorkspace:
     return workspace_db
 
 
+@call_sync
+async def sync_db_create_workspace(mets_path: str) -> DBWorkspace:
+    return await db_create_workspace(mets_path=mets_path)
+
+
 async def db_get_workspace(workspace_id: str = None, workspace_mets_path: str = None) -> DBWorkspace:
     workspace = None
     if not workspace_id and not workspace_mets_path:
@@ -134,6 +139,15 @@ async def sync_db_update_workspace(workspace_id: str = None, workspace_mets_path
     return await db_update_workspace(workspace_id=workspace_id, workspace_mets_path=workspace_mets_path, **kwargs)
 
 
+async def db_create_processing_job(db_processing_job: DBProcessorJob) -> DBProcessorJob:
+    return await db_processing_job.insert()
+
+
+@call_sync
+async def sync_db_create_processing_job(db_processing_job: DBProcessorJob) -> DBProcessorJob:
+    return await db_create_processing_job(db_processing_job=db_processing_job)
+
+
 async def db_get_processing_job(job_id: str) -> DBProcessorJob:
     job = await DBProcessorJob.find_one(
         DBProcessorJob.job_id == job_id)
@@ -180,6 +194,15 @@ async def sync_db_update_processing_job(job_id: str, **kwargs) -> DBProcessorJob
     return await db_update_processing_job(job_id=job_id, **kwargs)
 
 
+async def db_create_workflow_job(db_workflow_job: DBWorkflowJob) -> DBWorkflowJob:
+    return await db_workflow_job.insert()
+
+
+@call_sync
+async def sync_db_create_workflow_job(db_workflow_job: DBWorkflowJob) -> DBWorkflowJob:
+    return await db_create_workflow_job(db_workflow_job=db_workflow_job)
+
+
 async def db_get_workflow_job(job_id: str) -> DBWorkflowJob:
     job = await DBWorkflowJob.find_one(DBWorkflowJob.job_id == job_id)
     if not job:
@@ -202,6 +225,15 @@ async def sync_db_get_processing_jobs(job_ids: List[str]) -> [DBProcessorJob]:
    return await db_get_processing_jobs(job_ids)
 
 
+async def db_create_workflow_script(db_workflow_script: DBWorkflowScript) -> DBWorkflowScript:
+    return await db_workflow_script.insert()
+
+
+@call_sync
+async def sync_db_create_workflow_script(db_workflow_script: DBWorkflowScript) -> DBWorkflowScript:
+    return await db_create_workflow_script(db_workflow_script=db_workflow_script)
+
+
 async def db_get_workflow_script(workflow_id: str) -> DBWorkflowScript:
     workflow = await DBWorkflowScript.find_one(DBWorkflowScript.workflow_id == workflow_id)
     if not workflow:
@@ -221,6 +253,7 @@ async def db_find_first_workflow_script_by_content(content_hash: str) -> DBWorkf
     return workflow
 
 
+# TODO: Resolve the inconsistency between the async and sync versions of the same method
 @call_sync
 async def sync_db_find_first_workflow_script_by_content(workflow_id: str) -> DBWorkflowScript:
     return await db_get_workflow_script(workflow_id)
diff --git a/src/ocrd_network/models/job.py b/src/ocrd_network/models/job.py
index e5230aa5fd..6cb31bfb92 100644
--- a/src/ocrd_network/models/job.py
+++ b/src/ocrd_network/models/job.py
@@ -20,6 +20,8 @@ class StateEnum(str, Enum):
     success = 'SUCCESS'
     # Processing job failed
     failed = 'FAILED'
+    # Processing job has not been assigned yet
+    unset = 'UNSET'
 
 
 class PYJobInput(BaseModel):
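As an aside, a minimal sketch of how the new synchronous wrappers above are meant to be used from blocking code such as tests (the connection string and field values are hypothetical; tests/network/test_db.py below shows the real usage):

```python
from src.ocrd_network.database import sync_initiate_database, sync_db_create_processing_job
from src.ocrd_network.models import DBProcessorJob, StateEnum

# hypothetical MongoDB address; the integration tests read theirs from test_config
sync_initiate_database('mongodb://localhost:27017', db_name='ocrd_test')
job = sync_db_create_processing_job(DBProcessorJob(
    job_id='some-job-id',
    processor_name='ocrd-dummy',
    state=StateEnum.unset,          # the newly introduced default state
    path_to_mets='/data/mets.xml',
    input_file_grps=['DEFAULT'],
    output_file_grps=['OCR-D-DUMMY']
))
```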
diff --git a/src/ocrd_network/processing_server.py b/src/ocrd_network/processing_server.py
index e6606f90f0..3c09e8a107 100644
--- a/src/ocrd_network/processing_server.py
+++ b/src/ocrd_network/processing_server.py
@@ -1,3 +1,4 @@
+from datetime import datetime
 from hashlib import md5
 import httpx
 import json
@@ -127,6 +128,14 @@ def __init__(self, config_path: str, host: str, port: int) -> None:
         else:
             self.internal_job_callback_url = f'http://{host}:{port}/result_callback'
 
+        self.router.add_api_route(
+            path='/',
+            endpoint=self.home_page,
+            methods=['GET'],
+            status_code=status.HTTP_200_OK,
+            summary='Get information about the processing server'
+        )
+
         # Create routes
         self.router.add_api_route(
             path='/stop',
@@ -219,6 +228,15 @@ def __init__(self, config_path: str, host: str, port: int) -> None:
             summary='Get information about a workflow run',
         )
 
+        self.router.add_api_route(
+            path='/workflow/job-simple/{workflow_job_id}',
+            endpoint=self.get_workflow_info_simple,
+            methods=['GET'],
+            tags=['workflow', 'processing'],
+            status_code=status.HTTP_200_OK,
+            summary='Get simplified overall job status',
+        )
+
         self.router.add_api_route(
             path='/workflow',
             endpoint=self.upload_workflow,
@@ -288,6 +306,14 @@ async def on_shutdown(self) -> None:
         """
         await self.stop_deployed_agents()
 
+    async def home_page(self):
+        message = f"The home page of the {self.title}"
+        json_message = {
+            "message": message,
+            "time": datetime.now().strftime("%Y-%m-%d %H:%M")
+        }
+        return json_message
+
     async def stop_deployed_agents(self) -> None:
         self.deployer.kill_all()
 
@@ -322,6 +348,12 @@ def create_message_queues(self) -> None:
             unique_only=True
         )
 
+        # TODO: Reconsider and refactor this.
+        # Add ocrd-dummy by default if it is not available, for the integration tests.
+        # A proper Processing Worker / Processor Server registration endpoint is needed on the Processing Server side
+        if 'ocrd-dummy' not in queue_names:
+            queue_names.append('ocrd-dummy')
+
         for queue_name in queue_names:
             # The existence/validity of the worker.name is not tested.
             # Even if an ocr-d processor does not exist, the queue is created
@@ -378,6 +410,12 @@ def processing_agent_exists(self, processor_name: str, agent_type: str) -> bool:
         if agent_type not in [NETWORK_AGENT_SERVER, NETWORK_AGENT_WORKER]:
             return False
         if agent_type == NETWORK_AGENT_WORKER:
+            # TODO: Reconsider and refactor this.
+            # Add ocrd-dummy by default if it is not available, for the integration tests.
+            # A proper Processing Worker / Processor Server registration endpoint
+            # is needed on the Processing Server side
+            if processor_name == 'ocrd-dummy':
+                return True
             if not self.check_if_queue_exists(processor_name):
                 return False
         if agent_type == NETWORK_AGENT_SERVER:
@@ -434,15 +472,25 @@ async def push_processor_job(self, processor_name: str, data: PYJobInput) -> PYJ
 
         validate_job_input(self.log, data.processor_name, ocrd_tool, data)
 
-        db_workspace = await db_get_workspace(
-            workspace_id=data.workspace_id,
-            workspace_mets_path=data.path_to_mets
-        )
-        if not db_workspace:
-            raise HTTPException(
-                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-                detail=f"Workspace with id: {data.workspace_id} or path: {data.path_to_mets} not found"
-            )
+        if data.workspace_id:
+            try:
+                db_workspace = await db_get_workspace(workspace_id=data.workspace_id)
+            except ValueError as error:
+                self.log.exception(error)
+                raise HTTPException(
+                    status_code=status.HTTP_404_NOT_FOUND,
+                    detail=f"Workspace with id `{data.workspace_id}` not found in the DB."
+                )
+        else:  # data.path_to_mets provided instead
+            try:
+                # TODO: Reconsider and refactor this. Core cannot create workspaces by api, but processing-server needs
+                # the workspace in the database. Here the workspace is created if the path is available locally and
+                # not existing in the DB - since it has not been uploaded through the Workspace Server.
+                await db_create_workspace(data.path_to_mets)
+            except FileNotFoundError:
+                raise HTTPException(status_code=status.HTTP_404_NOT_FOUND,
+                                    detail=f"Mets file not existing: {data.path_to_mets}")
+
         workspace_key = data.path_to_mets if data.path_to_mets else data.workspace_id
         # initialize the request counter for the workspace_key
         self.cache_processing_requests.update_request_counter(workspace_key=workspace_key, by_value=0)
@@ -882,6 +930,41 @@ async def get_workflow_info(self, workflow_job_id) -> Dict:
             })
         return res
 
+    async def get_workflow_info_simple(self, workflow_job_id) -> Dict[str, StateEnum]:
+        """
+        Simplified version of `get_workflow_info` that returns a single state for the entire workflow.
+        - If a single processing job fails, the entire workflow job status is set to FAILED.
+        - If there are any processing jobs running, regardless of other states such as QUEUED and CACHED,
+          the entire workflow job status is set to RUNNING.
+        - If all processing jobs have finished successfully, only then is the workflow job status set to SUCCESS.
+        """
+        try:
+            workflow_job = await db_get_workflow_job(workflow_job_id)
+        except ValueError:
+            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND,
+                                detail=f"Workflow-Job with id: {workflow_job_id} not found")
+        job_ids: List[str] = [job_id for lst in workflow_job.processing_job_ids.values() for job_id in lst]
+        jobs = await db_get_processing_jobs(job_ids)
+
+        workflow_job_state = StateEnum.unset
+        success_jobs = 0
+        for job in jobs:
+            if job.state == StateEnum.cached or job.state == StateEnum.queued:
+                continue
+            if job.state == StateEnum.failed or job.state == StateEnum.cancelled:
+                workflow_job_state = StateEnum.failed
+                break
+            if job.state == StateEnum.running:
+                workflow_job_state = StateEnum.running
+            if job.state == StateEnum.success:
+                success_jobs += 1
+        # if all jobs succeeded
+        if len(job_ids) == success_jobs:
+            workflow_job_state = StateEnum.success
+        return {"state": workflow_job_state}
+
     async def upload_workflow(self, workflow: UploadFile) -> Dict:
         """ Store a script for a workflow in the database
         """
diff --git a/src/ocrd_network/utils.py b/src/ocrd_network/utils.py
index 8f92706f46..4f66554bc3 100644
--- a/src/ocrd_network/utils.py
+++ b/src/ocrd_network/utils.py
@@ -3,7 +3,7 @@
 from pika import URLParameters
 from pymongo import uri_parser as mongo_uri_parser
 from re import match as re_match
-from requests import Session as Session_TCP
+from requests import get, Session as Session_TCP
 from requests_unixsocket import Session as Session_UDS
 from typing import Dict, List
 from uuid import uuid4
@@ -49,6 +49,18 @@ def generate_id() -> str:
     return str(uuid4())
 
 
+def is_url_responsive(url: str, retries: int = 0) -> bool:
+    while True:
+        try:
+            response = get(url)
+            if response.status_code == 200:
+                return True
+        except Exception:
+            pass
+        # count down retries on any failure, whether an exception or a non-200 status
+        if retries <= 0:
+            return False
+        retries -= 1
+
+
 def validate_and_load_config(config_path: str) -> Dict:
     # Load and validate the config
     with open(config_path) as fin:
@@ -64,7 +76,7 @@ def verify_database_uri(mongodb_address: str) -> str:
         # perform validation check
         mongo_uri_parser.parse_uri(uri=mongodb_address, validate=True)
     except Exception as error:
-        raise ValueError(f"The database address '{mongodb_address}' is in wrong format, {error}")
+        raise ValueError(f"The MongoDB address '{mongodb_address}' is in an invalid format, {error}")
     return mongodb_address
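A quick usage sketch for the new helper (the address is hypothetical; the compose file below health-checks the same endpoint via curl):

```python
from src.ocrd_network.utils import is_url_responsive

# hypothetical Processing Server address; retry a few times before giving up
if is_url_responsive('http://localhost:8000/', retries=3):
    print('Processing Server is up')
```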
diff --git a/src/ocrd_utils/__init__.py b/src/ocrd_utils/__init__.py
index 90cd554779..d03c2a920c 100644
--- a/src/ocrd_utils/__init__.py
+++ b/src/ocrd_utils/__init__.py
@@ -158,7 +158,10 @@
 from .introspect import (
     freeze_args,
     set_json_key_value_overrides,
-    membername
+    membername,
+    resource_filename,
+    resource_string,
+    dist_version
 )
 
 from .logging import (
@@ -198,6 +201,7 @@
     partition_list,
     parse_json_string_or_file,
     parse_json_string_with_comments,
+    sparkline,
     remove_non_path_from_url,
     safe_filename)
diff --git a/src/ocrd_utils/constants.py b/src/ocrd_utils/constants.py
index 0b9f0ae02b..ff95a9fbbb 100644
--- a/src/ocrd_utils/constants.py
+++ b/src/ocrd_utils/constants.py
@@ -1,7 +1,7 @@
 """
 Constants for ocrd_utils.
 """
-from pkg_resources import get_distribution
+from .introspect import dist_version
 from re import compile as regex_compile
 
 __all__ = [
@@ -18,7 +18,7 @@
     'VERSION',
 ]
 
-VERSION = get_distribution('ocrd').version
+VERSION = dist_version('ocrd')
 
 MIMETYPE_PAGE = 'application/vnd.prima.page+xml'
 
@@ -110,3 +110,24 @@
 RESOURCE_LOCATIONS = ['data', 'cwd', 'system', 'module']
 
 DEFAULT_METS_BASENAME = 'mets.xml'
+
+
+# 2581 ▁ LOWER ONE EIGHTH BLOCK
+# 2582 ▂ LOWER ONE QUARTER BLOCK
+# 2583 ▃ LOWER THREE EIGHTHS BLOCK
+# 2584 ▄ LOWER HALF BLOCK
+# 2585 ▅ LOWER FIVE EIGHTHS BLOCK
+# 2586 ▆ LOWER THREE QUARTERS BLOCK
+# 2587 ▇ LOWER SEVEN EIGHTHS BLOCK
+# 2588 █ FULL BLOCK
+SPARKLINE_CHARS = [
+    ' ',
+    '\u2581',
+    '\u2582',
+    '\u2583',
+    '\u2584',
+    '\u2585',
+    '\u2586',
+    '\u2587',
+    '\u2588',
+]
diff --git a/src/ocrd_utils/introspect.py b/src/ocrd_utils/introspect.py
index cfd3d32b52..a86b9b8984 100644
--- a/src/ocrd_utils/introspect.py
+++ b/src/ocrd_utils/introspect.py
@@ -3,8 +3,25 @@
 """
 import json
 from functools import wraps
+from pathlib import Path
 from frozendict import frozendict
+import atexit
+from contextlib import ExitStack
+# cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
+import sys
+if sys.version_info < (3, 9):
+    import importlib_resources
+else:
+    import importlib.resources as importlib_resources
+
+if sys.version_info < (3, 8):
+    import importlib_metadata
+else:
+    import importlib.metadata as importlib_metadata
+
+file_manager = ExitStack()
+atexit.register(file_manager.close)
 
 # Taken from https://github.com/OCR-D/core/pull/884
 def freeze_args(func):
@@ -32,3 +49,14 @@ def set_json_key_value_overrides(obj, *kvpairs):
         except json.decoder.JSONDecodeError:
             obj[k] = v
     return obj
+
+def resource_filename(pkg : str, fname : str) -> Path:
+    ref = importlib_resources.files(pkg) / fname
+    return file_manager.enter_context(importlib_resources.as_file(ref))
+
+def resource_string(pkg : str, fname : str) -> str:
+    with open(resource_filename(pkg, fname), 'r', encoding='utf-8') as f:
+        return f.read()
+
+def dist_version(module : str) -> str:
+    return importlib_metadata.version(module)
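For illustration, this is how the new helpers replace the removed pkg_resources calls elsewhere in this diff (return types per the definitions above):

```python
from ocrd_utils import resource_filename, resource_string, dist_version

bashlib_path = resource_filename('ocrd', 'lib.bash')               # a pathlib.Path, valid until exit
mets_template = resource_string('ocrd_models', 'mets-empty.xml')   # now a str, not bytes
ocrd_version = dist_version('ocrd')                                # replaces get_distribution('ocrd').version
```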
diff --git a/src/ocrd_utils/str.py b/src/ocrd_utils/str.py
index f5b9242d35..406fcc9754 100644
--- a/src/ocrd_utils/str.py
+++ b/src/ocrd_utils/str.py
@@ -4,7 +4,8 @@
 
 import re
 import json
-from .constants import REGEX_FILE_ID
+from typing import List, Union
+from .constants import REGEX_FILE_ID, SPARKLINE_CHARS
 from .deprecate import deprecation_warning
 from warnings import warn
 from numpy import array_split
@@ -235,3 +236,18 @@ def partition_list(lst, chunks, chunk_index=None):
     if chunk_index is not None:
         return [ret[chunk_index]]
     return ret
+
+def sparkline(values : List[int]) -> str:
+    """
+    Render a list of points with block characters
+    """
+    if not values or any(x is None or not isinstance(x, (int, float)) or x < 0 for x in values):
+        # return an empty string on empty, negative or non-numeric input; better not to
+        # output a sparkline than to cancel execution due to problematic input
+        return ''
+    max_value = max(values)
+    if not max_value:
+        # avoid division by zero when all values are zero
+        return ''
+    max_mapping = len(SPARKLINE_CHARS) - 1
+    # normalize to 0..1 and convert to index in SPARKLINE_CHARS
+    mapped = [int(x / max_value * max_mapping) for x in values]
+    return ''.join(SPARKLINE_CHARS[x] for x in mapped)
+
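A short usage sketch (the sample values are made up; run_processor in helpers.py above feeds it memory samples from memory_profiler in the same way):

```python
from ocrd_utils import sparkline

mem_usage_values = [10.4, 14.9, 30.0, 28.2, 55.7, 41.3]  # hypothetical samples in MiB
print('memory consumption: ' + sparkline(mem_usage_values))
# one block character per sample, scaled relative to the maximum: ▁▂▄▄█▅
```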
diff --git a/src/ocrd_validators/constants.py b/src/ocrd_validators/constants.py
index fc1ff445ae..b8d145bbba 100644
--- a/src/ocrd_validators/constants.py
+++ b/src/ocrd_validators/constants.py
@@ -2,7 +2,7 @@
 Constants for ocrd_validators.
 """
 import yaml
-from pkg_resources import resource_string, resource_filename
+from ocrd_utils import resource_string, resource_filename
 
 __all__ = [
     'PROCESSING_SERVER_CONFIG_SCHEMA',
@@ -21,10 +21,10 @@
     'XSD_PATHS',
 ]
 
-PROCESSING_SERVER_CONFIG_SCHEMA = yaml.safe_load(resource_string(__name__, 'processing_server_config.schema.yml'))
-MESSAGE_SCHEMA_PROCESSING = yaml.safe_load(resource_string(__name__, 'message_processing.schema.yml'))
-MESSAGE_SCHEMA_RESULT = yaml.safe_load(resource_string(__name__, 'message_result.schema.yml'))
-OCRD_TOOL_SCHEMA = yaml.safe_load(resource_string(__name__, 'ocrd_tool.schema.yml'))
+PROCESSING_SERVER_CONFIG_SCHEMA = yaml.safe_load(resource_string(__package__, 'processing_server_config.schema.yml'))
+MESSAGE_SCHEMA_PROCESSING = yaml.safe_load(resource_string(__package__, 'message_processing.schema.yml'))
+MESSAGE_SCHEMA_RESULT = yaml.safe_load(resource_string(__package__, 'message_result.schema.yml'))
+OCRD_TOOL_SCHEMA = yaml.safe_load(resource_string(__package__, 'ocrd_tool.schema.yml'))
 
 RESOURCE_LIST_SCHEMA = {
     'type': 'object',
     'additionalProperties': False,
@@ -32,7 +32,7 @@
         '^ocrd-.*': OCRD_TOOL_SCHEMA['properties']['tools']['patternProperties']['ocrd-.*']['properties']['resources']
     }
 }
-OCRD_BAGIT_PROFILE = yaml.safe_load(resource_string(__name__, 'bagit-profile.yml'))
+OCRD_BAGIT_PROFILE = yaml.safe_load(resource_string(__package__, 'bagit-profile.yml'))
 
 BAGIT_TXT = 'BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8'
 FILE_GROUP_PREFIX = 'OCR-D-'
@@ -42,5 +42,5 @@
 XSD_METS_URL = 'https://www.loc.gov/standards/mets/mets.xsd'
 XSD_PAGE_URL = 'http://www.primaresearch.org/schema/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd'
 XSD_PATHS = {}
-XSD_PATHS[XSD_METS_URL] = resource_filename(__name__, 'mets.xsd')
-XSD_PATHS[XSD_PAGE_URL] = resource_filename(__name__, 'page.xsd')
+XSD_PATHS[XSD_METS_URL] = resource_filename(__package__, 'mets.xsd')
+XSD_PATHS[XSD_PAGE_URL] = resource_filename(__package__, 'page.xsd')
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000000..1f35071075
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,15 @@
+from tests.network.config import test_config
+
+pytest_plugins = [
+    "tests.network.fixtures_mongodb",
+    "tests.network.fixtures_rabbitmq"
+]
+
+test_config.add("DB_NAME", description="...", default=(True, 'ocrd_network_test'))
+test_config.add("DB_URL", description="...", default=(True, 'mongodb://network_test:network_test@0.0.0.0:27017'))
+
+test_config.add('DEFAULT_EXCHANGER_NAME', description="...", default=(True, 'ocrd-network-default'))
+test_config.add('DEFAULT_QUEUE', description="...", default=(True, 'ocrd-network-default'))
+
+test_config.add('PROCESSING_SERVER_URL', description="...", default=(True, "http://0.0.0.0:8000"))
+test_config.add('RABBITMQ_URL', description="...", default=(True, "amqp://network_test:network_test@0.0.0.0:5672/"))
diff --git a/tests/network/.env b/tests/network/.env
new file mode 100644
index 0000000000..7e96308c06
--- /dev/null
+++ b/tests/network/.env
@@ -0,0 +1,23 @@
+DOCKER_OCRD_NETWORK_NAME=ocrd_network_test
+DOCKER_OCRD_NETWORK_MTU=1450
+
+OCRD_NETWORK_LOGS_ROOT=/tmp/ocrd_network_logs
+OCRD_NETWORK_SOCKETS_ROOT=/tmp/ocrd_network_sockets
+
+OCRD_PS_HOST=ps-docker-host
+OCRD_PS_PORT=8000
+OCRD_PS_URL=http://${OCRD_PS_HOST}.${DOCKER_OCRD_NETWORK_NAME}:${OCRD_PS_PORT}
+
+MONGODB_NAME=ocrd_network_test
+MONGODB_USER=network_test
+MONGODB_PASS=network_test
+MONGODB_HOST=mongodb-docker-host
+MONGODB_PORT=27017
+MONGODB_URL=mongodb://${MONGODB_USER}:${MONGODB_PASS}@${MONGODB_HOST}.${DOCKER_OCRD_NETWORK_NAME}:${MONGODB_PORT}
+
+RABBITMQ_FEATURE_FLAGS=quorum_queue,implicit_default_bindings,classic_mirrored_queue_version
+RABBITMQ_USER=network_test
+RABBITMQ_PASS=network_test
+RABBITMQ_HOST=rabbitmq-docker-host
+RABBITMQ_PORT=5672
+RABBITMQ_URL=amqp://${RABBITMQ_USER}:${RABBITMQ_PASS}@${RABBITMQ_HOST}.${DOCKER_OCRD_NETWORK_NAME}:${RABBITMQ_PORT}
diff --git a/tests/network/__init__.py b/tests/network/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/network/config.py b/tests/network/config.py
new file mode 100644
index 0000000000..646833aeea
--- /dev/null
+++ b/tests/network/config.py
@@ -0,0 +1,134 @@
+from pathlib import Path
+from tempfile import gettempdir
+from src.ocrd_utils.config import OcrdEnvConfig
+from src.ocrd_utils.config import _ocrd_download_timeout_parser
+
+test_config = OcrdEnvConfig()
+
+test_config.add(
+    name='OCRD_METS_CACHING',
+    description='If set to `true`, access to the METS file is cached, speeding in-memory search and modification.',
+    validator=lambda val: val in ('true', 'false', '0', '1'),
+    parser=lambda val: val in ('true', '1')
+)
+
+test_config.add(
+    name='OCRD_MAX_PROCESSOR_CACHE',
+    description="""
+    Maximum number of processor instances (for each set of parameters) to be kept in memory (including loaded models)
+    for processing workers or processor servers.
+    """,
+    parser=int,
+    default=(True, 128)
+)
+
+test_config.add(
+    name='OCRD_PROFILE',
+    description="""
+    Whether to enable gathering runtime statistics
+    on the `ocrd.profile` logger (comma-separated):
+    - `CPU`: yields CPU and wall-time,
+    - `RSS`: also yields peak memory (resident set size)
+    - `PSS`: also yields peak memory (proportional set size)
+    """,
+    validator=lambda val: all(t in ('', 'CPU', 'RSS', 'PSS') for t in val.split(',')),
+    default=(True, '')
+)
+
+test_config.add(
+    name="OCRD_PROFILE_FILE",
+    description="""
+    If set, then the CPU profile is written to this file for later perusal with analysis tools like snakeviz
+    """
+)
+
+test_config.add(
+    name="OCRD_DOWNLOAD_RETRIES",
+    description="Number of times to retry failed attempts for downloads of workspace files.",
+    validator=int,
+    parser=int,
+)
+
+test_config.add(
+    name="OCRD_DOWNLOAD_TIMEOUT",
+    description="Timeout in seconds for connecting or reading (comma-separated) when downloading.",
+    parser=_ocrd_download_timeout_parser
+)
+
+test_config.add(
+    name="OCRD_NETWORK_SERVER_ADDR_PROCESSING",
+    description="Default address of Processing Server to connect to (for `ocrd network client processing`).",
+    default=(True, '')
+)
+
+test_config.add(
+    name="OCRD_NETWORK_SERVER_ADDR_WORKFLOW",
+    description="Default address of Workflow Server to connect to (for `ocrd network client workflow`).",
+    default=(True, '')
+)
+
+test_config.add(
+    name="OCRD_NETWORK_SERVER_ADDR_WORKSPACE",
+    description="Default address of Workspace Server to connect to (for `ocrd network client workspace`).",
+    default=(True, '')
+)
+
+test_config.add(
+    name="OCRD_NETWORK_WORKER_QUEUE_CONNECT_ATTEMPTS",
+    description="""
+    Number of attempts for a worker to create its queue. Helpful if the rabbitmq-server needs time to be fully started
+    """,
+    parser=int,
+    default=(True, 1)
+)
+
+test_config.add(
+    name="OCRD_NETWORK_SOCKETS_ROOT_DIR",
+    description="The root directory where all mets server related socket files are created",
+    parser=lambda val: Path(val),
+    default=(True, Path(gettempdir(), "ocrd_network_sockets"))
+)
+test_config.OCRD_NETWORK_SOCKETS_ROOT_DIR.mkdir(parents=True, exist_ok=True)
+# Remove socket files left from previous integration tests
+for path in test_config.OCRD_NETWORK_SOCKETS_ROOT_DIR.iterdir():
+    if path.is_socket() and path.suffix == '.sock':
+        path.unlink()
+
+test_config.add(
+    name="OCRD_NETWORK_LOGS_ROOT_DIR",
+    description="The root directory where all ocrd_network related file logs are stored",
+    parser=lambda val: Path(val),
+    default=(True, Path(gettempdir(), "ocrd_network_logs"))
+)
+test_config.OCRD_NETWORK_LOGS_ROOT_DIR.mkdir(parents=True, exist_ok=True)
+
+test_config.add(
+    name="HOME",
+    description="Directory to look for `ocrd_logging.conf`, fallback for unset XDG variables.",
+    # description="HOME directory, cf. https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html",
+    validator=lambda val: Path(val).is_dir(),
+    parser=lambda val: Path(val),
+    default=(True, lambda: Path.home())
+)
+
+test_config.add(
+    name="XDG_DATA_HOME",
+    description="Directory to look for `./ocrd/resources.yml` (i.e. `ocrd resmgr` user database)",
+    parser=lambda val: Path(val),
+    default=(True, lambda: Path(test_config.HOME, '.local/share'))
+)
+
+test_config.add(
+    name="XDG_CONFIG_HOME",
+    description="Directory to look for `./ocrd-resources/*` (i.e. `ocrd resmgr` data location)",
+    parser=lambda val: Path(val),
+    default=(True, lambda: Path(test_config.HOME, '.config'))
+)
+
+test_config.add(
+    name="OCRD_LOGGING_DEBUG",
+    description="Print information about the logging setup to STDERR",
+    default=(True, False),
+    validator=lambda val: isinstance(val, bool) or val in ('true', 'false', '0', '1'),
+    parser=lambda val: val in ('true', '1')
+)
diff --git a/tests/network/docker-compose.yml b/tests/network/docker-compose.yml
new file mode 100644
index 0000000000..a5cef49e23
--- /dev/null
+++ b/tests/network/docker-compose.yml
@@ -0,0 +1,120 @@
+networks:
+  ocrd_network_test:
+    name: ${DOCKER_OCRD_NETWORK_NAME}
+    driver: bridge
+    driver_opts:
+      com.docker.network.driver.mtu: ${DOCKER_OCRD_NETWORK_MTU}
+
+services:
+
+  ocrd_network_mongo_db:
+    image: "mongo"
+    hostname: ${MONGODB_HOST}
+    container_name: ocrd_network_mongo_db
+    networks:
+      - ${DOCKER_OCRD_NETWORK_NAME}
+    ports:
+      - ${MONGODB_PORT}:27017
+    environment:
+      - MONGO_INITDB_ROOT_USERNAME=${MONGODB_USER}
+      - MONGO_INITDB_ROOT_PASSWORD=${MONGODB_PASS}
+    healthcheck:
+      test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet
+      interval: 1s
+      timeout: 3s
+      retries: 30
+
+  ocrd_network_rabbit_mq:
+    image: "rabbitmq:3.12-management"
+    hostname: ${RABBITMQ_HOST}
+    container_name: ocrd_network_rabbit_mq
+    networks:
+      - ${DOCKER_OCRD_NETWORK_NAME}
+    ports:
+      - ${RABBITMQ_PORT}:5672
+      - 15672:15672
+      - 25672:25672
+    environment:
+      - RABBITMQ_DEFAULT_USER=${RABBITMQ_USER}
+      - RABBITMQ_DEFAULT_PASS=${RABBITMQ_PASS}
+      - RABBITMQ_FEATURE_FLAGS=${RABBITMQ_FEATURE_FLAGS}
+    healthcheck:
+      test: rabbitmq-diagnostics check_port_connectivity
+      interval: 1s
+      timeout: 3s
+      retries: 30
+
+  ocrd_network_processing_server:
+    image: "ocrd_core_test"
+    build:
+      context: ../../
+      dockerfile: Dockerfile
+      args:
+        BASE_IMAGE: 'ubuntu:22.04'
+      target: ocrd_core_test
+    hostname: ${OCRD_PS_HOST}
+    container_name: ocrd_network_processing_server
+    depends_on:
+      ocrd_network_mongo_db:
+        condition: service_healthy
+      ocrd_network_rabbit_mq:
+        condition: service_healthy
+    networks:
+      - ${DOCKER_OCRD_NETWORK_NAME}
+    ports:
+      - ${OCRD_PS_PORT}:8000
+    environment:
+      DB_NAME: ${MONGODB_NAME}
+      DB_URL: ${MONGODB_URL}
+      RABBITMQ_URL: ${RABBITMQ_URL}
+      OCRD_NETWORK_LOGS_ROOT_DIR: /ocrd-data/ocrd_network_logs
+      OCRD_NETWORK_SOCKETS_ROOT_DIR: /ocrd-data/ocrd_network_sockets
+    healthcheck:
+      test: curl -f ${OCRD_PS_URL}/
+      interval: 1s
+      timeout: 3s
+      retries: 30
+    volumes:
+      - ${OCRD_NETWORK_LOGS_ROOT}:/ocrd-data/ocrd_network_logs
+      - ${OCRD_NETWORK_SOCKETS_ROOT}:/ocrd-data/ocrd_network_sockets
+      - "./dummy-workflow.txt:/ocrd-data/assets/dummy-workflow.txt"
+      - "./ocrd_logging.conf:/etc/ocrd_logging.conf"
+      - "./ps_config.yml:/ocrd-data/ps_config.yml"
+    command: ocrd network processing-server -a 0.0.0.0:8000 /ocrd-data/ps_config.yml
+
+  ocrd_dummy_processing_worker:
+    image: "ocrd_core_test"
+    depends_on:
+      ocrd_network_processing_server:
+        condition: service_healthy
+    networks:
+      - ${DOCKER_OCRD_NETWORK_NAME}
+    environment:
+      OCRD_NETWORK_LOGS_ROOT_DIR: /ocrd-data/ocrd_network_logs
+    volumes:
+      - ${OCRD_NETWORK_LOGS_ROOT}:/ocrd-data/ocrd_network_logs
+      - ${OCRD_NETWORK_SOCKETS_ROOT}:/ocrd-data/ocrd_network_sockets
+      - "./dummy-workflow.txt:/ocrd-data/assets/dummy-workflow.txt"
+      - "./ocrd_logging.conf:/etc/ocrd_logging.conf"
+    command: ocrd-dummy worker --database ${MONGODB_URL} --queue ${RABBITMQ_URL}
+
+  ocrd_network_core_test:
+    image: "ocrd_core_test"
+    container_name: core_test
+    depends_on:
+      ocrd_network_processing_server:
+        condition: service_healthy
+    networks:
+      - ${DOCKER_OCRD_NETWORK_NAME}
+    environment:
+      DB_NAME: ${MONGODB_NAME}
+      DB_URL: ${MONGODB_URL}
+      PROCESSING_SERVER_URL: ${OCRD_PS_URL}
+      RABBITMQ_URL: ${RABBITMQ_URL}
+      OCRD_NETWORK_LOGS_ROOT_DIR: /ocrd-data/ocrd_network_logs
+      OCRD_NETWORK_SOCKETS_ROOT_DIR: /ocrd-data/ocrd_network_sockets
+    volumes:
+      - ${OCRD_NETWORK_LOGS_ROOT}:/ocrd-data/ocrd_network_logs
+      - ${OCRD_NETWORK_SOCKETS_ROOT}:/ocrd-data/ocrd_network_sockets
+      - "./dummy-workflow.txt:/ocrd-data/assets/dummy-workflow.txt"
+      - "./ocrd_logging.conf:/etc/ocrd_logging.conf"
diff --git a/tests/network/dummy-workflow.txt b/tests/network/dummy-workflow.txt
new file mode 100644
index 0000000000..8d4e04d651
--- /dev/null
+++ b/tests/network/dummy-workflow.txt
@@ -0,0 +1,3 @@
+dummy -I OCR-D-IMG -O OCR-D-DUMMY1
+dummy -I OCR-D-DUMMY1 -O OCR-D-DUMMY2
+dummy -I OCR-D-DUMMY2 -O OCR-D-DUMMY3
diff --git a/tests/network/fixtures_mongodb.py b/tests/network/fixtures_mongodb.py
new file mode 100644
index 0000000000..1409bafe72
--- /dev/null
+++ b/tests/network/fixtures_mongodb.py
@@ -0,0 +1,10 @@
+from pytest import fixture
+from src.ocrd_network.database import sync_initiate_database
+from src.ocrd_network.utils import verify_database_uri
+from tests.network.config import test_config
+
+
+@fixture(scope="package", name="mongo_client")
+def fixture_mongo_client():
+    verify_database_uri(test_config.DB_URL)
+    sync_initiate_database(test_config.DB_URL, test_config.DB_NAME)
diff --git a/tests/network/fixtures_rabbitmq.py b/tests/network/fixtures_rabbitmq.py
new file mode 100644
index 0000000000..a3b1300cf2
--- /dev/null
+++ b/tests/network/fixtures_rabbitmq.py
@@ -0,0 +1,75 @@
+from pika.credentials import PlainCredentials
+from pytest import fixture
+from src.ocrd_network.rabbitmq_utils import RMQConnector, RMQConsumer, RMQPublisher
+from src.ocrd_network.utils import verify_and_parse_mq_uri
+from tests.network.config import test_config
+
+
+RABBITMQ_URL = test_config.RABBITMQ_URL
+DEFAULT_EXCHANGER_NAME = test_config.DEFAULT_EXCHANGER_NAME
+DEFAULT_QUEUE = test_config.DEFAULT_QUEUE
+
+
+@fixture(scope="package", name="rabbitmq_defaults")
+def fixture_rabbitmq_defaults():
+    rmq_data = verify_and_parse_mq_uri(RABBITMQ_URL)
+    rmq_username = rmq_data["username"]
+    rmq_password = rmq_data["password"]
+    rmq_host = rmq_data["host"]
+    rmq_port = rmq_data["port"]
+    rmq_vhost = rmq_data["vhost"]
+
+    test_connection = RMQConnector.open_blocking_connection(
+        credentials=PlainCredentials(rmq_username, rmq_password),
+        host=rmq_host,
+        port=rmq_port,
+        vhost=rmq_vhost
+    )
+    test_channel = RMQConnector.open_blocking_channel(test_connection)
+    assert test_channel
+    RMQConnector.exchange_declare(
+        channel=test_channel,
+        exchange_name=DEFAULT_EXCHANGER_NAME,
+        exchange_type='direct',
+        durable=False
+    )
+    RMQConnector.queue_declare(channel=test_channel, queue_name=DEFAULT_QUEUE, durable=False)
+    RMQConnector.queue_bind(
+        channel=test_channel,
+        exchange_name=DEFAULT_EXCHANGER_NAME,
+        queue_name=DEFAULT_QUEUE,
+        routing_key=DEFAULT_QUEUE
+    )
+    # Clean all messages inside if any from previous tests
+    RMQConnector.queue_purge(channel=test_channel, queue_name=DEFAULT_QUEUE)
+
+
+@fixture(scope="package", name="rabbitmq_publisher")
+def fixture_rabbitmq_publisher(rabbitmq_defaults):
+    rmq_data = verify_and_parse_mq_uri(RABBITMQ_URL)
+    rmq_publisher = RMQPublisher(
+        host=rmq_data["host"],
+        port=rmq_data["port"],
+        vhost=rmq_data["vhost"]
+    )
+    rmq_publisher.authenticate_and_connect(
+        username=rmq_data["username"],
+        password=rmq_data["password"]
+    )
+    rmq_publisher.enable_delivery_confirmations()
+    yield rmq_publisher
+
+
+@fixture(scope="package", name="rabbitmq_consumer")
+def fixture_rabbitmq_consumer(rabbitmq_defaults):
+    rmq_data = verify_and_parse_mq_uri(RABBITMQ_URL)
+    rmq_consumer = RMQConsumer(
+        host=rmq_data["host"],
+        port=rmq_data["port"],
+        vhost=rmq_data["vhost"]
+    )
+    rmq_consumer.authenticate_and_connect(
+        username=rmq_data["username"],
+        password=rmq_data["password"]
+    )
+    yield rmq_consumer
diff --git a/tests/network/ocrd_logging.conf b/tests/network/ocrd_logging.conf
new file mode 100644
index 0000000000..ee081b7b56
--- /dev/null
+++ b/tests/network/ocrd_logging.conf
@@ -0,0 +1,150 @@
+# This is a template configuration file which allows customizing
+# format and destination of log messages with OCR-D.
+# It is meant as an example, and should be customized.
+# To get into effect, you must put a copy (under the same name)
+# into your CWD, HOME or /etc. These directories are searched
+# in said order, and the first find wins. When no config file
+# is found, the default logging configuration applies (cf. ocrd.logging.py).
+#
+# mandatory loggers section
+# configure loggers with corresponding keys "root", ""
+# each logger requires a corresponding configuration section below
+#
+[loggers]
+keys=root,ocrd,ocrd_network,ocrd_tensorflow,ocrd_shapely_geos,ocrd_PIL,uvicorn,uvicorn_access,uvicorn_error,multipart
+
+#
+# mandatory handlers section
+# handle output for each logging "channel"
+# i.e. console, file, smtp, syslog, http, ...
+# each handler requires a corresponding configuration section below
+#
+[handlers]
+keys=consoleHandler,fileHandler,processingServerHandler
+
+#
+# optional custom formatters section
+# format message fields, to be used differently by logging handlers
+# each formatter requires a corresponding formatter section below
+#
+[formatters]
+keys=defaultFormatter,detailedFormatter
+
+#
+# default logger "root" using consoleHandler
+#
+[logger_root]
+level=DEBUG
+handlers=consoleHandler,fileHandler
+
+
+#
+# additional logger configurations can be added
+# as separate configuration sections like below
+#
+# example logger "ocrd_workspace" uses fileHandler and overrides
+# default log level "INFO" with custom level "DEBUG"
+# "qualname" must match the logger label used in the corresponding
+# ocrd module
+# see in the module-of-interest (moi)
+#
+#[logger_ocrd_workspace]
+#level=DEBUG
+#handlers=fileHandler
+#qualname=ocrd.workspace
+
+# ocrd loggers
+[logger_ocrd]
+level=DEBUG
+handlers=consoleHandler,fileHandler
+qualname=ocrd
+propagate=0
+
+[logger_ocrd_network]
+level=DEBUG
+handlers=consoleHandler,processingServerHandler
+qualname=ocrd_network
+propagate=0
+
+#
+# logger tensorflow
+#
+[logger_ocrd_tensorflow]
+level=DEBUG
+handlers=consoleHandler
+qualname=tensorflow
+
+#
+# logger shapely.geos
+#
+[logger_ocrd_shapely_geos]
+level=DEBUG
+handlers=consoleHandler
+qualname=shapely.geos
+
+
+#
+# logger PIL
+#
+[logger_ocrd_PIL]
+level=DEBUG
+handlers=consoleHandler
+qualname=PIL
+
+#
+# uvicorn loggers
+#
+[logger_uvicorn]
+level=INFO
+handlers=consoleHandler
+qualname=uvicorn
+[logger_uvicorn_access]
+level=DEBUG
+handlers=consoleHandler
+qualname=uvicorn.access
+[logger_uvicorn_error]
+level=DEBUG
+handlers=consoleHandler
+qualname=uvicorn.error
+[logger_multipart]
+level=DEBUG
+handlers=consoleHandler
+qualname=multipart
+
+
+
+#
+# handle stderr output
+#
+[handler_consoleHandler]
+class=StreamHandler
+formatter=defaultFormatter
+args=(sys.stderr,)
+
+#
+# example logfile handler
+# handle output with logfile
+#
+[handler_fileHandler]
+class=FileHandler
+formatter=defaultFormatter
+args=('ocrd.log','a+')
+
+[handler_processingServerHandler]
+class=FileHandler
+formatter=defaultFormatter
+args=('/ocrd-data/ocrd_processing_server_conf.log','a+')
+
+#
+# default log format conforming to OCR-D (https://ocr-d.de/en/spec/cli#logging)
+#
+[formatter_defaultFormatter]
+format=%(asctime)s.%(msecs)03d %(levelname)s %(name)s - %(message)s
+datefmt=%H:%M:%S
+
+#
+# store more logging context information
+#
+[formatter_detailedFormatter]
+format=%(asctime)s.%(msecs)03d %(levelname)-8s (%(name)s)[%(filename)s:%(lineno)d] - %(message)s
+datefmt=%H:%M:%S
diff --git a/tests/network/ps_config.yml b/tests/network/ps_config.yml
new file mode 100644
index 0000000000..655a847b8f
--- /dev/null
+++ b/tests/network/ps_config.yml
@@ -0,0 +1,17 @@
+# the content of this config file is based on the .env
+internal_callback_url: http://ps-docker-host.ocrd_network_test:8000
+process_queue:
+  address: rabbitmq-docker-host.ocrd_network_test
+  port: 5672
+  skip_deployment: true
+  credentials:
+    username: network_test
+    password: network_test
+database:
+  address: mongodb-docker-host.ocrd_network_test
+  port: 27017
+  skip_deployment: true
+  credentials:
+    username: network_test
+    password: network_test
+hosts: []
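A minimal sketch of reading this config the way the Processing Server does on startup, via the loader shown in utils.py above (the path matches the compose volume mapping; assumes the validated config is returned as a plain dict):

```python
from src.ocrd_network.utils import validate_and_load_config

config = validate_and_load_config('/ocrd-data/ps_config.yml')
print(config['process_queue']['address'])  # rabbitmq-docker-host.ocrd_network_test
```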
sync_db_get_processing_job(job_id=job_id) + assert db_found_processing_job + db_updated_processing_job = sync_db_update_processing_job(job_id=job_id, state=StateEnum.running) + assert db_found_processing_job != db_updated_processing_job + db_found_updated_processing_job = sync_db_get_processing_job(job_id=job_id) + assert db_found_updated_processing_job + assert db_found_updated_processing_job == db_updated_processing_job + assert db_found_updated_processing_job.state == StateEnum.running + + with raises(ValueError): + sync_db_update_processing_job(job_id='non-existing', state=StateEnum.running) + sync_db_update_processing_job(job_id=job_id, non_existing_field='dummy_value') + sync_db_update_processing_job(job_id=job_id, processor_name='non-updatable-field') + + +def test_db_workspace_create(mongo_client): + mets_path = assets.path_to('kant_aufklaerung_1784/data/mets.xml') + db_created_workspace = sync_db_create_workspace(mets_path=mets_path) + assert db_created_workspace + assert db_created_workspace.workspace_mets_path == mets_path + db_found_workspace = sync_db_get_workspace(workspace_mets_path=mets_path) + assert db_found_workspace + assert db_found_workspace == db_created_workspace + + with raises(ValueError): + sync_db_get_workspace(workspace_id='non-existing-id') + sync_db_get_workspace(workspace_mets_path='non-existing-mets') + + with raises(FileNotFoundError): + sync_db_create_workspace(mets_path='non-existing-mets') + + +def test_db_workspace_update(mongo_client): + mets_path = assets.path_to('kant_aufklaerung_1784-binarized/data/mets.xml') + dummy_mets_server_url = '/tmp/dummy.sock' + + db_created_workspace = sync_db_create_workspace(mets_path=mets_path) + assert db_created_workspace + db_found_workspace = sync_db_get_workspace(workspace_mets_path=mets_path) + assert db_found_workspace + assert db_created_workspace == db_found_workspace + + db_updated_workspace = sync_db_update_workspace( + workspace_mets_path=mets_path, + mets_server_url=dummy_mets_server_url + ) + assert db_updated_workspace + assert db_updated_workspace != db_created_workspace + + db_found_updated_workspace = sync_db_get_workspace(workspace_mets_path=mets_path) + assert db_found_updated_workspace + assert db_found_updated_workspace.workspace_mets_path == mets_path + assert db_found_updated_workspace.mets_server_url == dummy_mets_server_url + assert db_found_updated_workspace == db_updated_workspace + + +# TODO: There is no db wrapper implemented due to direct access in the processing server... 
+
+
+# TODO: there is no DB wrapper implemented yet, because the processing server
+# accesses the database directly
+# TODO: should be refactored to use proper asset access
+def create_db_model_workflow_script(
+    workflow_id: str,
+    script_path: Path = Path(Path(__file__).parent, "dummy-workflow.txt")
+) -> DBWorkflowScript:
+    with open(script_path, 'rb') as fp:
+        content = fp.read().decode("utf-8")
+    content_hash = md5(content.encode("utf-8")).hexdigest()
+    return DBWorkflowScript(workflow_id=workflow_id, content=content, content_hash=content_hash)
+
+
+def test_db_workflow_script_create(mongo_client):
+    workflow_id = f'test_workflow_{datetime.now()}'
+    db_model_workflow_script = create_db_model_workflow_script(workflow_id=workflow_id)
+    db_created_workflow_script = sync_db_create_workflow_script(
+        db_workflow_script=db_model_workflow_script
+    )
+    assert db_created_workflow_script
+    db_found_workflow_script = sync_db_get_workflow_script(workflow_id=workflow_id)
+    assert db_found_workflow_script
+    assert db_found_workflow_script == db_created_workflow_script
+
+    with raises(ValueError):
+        sync_db_get_workflow_script(workflow_id='non-existing-id')
+
+
+def test_db_find_workflow_script_by_content(mongo_client):
+    workflow_id = f'test_workflow_{datetime.now()}'
+    db_model_workflow_script = create_db_model_workflow_script(workflow_id=workflow_id)
+    db_created_workflow_script = sync_db_create_workflow_script(
+        db_workflow_script=db_model_workflow_script
+    )
+    assert db_created_workflow_script
+    db_found_workflow_script = sync_db_find_first_workflow_script_by_content(
+        workflow_id=db_model_workflow_script.workflow_id
+    )
+    assert db_found_workflow_script
+    assert db_found_workflow_script == db_created_workflow_script
+
+
+# TODO: hard to implement without some refactoring in ocrd_network
+# and without proper db wrappers
+def test_db_workflow_job_create():
+    pass
+
+
+# TODO: hard to implement without some refactoring in ocrd_network
+# and without proper db wrappers
+def test_db_workflow_job_update():
+    pass
diff --git a/tests/network/test_processing_server.py b/tests/network/test_processing_server.py
new file mode 100644
index 0000000000..6d039f5bc9
--- /dev/null
+++ b/tests/network/test_processing_server.py
@@ -0,0 +1,97 @@
+from time import sleep
+from requests import get, post
+from src.ocrd_network import NETWORK_AGENT_WORKER
+from src.ocrd_network.models import StateEnum
+from tests.base import assets
+from tests.network.config import test_config
+
+PROCESSING_SERVER_URL = test_config.PROCESSING_SERVER_URL
+
+
+def poll_till_timeout_fail_or_success(test_url: str, tries: int, wait: int) -> StateEnum:
+    """Poll test_url until the job state is success or failed,
+    giving up after `tries` attempts (i.e. roughly `tries * wait` seconds)."""
+    job_state = StateEnum.unset
+    while tries > 0:
+        sleep(wait)
+        response = get(url=test_url)
+        assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}"
+        job_state = response.json()["state"]
+        if job_state in (StateEnum.success, StateEnum.failed):
+            break
+        tries -= 1
+    return job_state
+
+
+def test_processing_server_connectivity():
+    test_url = f'{PROCESSING_SERVER_URL}/'
+    response = get(test_url)
+    assert response.status_code == 200, \
+        f'Processing server is not reachable on: {test_url}, {response.status_code}'
+    message = response.json()['message']
+    assert message.startswith('The home page of'), \
+        'Processing server home page message is corrupted'
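+
+
+# For reference (illustrative; host and port are an assumption based on the
+# internal_callback_url in tests/network/ps_config.yml), the same checks can
+# be made manually against a deployed Processing Server:
+#
+#     curl http://localhost:8000/            # home page message
+#     curl http://localhost:8000/processor   # list of deployed processors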
+
+
+# TODO: the processing workers are still not registered when deployed separately;
+# fix that by extending the processing server.
+def test_processing_server_deployed_processors():
+    test_url = f'{PROCESSING_SERVER_URL}/processor'
+    response = get(test_url)
+    processors = response.json()
+    assert response.status_code == 200, \
+        f'Processing server: {test_url}, {response.status_code}'
+    assert processors == [], 'Mismatch in deployed processors'
+
+
+def test_processing_server_processing_request():
+    path_to_mets = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
+    test_processing_job_input = {
+        "path_to_mets": path_to_mets,
+        "input_file_grps": ['OCR-D-IMG'],
+        "output_file_grps": ['OCR-D-DUMMY'],
+        "agent_type": NETWORK_AGENT_WORKER,
+        "parameters": {}
+    }
+    test_processor = 'ocrd-dummy'
+    test_url = f'{PROCESSING_SERVER_URL}/processor/run/{test_processor}'
+    response = post(
+        url=test_url,
+        headers={"accept": "application/json"},
+        json=test_processing_job_input
+    )
+    assert response.status_code == 200, \
+        f'Processing server: {test_url}, {response.status_code}'
+    processing_job_id = response.json()["job_id"]
+    assert processing_job_id
+
+    job_state = poll_till_timeout_fail_or_success(
+        test_url=f"{PROCESSING_SERVER_URL}/processor/job/{processing_job_id}",
+        tries=10,
+        wait=10
+    )
+    assert job_state == StateEnum.success
+
+
+def test_processing_server_workflow_request():
+    # Note: the used workflow path is volume mapped
+    path_to_dummy_wf = "/ocrd-data/assets/dummy-workflow.txt"
+    path_to_mets = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
+
+    # submit the workflow job, closing the file handle after the request
+    test_url = f"{PROCESSING_SERVER_URL}/workflow/run?mets_path={path_to_mets}&page_wise=True"
+    with open(path_to_dummy_wf, 'rb') as workflow_file:
+        response = post(
+            url=test_url,
+            headers={"accept": "application/json"},
+            files={"workflow": workflow_file}
+        )
+    assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}"
+    wf_job_id = response.json()["job_id"]
+    assert wf_job_id
+
+    job_state = poll_till_timeout_fail_or_success(
+        test_url=f"{PROCESSING_SERVER_URL}/workflow/job-simple/{wf_job_id}",
+        tries=30,
+        wait=10
+    )
+    assert job_state == StateEnum.success
diff --git a/tests/network/test_rabbitmq.py b/tests/network/test_rabbitmq.py
new file mode 100644
index 0000000000..951266e5dc
--- /dev/null
+++ b/tests/network/test_rabbitmq.py
@@ -0,0 +1,79 @@
+from pika import BasicProperties
+from pickle import dumps, loads
+from tests.network.config import test_config
+
+DEFAULT_EXCHANGER_NAME = test_config.DEFAULT_EXCHANGER_NAME
+DEFAULT_QUEUE = test_config.DEFAULT_QUEUE
+
+
+def test_rmq_publish_then_consume_2_messages(rabbitmq_publisher, rabbitmq_consumer):
+    test_headers = {"Test Header": "Test Value"}
+    test_properties = BasicProperties(
+        app_id='webapi-processing-broker',
+        content_type='application/json',
+        headers=test_headers
+    )
+    rabbitmq_publisher.publish_to_queue(
+        queue_name=DEFAULT_QUEUE,
+        message="RabbitMQ test 123",
+        exchange_name=DEFAULT_EXCHANGER_NAME,
+        properties=test_properties
+    )
+    rabbitmq_publisher.publish_to_queue(
+        queue_name=DEFAULT_QUEUE,
+        message="RabbitMQ test 456",
+        exchange_name=DEFAULT_EXCHANGER_NAME,
+        properties=test_properties
+    )
+    assert rabbitmq_publisher.message_counter == 2
+
+    # Consume the 1st message
+    method_frame, header_frame, message = rabbitmq_consumer.get_one_message(
+        queue_name=DEFAULT_QUEUE,
+        auto_ack=True
+    )
+    assert method_frame.delivery_tag == 1  # 1st delivered message to this queue
+    assert method_frame.message_count == 1  # messages left in the queue
+    assert method_frame.redelivered is False
+    assert method_frame.exchange == DEFAULT_EXCHANGER_NAME
+    assert method_frame.routing_key == DEFAULT_QUEUE
+    # header_frame could be asserted in the same way
+    assert message.decode() == "RabbitMQ test 123"
+
+    # Consume the 2nd message
+    method_frame, header_frame, message = rabbitmq_consumer.get_one_message(
+        queue_name=DEFAULT_QUEUE,
+        auto_ack=True
+    )
+    assert method_frame.delivery_tag == 2  # 2nd delivered message to this queue
+    assert method_frame.message_count == 0  # messages left in the queue
+    assert method_frame.redelivered is False
+    assert method_frame.exchange == DEFAULT_EXCHANGER_NAME
+    assert method_frame.routing_key == DEFAULT_QUEUE
+    # header_frame could be asserted in the same way
+    assert message.decode() == "RabbitMQ test 456"
+
+
+def test_rmq_publish_then_consume_ocrd_message(rabbitmq_publisher, rabbitmq_consumer):
+    ocrd_processing_message = {
+        "job_id": "Test_job_id",
+        "workflow_id": "Test_workflow_id",
+        "workspace_id": "Test_workspace_id"
+    }
+    message_bytes = dumps(ocrd_processing_message)
+    rabbitmq_publisher.publish_to_queue(
+        queue_name=DEFAULT_QUEUE,
+        message=message_bytes,
+        exchange_name=DEFAULT_EXCHANGER_NAME,
+        properties=None
+    )
+
+    method_frame, header_frame, message = rabbitmq_consumer.get_one_message(
+        queue_name=DEFAULT_QUEUE,
+        auto_ack=True
+    )
+    assert method_frame.message_count == 0  # messages left in the queue
+    decoded_message = loads(message)
+    assert decoded_message["job_id"] == "Test_job_id"
+    assert decoded_message["workflow_id"] == "Test_workflow_id"
+    assert decoded_message["workspace_id"] == "Test_workspace_id"
diff --git a/tests/test_resource_manager.py b/tests/test_resource_manager.py
index fb6db3adc4..221b0a3af1 100644
--- a/tests/test_resource_manager.py
+++ b/tests/test_resource_manager.py
@@ -21,7 +21,8 @@ def test_resources_manager_config_default(monkeypatch, tmp_path):
     # arrange
     monkeypatch.setenv('HOME', str(tmp_path))
-    monkeypatch.delenv('XDG_CONFIG_HOME', raising=False)
+    if 'XDG_CONFIG_HOME' in os.environ:
+        monkeypatch.delenv('XDG_CONFIG_HOME', raising=False)
 
     # act
     mgr = OcrdResourceManager()
@@ -48,9 +49,9 @@
 
 def test_resources_manager_from_environment(tmp_path, monkeypatch):
     # arrange
-    monkeypatch.setenv('XDG_CONFIG_HOME', tmp_path)
-    monkeypatch.setenv('XDG_DATA_HOME', tmp_path)
-    monkeypatch.setenv('HOME', tmp_path)
+    monkeypatch.setenv('XDG_CONFIG_HOME', str(tmp_path))
+    monkeypatch.setenv('XDG_DATA_HOME', str(tmp_path))
+    monkeypatch.setenv('HOME', str(tmp_path))
 
     # act
     mgr = OcrdResourceManager()
diff --git a/tests/test_utils.py b/tests/test_utils.py
index d2093c465d..8fe3fd3733 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -21,6 +21,7 @@
     is_string,
     membername,
     generate_range,
+    sparkline,
     nth_url_segment,
     remove_non_path_from_url,
@@ -325,6 +326,12 @@ def test_partition_list():
     assert partition_list(lst_13, chunks=4) == [[1, 2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 13]]
     assert partition_list(lst_13, chunks=4, chunk_index=1) == [[5, 6, 7]]
 
+def test_sparkline():
+    assert sparkline([5, 2, 3]) == '█▃▄'
+    assert sparkline([1000, 1, 2222]) == '▃ █'
+    assert sparkline([8, 7, 6, 5, 4, 3, 2, 1, 0]) == '█▇▆▅▄▃▂▁ '
+    assert sparkline([-1, None, 'forty-two']) == ''
+
 if __name__ == '__main__':
     main(__file__)
diff --git a/tests/validator/test_xsd_validator.py b/tests/validator/test_xsd_validator.py
index 4d4deadc15..d0150338dd 100644
--- a/tests/validator/test_xsd_validator.py
+++ b/tests/validator/test_xsd_validator.py
@@ -25,7 +25,7 @@ def test_constructor(self):
     def test_mets_empty(self):
         with TemporaryDirectory() as tempdir:
             mets_path = Path(tempdir, 'mets.xml')
-            mets_path.write_bytes(METS_XML_EMPTY)
+            mets_path.write_text(METS_XML_EMPTY)
             report = XsdMetsValidator.validate(mets_path)
             self.assertEqual(len(report.errors), 2)
             self.assertEqual(report.errors[0],