diff --git a/.circleci/config.yml b/.circleci/config.yml index 047780b810..858d03901a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,7 +1,7 @@ -version: 2 +version: 2.1 orbs: - python: circleci/python@2.0.3 + python: circleci/python@2.1.1 jobs: @@ -16,9 +16,10 @@ jobs: PIP_INSTALL: pip3 install --user steps: - checkout - - run: HOMEBREW_NO_AUTO_UPDATE=1 brew install imagemagick geos bash + - run: HOMEBREW_NO_AUTO_UPDATE=1 brew install imagemagick geos bash opencv + - run: $PIP_INSTALL -U pip setuptools - run: make install - - run: PATH="/Users/distiller/Library/Python/3.9/bin:$PATH" make deps-test test benchmark + - run: export PATH="/Users/distiller/Library/Python/3.9/bin:$PATH"; make deps-test test benchmark test-python37: docker: @@ -121,4 +122,4 @@ workflows: - test-python39 - test-python310 - test-python311 - - test-macos + # - test-macos diff --git a/.dockerignore b/.dockerignore index aefc504c60..9da5b99503 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,12 @@ * -!ocrd* +!src* +!pyproject.toml +!requirements.txt !Makefile +!VERSION !LICENSE !README.md !.git +!tests +!requirements_test.txt +!.gitmodules diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 25999a199e..85a602a1b2 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -28,6 +28,10 @@ jobs: - name: Build the Docker image with GPU support # default tag uses docker.io, so override on command-line run: make docker-cuda DOCKER_TAG=${{ env.DOCKER_TAG }}-cuda DOCKER_BASE_IMAGE=${{ env.DOCKER_TAG }} + - name: Smoke Test that ocrd --help works + run: | + docker run --rm ${{ env.DOCKER_TAG }} ocrd --version + docker run --rm ${{ env.DOCKER_TAG }}-cuda ocrd --version - name: Login to GitHub Container Registry uses: docker/login-action@v2 with: diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml new file mode 100644 index 0000000000..ab34859c55 --- /dev/null +++ 
b/.github/workflows/integration-test.yml @@ -0,0 +1,58 @@ +name: Run ocrd network integration tests + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + python-version: + - '3.7' + - '3.8' + - '3.9' + - '3.10' + - '3.11' + os: + - ubuntu-22.04 + # - macos-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Homebrew + id: set-up-homebrew + uses: Homebrew/actions/setup-homebrew@master + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + if [[ "${{ matrix.os }}" == "ubuntu"* ]];then + sudo apt-get -y update + sudo make deps-ubuntu + else + HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 \ + HOMEBREW_NO_AUTO_UPDATE=1 \ + brew install imagemagick geos bash # opencv + fi + make install deps-test + - name: Install Docker on macOS + if: runner.os == 'macos' + run: | + brew install docker docker-compose + colima start + - name: Test network integration with pytest + run: | + if [[ "${{ matrix.os }}" == "macos"* ]];then + make integration-test DOCKER_COMPOSE=docker-compose + else + make integration-test + fi diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml new file mode 100644 index 0000000000..cfe282cd54 --- /dev/null +++ b/.github/workflows/unit-test.yml @@ -0,0 +1,63 @@ +name: Test core installation and run tests + +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + python-version: + - '3.7' + - '3.8' + - '3.9' + - '3.10' + - '3.11' + os: + - 
ubuntu-22.04 + - ubuntu-20.04 + - macos-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Homebrew + id: set-up-homebrew + uses: Homebrew/actions/setup-homebrew@master + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + if [[ "${{ matrix.os }}" == "ubuntu"* ]];then + sudo apt-get -y update + sudo make deps-ubuntu + else + HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 \ + HOMEBREW_NO_AUTO_UPDATE=1 \ + brew install imagemagick geos bash # opencv + fi + make install deps-test + - name: Test with pytest + run: | + make test benchmark + - name: test to ensure that --editable install works + run: | + make install-dev; ocrd --version + - name: Lint with flake8 + run: | + python -m pip install flake8 + # stop the build if there are Python syntax errors or undefined names + flake8 src --count --exit-zero --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 src --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics diff --git a/.gitignore b/.gitignore index 21aa725bf8..aff9382067 100644 --- a/.gitignore +++ b/.gitignore @@ -109,7 +109,6 @@ env2/ ocrd.egg-info .pytest_cache -/src /profile .~lock* .pynative @@ -125,5 +124,5 @@ tests/assets/test.ocrd.zip /foo sanders* ws1 -*.doctree +*.doctrees .vscode diff --git a/CHANGELOG.md b/CHANGELOG.md index d9c7119f3f..0cfabc9623 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,46 @@ Versioned according to [Semantic Versioning](http://semver.org/). 
## Unreleased +Added: + + * Basic integration test for `ocrd_network`, #1164 + * `ocrd-tool.json` for `ocrd-dummy` now in repo root, for ocrd_all's `make ocrd-all-tool.json`, #1168 + +Fixed: + + * METS Server: UDS sockets are removed on process exit, #117 + +Changed: + + * replace license-incompatible sparkline library with a simpler implementation, #1176 + * remove all pkg_resources calls with modern alternatives, no more run-time setuptools dependency, #1174 + +## [2.61.2] - 2024-01-24 + +Fixed: + + * another regression to docker deployment (requirements.txt missing), #1173 + +## [2.61.1] - 2024-01-23 + +Fixed: + + * deps-cuda: add workaround for keras-team/tf-keras#62, #1169 + * fix regression docker deployment, #1172 + + +## [2.61.0] - 2024-01-23 + +Changed: + + * :fire: simplify the project layout and distribution policy, #1166 + * In the future there will be only one distribution `ocrd` + * The previous separate distributions of the `ocrd_utils`, `ocrd_models`, `ocrd_modelfactory`, `ocrd_validators` and `ocrd_network` are all part of `ocrd` now + * Nothing needs to be changed in code using OCR-D/core, the package structure and API is the same as before + * Until the next major release, we will continue to provide distributions for `ocrd_utils` etc. that contain the same code as `ocrd` + * Using `ocrd_utils` etc. 
as distributions in `requirements.txt` or `install_requires` is now deprecated + * Once we release v3.0.0, these distributions will be depublished + ## [2.60.3] - 2024-01-10 Fixed: @@ -1957,6 +1997,9 @@ Fixed Initial Release +[2.61.2]: ../../compare/v2.61.2..v2.61.1 +[2.61.1]: ../../compare/v2.61.1..v2.61.1 +[2.61.0]: ../../compare/v2.61.0..v2.60.3 [2.60.3]: ../../compare/v2.60.3..v2.60.2 [2.60.2]: ../../compare/v2.60.2..v2.60.1 [2.60.1]: ../../compare/v2.60.1..v2.60.0 diff --git a/Dockerfile b/Dockerfile index 00bd8bb30a..95130fdd4a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ ARG BASE_IMAGE -FROM $BASE_IMAGE +FROM $BASE_IMAGE as ocrd_core_base ARG FIXUP=echo MAINTAINER OCR-D ENV DEBIAN_FRONTEND noninteractive @@ -9,17 +9,17 @@ ENV LANG=C.UTF-8 ENV PIP=pip WORKDIR /build-ocrd -COPY ocrd ./ocrd -COPY ocrd_modelfactory ./ocrd_modelfactory/ -COPY ocrd_models ./ocrd_models -COPY ocrd_utils ./ocrd_utils -RUN mv ./ocrd_utils/ocrd_logging.conf /etc -COPY ocrd_validators/ ./ocrd_validators -COPY ocrd_network/ ./ocrd_network + +COPY src ./src +COPY pyproject.toml . +COPY VERSION ./VERSION +COPY requirements.txt ./requirements.txt +RUN mv ./src/ocrd_utils/ocrd_logging.conf /etc COPY Makefile . COPY README.md . COPY LICENSE . COPY .git ./.git + RUN echo 'APT::Install-Recommends "0"; APT::Install-Suggests "0";' >/etc/apt/apt.conf.d/ocr-d.conf RUN apt-get update && apt-get -y install software-properties-common \ && apt-get update && apt-get -y install \ @@ -33,14 +33,25 @@ RUN apt-get update && apt-get -y install software-properties-common \ curl \ sudo \ git \ - && make deps-ubuntu \ - && python3 -m venv /usr/local \ + && make deps-ubuntu +RUN python3 -m venv /usr/local \ && hash -r \ - && pip install --upgrade pip setuptools wheel \ && make install \ - && eval $FIXUP \ - && rm -rf /build-ocrd + && eval $FIXUP WORKDIR /data CMD ["/usr/local/bin/ocrd", "--help"] + +FROM ocrd_core_base as ocrd_core_test +WORKDIR /build-ocrd +COPY Makefile . 
+RUN make assets +COPY tests ./tests +COPY .gitmodules . +COPY requirements_test.txt . +RUN pip install -r requirements_test.txt +RUN mkdir /ocrd-data && chmod 777 /ocrd-data + +CMD ["yes"] +# CMD ["make", "test", "integration-test"] diff --git a/Dockerfile.cuda b/Dockerfile.cuda index 52d7a27619..89db9603cf 100644 --- a/Dockerfile.cuda +++ b/Dockerfile.cuda @@ -1,5 +1,5 @@ ARG BASE_IMAGE -FROM $BASE_IMAGE +FROM $BASE_IMAGE AS ocrd_core_base ENV MAMBA_EXE=/usr/local/bin/conda ENV MAMBA_ROOT_PREFIX=/conda diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000..f002905188 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include README*.md +include requirements.txt +include VERSION diff --git a/Makefile b/Makefile index f2adfdca59..5df74e18ea 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,9 @@ LOG_LEVEL = INFO PYTHONIOENCODING=utf8 TESTDIR = $(CURDIR)/tests PYTEST_ARGS = --continue-on-collection-errors +VERSION = $(shell cat VERSION) + +DOCKER_COMPOSE = docker compose SPHINX_APIDOC = @@ -61,6 +64,8 @@ deps-cuda: mv bin/micromamba $(CONDA_EXE) # Install Conda system-wide (for interactive / login shells) echo 'export MAMBA_EXE=$(CONDA_EXE) MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) CONDA_PREFIX=$(CONDA_PREFIX) PATH=$(CONDA_PREFIX)/bin:$$PATH' >> /etc/profile.d/98-conda.sh +# workaround for tf-keras#62 + echo 'export XLA_FLAGS=--xla_gpu_cuda_data_dir=$(CONDA_PREFIX)/' >> /etc/profile.d/98-conda.sh mkdir -p $(CONDA_PREFIX)/lib $(CONDA_PREFIX)/include echo $(CONDA_PREFIX)/lib >> /etc/ld.so.conf.d/conda.conf # Get CUDA toolkit, including compiler and libraries with dev, @@ -119,14 +124,15 @@ deps-test: .PHONY: build install install-dev uninstall build: - $(PIP) install build setuptools_scm - $(foreach MODULE,$(BUILD_ORDER),$(PYTHON) -m build ./$(MODULE) &&) echo done + $(PIP) install build + $(PYTHON) -m build . # or use -n ? 
# (Re)install the tool install: #build -# $(PIP_INSTALL) $(BUILD_ORDER:%=./%/dist/ocrd*`$(PYTHON) -m setuptools_scm 2>/dev/null`*.whl) - $(foreach MODULE,$(BUILD_ORDER),$(PIP_INSTALL) ./$(MODULE) $(PIP_INSTALL_CONFIG_OPTION) &&) echo done + # not stricttly necessary but a precaution against outdated python build tools, https://github.com/OCR-D/core/pull/1166 + $(PIP) install -U pip wheel + $(PIP_INSTALL) . $(PIP_INSTALL_CONFIG_OPTION) @# workaround for shapely#1598 $(PIP) config set global.no-binary shapely @@ -138,11 +144,11 @@ install-dev: uninstall # Uninstall the tool uninstall: - $(PIP) uninstall -y $(call reverse,$(BUILD_ORDER)) + $(PIP) uninstall --yes ocrd # Regenerate python code from PAGE XSD -generate-page: GDS_PAGE = ocrd_models/ocrd_models/ocrd_page_generateds.py -generate-page: GDS_PAGE_USER = ocrd_models/ocrd_page_user_methods.py +generate-page: GDS_PAGE = src/ocrd_models/ocrd_page_generateds.py +generate-page: GDS_PAGE_USER = src/ocrd_page_user_methods.py generate-page: repo/assets generateDS \ -f \ @@ -152,7 +158,7 @@ generate-page: repo/assets --export "write etree" \ --disable-generatedssuper-lookup \ --user-methods=$(GDS_PAGE_USER) \ - ocrd_validators/ocrd_validators/page.xsd + src/ocrd_validators/page.xsd # hack to prevent #451: enum keys will be strings sed -i 's/(Enum):$$/(str, Enum):/' $(GDS_PAGE) # hack to ensure output has pc: prefix @@ -209,9 +215,16 @@ test: assets $(PYTHON) \ -m pytest $(PYTEST_ARGS) --durations=10\ --ignore-glob="$(TESTDIR)/**/*bench*.py" \ + --ignore-glob="$(TESTDIR)/network/*.py" \ $(TESTDIR) cd ocrd_utils ; $(PYTHON) -m pytest --continue-on-collection-errors -k TestLogging -k TestDecorators $(TESTDIR) +INTEGRATION_TEST_IN_DOCKER = docker exec core_test +integration-test: + $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d + -$(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_rmq or test_db or test_processing_server' -v + $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans + 
benchmark: $(PYTHON) -m pytest $(TESTDIR)/model/test_ocrd_mets_bench.py @@ -236,8 +249,8 @@ coverage: assets # Build documentation docs: for mod in $(BUILD_ORDER);do sphinx-apidoc -f -M -e \ - -o docs/api/$$mod $$mod/$$mod \ - 'ocrd_models/ocrd_models/ocrd_page_generateds.py' \ + -o docs/api/$$mod src/$$mod \ + 'src/ocrd_models/ocrd_page_generateds.py' \ ;done cd docs ; $(MAKE) html @@ -263,8 +276,13 @@ gh-pages: # pyclean: + rm -rf ./build + rm -rf ./dist + rm -rf htmlcov + rm -rf .benchmarks + rm -rf **/*.egg-info rm -f **/*.pyc - find . -name '__pycache__' -exec rm -rf '{}' \; + -find . -name '__pycache__' -exec rm -rf '{}' \; rm -rf .pytest_cache # @@ -288,8 +306,42 @@ docker-cuda: DOCKER_FILE = Dockerfile.cuda docker-cuda: docker docker docker-cuda: - docker build --progress=plain -f $(DOCKER_FILE) -t $(DOCKER_TAG) --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) . + docker build --progress=plain -f $(DOCKER_FILE) -t $(DOCKER_TAG) --target ocrd_core_base --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) . 
# Build wheels and source dist and twine upload them pypi: build - twine upload ocrd*/dist/ocrd*`$(PYTHON) -m setuptools_scm 2>/dev/null`*{tar.gz,whl} + twine upload dist/ocrd-$(VERSION)*{tar.gz,whl} + +pypi-workaround: build-workaround + for dist in $(BUILD_ORDER);do twine upload dist/$$dist-$(VERSION)*{tar.gz,whl};done + +# Only in place until v3 so we don't break existing installations +build-workaround: pyclean + cp pyproject.toml pyproject.toml.BAK + cp src/ocrd_utils/constants.py src/ocrd_utils/constants.py.BAK + cp src/ocrd/cli/__init__.py src/ocrd/cli/__init__.py.BAK + for dist in $(BUILD_ORDER);do \ + cat pyproject.toml.BAK | sed "s,^name =.*,name = \"$$dist\"," > pyproject.toml; \ + cat src/ocrd_utils/constants.py.BAK | sed "s,get_distribution('ocrd'),get_distribution('$$dist')," > src/ocrd_utils/constants.py; \ + cat src/ocrd/cli/__init__.py.BAK | sed "s,package_name='ocrd',package_name='$$dist'," > src/ocrd/cli/__init__.py; \ + $(MAKE) build; \ + done + rm pyproject.toml.BAK + rm src/ocrd_utils/constants.py.BAK + rm src/ocrd/cli/__init__.py.BAK + +# test that the aliased packages work in isolation and combined +test-workaround: build-workaround + for dist in $(BUILD_ORDER);do pip uninstall --yes $$dist;done + for dist in $(BUILD_ORDER);do \ + pip install dist/$$dist-*.whl ;\ + ocrd --version ;\ + make test ;\ + pip uninstall --yes $$dist ;\ + done + for dist in $(BUILD_ORDER);do \ + pip install dist/$$dist-*.whl ;\ + done + ocrd --version ;\ + make test ;\ + for dist in $(BUILD_ORDER);do pip uninstall --yes $$dist;done diff --git a/README.md b/README.md index 23fa8ab6b2..f2ad546316 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![image](https://img.shields.io/pypi/v/ocrd.svg)](https://pypi.org/project/ocrd/) [![image](https://circleci.com/gh/OCR-D/core.svg?style=svg)](https://circleci.com/gh/OCR-D/core) [![Docker Image 
CI](https://github.com/OCR-D/core/actions/workflows/docker-image.yml/badge.svg)](https://github.com/OCR-D/core/actions/workflows/docker-image.yml) +[![Unit Test CI](https://github.com/OCR-D/core/actions/workflows/unit-test.yml/badge.svg)](https://github.com/OCR-D/core/actions/workflows/unit-test.yml) [![image](https://scrutinizer-ci.com/g/OCR-D/core/badges/build.png?b=master)](https://scrutinizer-ci.com/g/OCR-D/core) [![image](https://codecov.io/gh/OCR-D/core/branch/master/graph/badge.svg)](https://codecov.io/gh/OCR-D/core) [![image](https://scrutinizer-ci.com/g/OCR-D/core/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/OCR-D/core) @@ -27,13 +28,6 @@ * [ocrd_network](#ocrd_network) * [ocrd](#ocrd) * [bash library](#bash-library) - * [bashlib API](#bashlib-api) - * [`ocrd__raise`](#ocrd__raise) - * [`ocrd__log`](#ocrd__log) - * [`ocrd__minversion`](#ocrd__minversion) - * [`ocrd__dumpjson`](#ocrd__dumpjson) - * [`ocrd__usage`](#ocrd__usage) - * [`ocrd__parse_argv`](#ocrd__parse_argv) * [Testing](#testing) * [See Also](#see-also) @@ -121,31 +115,31 @@ Some parts of the software are configured via environement variables: Contains utilities and constants, e.g. for logging, path normalization, coordinate calculation etc. -See [README for `ocrd_utils`](./ocrd_utils/README.md) for further information. +See [README for `ocrd_utils`](./README_ocrd_utils.md) for further information. ### ocrd_models Contains file format wrappers for PAGE-XML, METS, EXIF metadata etc. -See [README for `ocrd_models`](./ocrd_models/README.md) for further information. +See [README for `ocrd_models`](./README_ocrd_models.md) for further information. ### ocrd_modelfactory Code to instantiate [models](#ocrd-models) from existing data. -See [README for `ocrd_modelfactory`](./ocrd_modelfactory/README.md) for further information. +See [README for `ocrd_modelfactory`](./README_ocrd_modelfactory.md) for further information. 
### ocrd_validators Schemas and routines for validating BagIt, `ocrd-tool.json`, workspaces, METS, page, CLI parameters etc. -See [README for `ocrd_validators`](./ocrd_validators/README.md) for further information. +See [README for `ocrd_validators`](./README_ocrd_validators.md) for further information. ### ocrd_network Components related to OCR-D Web API -See [README for `ocrd_network`](./ocrd_network/README.md) for further information. +See [README for `ocrd_network`](./README_ocrd_network.md) for further information. ### ocrd @@ -153,166 +147,13 @@ Depends on all of the above, also contains decorators and classes for creating O Also contains the command line tool `ocrd`. -See [README for `ocrd`](./ocrd/README.md) for further information. +See [README for `ocrd`](./README_ocrd.md) for further information. ## bash library Builds a bash script that can be sourced by other bash scripts to create OCRD-compliant CLI. -For example: - - source `ocrd bashlib filename` - declare -A NAMESPACES MIMETYPES - eval NAMESPACES=( `ocrd bashlib constants NAMESPACES` ) - echo ${NAMESPACES[page]} - eval MIMETYPE_PAGE=( `ocrd bashlib constants MIMETYPE_PAGE` ) - echo $MIMETYPE_PAGE - eval MIMETYPES=( `ocrd bashlib constants EXT_TO_MIME` ) - echo ${MIMETYPES[.jpg]} - - -### bashlib CLI - -See [CLI usage](https://ocr-d.de/core/api/ocrd/ocrd.cli.bashlib.html) - -### bashlib API - -### `ocrd__raise` - -Raise an error and exit. - -### `ocrd__log` - -Delegate logging to [`ocrd log`](#ocrd-cli) - -### `ocrd__minversion` - -Ensure minimum version - -### `ocrd__dumpjson` - -Output ocrd-tool.json content verbatim. - -Requires `$OCRD_TOOL_JSON` and `$OCRD_TOOL_NAME` to be set: - -```sh -export OCRD_TOOL_JSON=/path/to/ocrd-tool.json -export OCRD_TOOL_NAME=ocrd-foo-bar -``` - -(Which you automatically get from [`ocrd__wrap`](#ocrd__wrap).) - -### `ocrd__show_resource` - -Output given resource file's content. - -### `ocrd__list_resources` - -Output all resource files' names. 
- -### `ocrd__usage` - -Print help on CLI usage. - -### `ocrd__parse_argv` - -Parses arguments according to [OCR-D CLI](https://ocr-d.de/en/spec/cli). -In doing so, depending on the values passed to it, may delegate to … -- [`ocrd__raise`](#ocrd__raise) and exit (if something went wrong) -- [`ocrd__usage`](#ocrd__usage) and exit -- [`ocrd__dumpjson`](#ocrd__dumpjson) and exit -- [`ocrd__show_resource`](#ocrd__show_resource) and exit -- [`ocrd__list_resources`](#ocrd__list_resources) and exit -- [`ocrd validate tasks`](#ocrd-cli) and return - -Expects an associative array ("hash"/"dict") **`ocrd__argv`** to be predefined: - - declare -A ocrd__argv=() - -This will be filled by the parser along the following keys: -- `overwrite`: whether `--overwrite` is enabled -- `profile`: whether `--profile` is enabled -- `profile_file`: the argument of `--profile-file` -- `log_level`: the argument of `--log-level` -- `mets_file`: absolute path of the `--mets` argument -- `working_dir`: absolute path of the `--working-dir` argument or the parent of `mets_file` -- `page_id`: the argument of `--page-id` -- `input_file_grp`: the argument of `--input-file-grp` -- `output_file_grp`: the argument of `--output-file-grp` - -Moreover, there will be an associative array **`params`** -with the fully expanded runtime values of the ocrd-tool.json parameters. - -### `ocrd__wrap` - -Parses an [ocrd-tool.json](https://ocr-d.de/en/spec/ocrd_tool) for a specific `tool` (i.e. processor `executable`). - -Delegates to … -- [`ocrd__parse_argv`](#ocrd__parse_argv), creating the `ocrd__argv` associative array -- [`ocrd bashlib input-files`](#ocrd-cli), creating the data structures used by [`ocrd__input_file`](#ocrd__input_file) - -Usage: `ocrd__wrap PATH/TO/OCRD-TOOL.JSON EXECUTABLE ARGS` - -For example: - - ocrd__wrap $SHAREDIR/ocrd-tool.json ocrd-olena-binarize "$@" - ... - -### `ocrd__input_file` - -(Requires [`ocrd__wrap`](#ocrd__wrap) to have been run first.) 
- -Access information on the input files according to the parsed CLI arguments: -- their file `url` (or local file path) -- their file `ID` -- their `mimetype` -- their `pageId` -- their proposed corresponding `outputFileId` (generated from `${ocrd__argv[output__file_grp]}` and input file `ID`) - -Usage: `ocrd__input_file NR KEY` - -For example: - - pageId=`ocrd__input_file 3 pageId` - -To be used in a **loop over all selected pages**: - - for ((n=0; n<${#ocrd__files[*]}; n++)); do - local in_fpath=($(ocrd__input_file $n url)) - local in_id=($(ocrd__input_file $n ID)) - local in_mimetype=($(ocrd__input_file $n mimetype)) - local in_pageId=($(ocrd__input_file $n pageId)) - local out_id=$(ocrd__input_file $n outputFileId) - local out_fpath="${ocrd__argv[output_file_grp]}/${out_id}.xml - - # process $in_fpath to $out_fpath ... - - declare -a options - if [ -n "$in_pageId" ]; then - options=( -g $in_pageId ) - else - options=() - fi - if [[ "${ocrd__argv[overwrite]}" == true ]];then - options+=( --force ) - fi - options+=( -G ${ocrd__argv[output_file_grp]} - -m $MIMETYPE_PAGE -i "$out_id" - "$out_fpath" ) - ocrd -l ${ocrd__argv[log_level]} workspace -d ${ocrd__argv[working_dir]} add "${options[@]}" - -> **Note**: If the `--input-file-grp` is **multi-valued** (N fileGrps separated by commas), -> then usage is similar: -> * The function `ocrd__input_file` can be used, but -> its results will be **lists** (delimited by whitespace and surrounded by single quotes), -> e.g. `[url]='file1.xml file2.xml' [ID]='id_file1 id_file2' [mimetype]='application/vnd.prima.page+xml image/tiff' ...`. -> * Therefore its results should be encapsulated in a (non-associative) **array variable** -> and without extra quotes, e.g. `in_file=($(ocrd__input_file 3 url))`, or as shown above. -> * This will yield the first fileGrp's results on index 0, -> which in bash will always be the same as if you referenced the array without index -> (so code does not need to be changed much), e.g. 
`test -f $in_file` which equals `test -f ${in_file[0]}`. -> * Additional fileGrps will have to be fetched from higher indexes, e.g. `test -f ${in_file[1]}`. - +See [README for `bashlib`](./README_bashlib.md) for further information. ## Testing diff --git a/README_bashlib.md b/README_bashlib.md new file mode 100644 index 0000000000..09199468cc --- /dev/null +++ b/README_bashlib.md @@ -0,0 +1,169 @@ +# bashlib + +> Builds a bash script that can be sourced by other bash scripts to create OCRD-compliant CLI. + +For example: + + source `ocrd bashlib filename` + declare -A NAMESPACES MIMETYPES + eval NAMESPACES=( `ocrd bashlib constants NAMESPACES` ) + echo ${NAMESPACES[page]} + eval MIMETYPE_PAGE=( `ocrd bashlib constants MIMETYPE_PAGE` ) + echo $MIMETYPE_PAGE + eval MIMETYPES=( `ocrd bashlib constants EXT_TO_MIME` ) + echo ${MIMETYPES[.jpg]} + + + +* [bashlib CLI](#bashlib-cli) +* [bashlib API](#bashlib-api) +* [`ocrd__raise`](#ocrd__raise) +* [`ocrd__log`](#ocrd__log) +* [`ocrd__minversion`](#ocrd__minversion) +* [`ocrd__dumpjson`](#ocrd__dumpjson) +* [`ocrd__usage`](#ocrd__usage) +* [`ocrd__parse_argv`](#ocrd__parse_argv) + + +## bashlib CLI + +See [CLI usage](https://ocr-d.de/core/api/ocrd/ocrd.cli.bashlib.html) + +## bashlib API + +### `ocrd__raise` + +Raise an error and exit. + +### `ocrd__log` + +Delegate logging to [`ocrd log`](#ocrd-cli) + +### `ocrd__minversion` + +Ensure minimum version + +### `ocrd__dumpjson` + +Output ocrd-tool.json content verbatim. + +Requires `$OCRD_TOOL_JSON` and `$OCRD_TOOL_NAME` to be set: + +```sh +export OCRD_TOOL_JSON=/path/to/ocrd-tool.json +export OCRD_TOOL_NAME=ocrd-foo-bar +``` + +(Which you automatically get from [`ocrd__wrap`](#ocrd__wrap).) + +### `ocrd__show_resource` + +Output given resource file's content. + +### `ocrd__list_resources` + +Output all resource files' names. + +### `ocrd__usage` + +Print help on CLI usage. + +### `ocrd__parse_argv` + +Parses arguments according to [OCR-D CLI](https://ocr-d.de/en/spec/cli). 
+In doing so, depending on the values passed to it, may delegate to … +- [`ocrd__raise`](#ocrd__raise) and exit (if something went wrong) +- [`ocrd__usage`](#ocrd__usage) and exit +- [`ocrd__dumpjson`](#ocrd__dumpjson) and exit +- [`ocrd__show_resource`](#ocrd__show_resource) and exit +- [`ocrd__list_resources`](#ocrd__list_resources) and exit +- [`ocrd validate tasks`](#ocrd-cli) and return + +Expects an associative array ("hash"/"dict") **`ocrd__argv`** to be predefined: + + declare -A ocrd__argv=() + +This will be filled by the parser along the following keys: +- `overwrite`: whether `--overwrite` is enabled +- `profile`: whether `--profile` is enabled +- `profile_file`: the argument of `--profile-file` +- `log_level`: the argument of `--log-level` +- `mets_file`: absolute path of the `--mets` argument +- `working_dir`: absolute path of the `--working-dir` argument or the parent of `mets_file` +- `page_id`: the argument of `--page-id` +- `input_file_grp`: the argument of `--input-file-grp` +- `output_file_grp`: the argument of `--output-file-grp` + +Moreover, there will be an associative array **`params`** +with the fully expanded runtime values of the ocrd-tool.json parameters. + +### `ocrd__wrap` + +Parses an [ocrd-tool.json](https://ocr-d.de/en/spec/ocrd_tool) for a specific `tool` (i.e. processor `executable`). + +Delegates to … +- [`ocrd__parse_argv`](#ocrd__parse_argv), creating the `ocrd__argv` associative array +- [`ocrd bashlib input-files`](#ocrd-cli), creating the data structures used by [`ocrd__input_file`](#ocrd__input_file) + +Usage: `ocrd__wrap PATH/TO/OCRD-TOOL.JSON EXECUTABLE ARGS` + +For example: + + ocrd__wrap $SHAREDIR/ocrd-tool.json ocrd-olena-binarize "$@" + ... + +### `ocrd__input_file` + +(Requires [`ocrd__wrap`](#ocrd__wrap) to have been run first.) 
+ +Access information on the input files according to the parsed CLI arguments: +- their file `url` (or local file path) +- their file `ID` +- their `mimetype` +- their `pageId` +- their proposed corresponding `outputFileId` (generated from `${ocrd__argv[output__file_grp]}` and input file `ID`) + +Usage: `ocrd__input_file NR KEY` + +For example: + + pageId=`ocrd__input_file 3 pageId` + +To be used in a **loop over all selected pages**: + + for ((n=0; n<${#ocrd__files[*]}; n++)); do + local in_fpath=($(ocrd__input_file $n url)) + local in_id=($(ocrd__input_file $n ID)) + local in_mimetype=($(ocrd__input_file $n mimetype)) + local in_pageId=($(ocrd__input_file $n pageId)) + local out_id=$(ocrd__input_file $n outputFileId) + local out_fpath="${ocrd__argv[output_file_grp]}/${out_id}.xml + + # process $in_fpath to $out_fpath ... + + declare -a options + if [ -n "$in_pageId" ]; then + options=( -g $in_pageId ) + else + options=() + fi + if [[ "${ocrd__argv[overwrite]}" == true ]];then + options+=( --force ) + fi + options+=( -G ${ocrd__argv[output_file_grp]} + -m $MIMETYPE_PAGE -i "$out_id" + "$out_fpath" ) + ocrd -l ${ocrd__argv[log_level]} workspace -d ${ocrd__argv[working_dir]} add "${options[@]}" + +> **Note**: If the `--input-file-grp` is **multi-valued** (N fileGrps separated by commas), +> then usage is similar: +> * The function `ocrd__input_file` can be used, but +> its results will be **lists** (delimited by whitespace and surrounded by single quotes), +> e.g. `[url]='file1.xml file2.xml' [ID]='id_file1 id_file2' [mimetype]='application/vnd.prima.page+xml image/tiff' ...`. +> * Therefore its results should be encapsulated in a (non-associative) **array variable** +> and without extra quotes, e.g. `in_file=($(ocrd__input_file 3 url))`, or as shown above. +> * This will yield the first fileGrp's results on index 0, +> which in bash will always be the same as if you referenced the array without index +> (so code does not need to be changed much), e.g. 
`test -f $in_file` which equals `test -f ${in_file[0]}`. +> * Additional fileGrps will have to be fetched from higher indexes, e.g. `test -f ${in_file[1]}`. + diff --git a/ocrd/README.md b/README_ocrd.md similarity index 100% rename from ocrd/README.md rename to README_ocrd.md diff --git a/ocrd_modelfactory/README.md b/README_ocrd_modelfactory.md similarity index 100% rename from ocrd_modelfactory/README.md rename to README_ocrd_modelfactory.md diff --git a/ocrd_models/README.md b/README_ocrd_models.md similarity index 100% rename from ocrd_models/README.md rename to README_ocrd_models.md diff --git a/ocrd_network/README.md b/README_ocrd_network.md similarity index 100% rename from ocrd_network/README.md rename to README_ocrd_network.md diff --git a/ocrd_utils/README.md b/README_ocrd_utils.md similarity index 100% rename from ocrd_utils/README.md rename to README_ocrd_utils.md diff --git a/ocrd_validators/README.md b/README_ocrd_validators.md similarity index 100% rename from ocrd_validators/README.md rename to README_ocrd_validators.md diff --git a/VERSION b/VERSION new file mode 100644 index 0000000000..fb82db5291 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +2.61.2 diff --git a/docs/api/ocrd_network/ocrd_network.constants.rst b/docs/api/ocrd_network/ocrd_network.constants.rst new file mode 100644 index 0000000000..041c77f22d --- /dev/null +++ b/docs/api/ocrd_network/ocrd_network.constants.rst @@ -0,0 +1,7 @@ +ocrd\_network.constants module +============================== + +.. 
automodule:: ocrd_network.constants + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/ocrd_network/ocrd_network.models.rst b/docs/api/ocrd_network/ocrd_network.models.rst index dba609ccb5..27bb84ad38 100644 --- a/docs/api/ocrd_network/ocrd_network.models.rst +++ b/docs/api/ocrd_network/ocrd_network.models.rst @@ -15,4 +15,5 @@ Submodules ocrd_network.models.job ocrd_network.models.messages ocrd_network.models.ocrd_tool + ocrd_network.models.workflow ocrd_network.models.workspace diff --git a/docs/api/ocrd_network/ocrd_network.models.workflow.rst b/docs/api/ocrd_network/ocrd_network.models.workflow.rst new file mode 100644 index 0000000000..f9046cd8e7 --- /dev/null +++ b/docs/api/ocrd_network/ocrd_network.models.workflow.rst @@ -0,0 +1,7 @@ +ocrd\_network.models.workflow module +==================================== + +.. automodule:: ocrd_network.models.workflow + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/ocrd_network/ocrd_network.rst b/docs/api/ocrd_network/ocrd_network.rst index bcecb065c8..ae12ae1f5d 100644 --- a/docs/api/ocrd_network/ocrd_network.rst +++ b/docs/api/ocrd_network/ocrd_network.rst @@ -23,6 +23,7 @@ Submodules :maxdepth: 4 ocrd_network.client + ocrd_network.constants ocrd_network.database ocrd_network.deployer ocrd_network.deployment_utils diff --git a/docs/conf.py b/docs/conf.py index cd2ef233f1..3ab2e1826f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,23 +12,21 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
# -import os -import sys -sys.path.insert(0, os.path.abspath('..')) -import json -import re -with open(os.path.join(os.path.abspath('..'), 'ocrd_utils', 'setup.py'), 'r', encoding='utf-8') as f: - VERSION = re.findall(r'\bversion=\'([^\']*)\'', f.read())[0] +# import os +# import sys +# # sys.path.insert(0, os.path.abspath('..')) +with open('VERSION', encoding='utf-8') as f: + VERSION = f.read() # -- Project information ----------------------------------------------------- project = u'ocrd' -copyright = u'2018-2021, OCR-D' +copyright = u'2018-2024, OCR-D' author = u'OCR-D' # The short X.Y version -version = VERSION +version = VERSION.rsplit('.', 1)[0] # The full version, including alpha/beta/rc tags release = VERSION diff --git a/ocrd-tool.json b/ocrd-tool.json new file mode 120000 index 0000000000..9ba9c7b311 --- /dev/null +++ b/ocrd-tool.json @@ -0,0 +1 @@ +./ocrd/ocrd/processor/builtin/dummy/ocrd-tool.json \ No newline at end of file diff --git a/ocrd/MANIFEST.in b/ocrd/MANIFEST.in deleted file mode 100644 index af6a7f6bad..0000000000 --- a/ocrd/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -recursive-include ocrd *.json *.yml *.yaml *.bash *.xml -include requirements.txt -include README.md diff --git a/ocrd/ocrd/.doctrees/environment.pickle b/ocrd/ocrd/.doctrees/environment.pickle deleted file mode 100644 index d941ba88ab..0000000000 Binary files a/ocrd/ocrd/.doctrees/environment.pickle and /dev/null differ diff --git a/ocrd/ocrd/processor/builtin/dummy/ocrd-tool.json b/ocrd/ocrd/processor/builtin/dummy/ocrd-tool.json deleted file mode 100644 index 3ad29b5d77..0000000000 --- a/ocrd/ocrd/processor/builtin/dummy/ocrd-tool.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "executable": "ocrd-dummy", - "description": "Bare-bones processor creates PAGE-XML and optionally copies file from input group to output group", - "steps": ["preprocessing/optimization"], - "categories": ["Image preprocessing"], - "input_file_grp": "DUMMY_INPUT", - "output_file_grp": "DUMMY_OUTPUT", - 
"parameters": { - "copy_files": { - "type": "boolean", - "default": false, - "description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)" - } - } -} diff --git a/ocrd/requirements.txt b/ocrd/requirements.txt deleted file mode 100644 index 9c057532a2..0000000000 --- a/ocrd/requirements.txt +++ /dev/null @@ -1,23 +0,0 @@ -bagit >= 1.7.0 -bagit_profile >= 1.3.0 -click >=7 -requests < 2.30 -lxml -opencv-python-headless -Flask -jsonschema -pyyaml -Deprecated == 1.2.0 -memory-profiler >= 0.58.0 -sparklines >= 0.4.2 -python-magic -uvicorn -fastapi -python-multipart -requests_unixsocket -gdown -ocrd_utils -ocrd_models -ocrd_modelfactory -ocrd_validators -ocrd_network diff --git a/ocrd/setup.py b/ocrd/setup.py deleted file mode 100644 index 929849c9f5..0000000000 --- a/ocrd/setup.py +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup, Command -from setuptools.command.build import build as orig_build - -class build(orig_build): - def finalize_options(self): - vers = ' == ' + self.distribution.metadata.version - self.distribution.install_requires = [ - req + vers if req.startswith('ocrd') and '==' not in req else req - for req in self.distribution.install_requires - ] - orig_build.finalize_options(self) - -setup( - cmdclass={"build": build} -) diff --git a/ocrd_modelfactory/MANIFEST.in b/ocrd_modelfactory/MANIFEST.in deleted file mode 100644 index 4fae91089a..0000000000 --- a/ocrd_modelfactory/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include requirements.txt -include README.md diff --git a/ocrd_modelfactory/ocrd_modelfactory/.doctrees/environment.pickle b/ocrd_modelfactory/ocrd_modelfactory/.doctrees/environment.pickle deleted file mode 100644 index b7405edc62..0000000000 Binary files a/ocrd_modelfactory/ocrd_modelfactory/.doctrees/environment.pickle and /dev/null differ diff --git a/ocrd_modelfactory/pyproject.toml b/ocrd_modelfactory/pyproject.toml deleted file mode 100644 index 
34798daf29..0000000000 --- a/ocrd_modelfactory/pyproject.toml +++ /dev/null @@ -1,40 +0,0 @@ -[build-system] -requires = [ - "setuptools>=61", - "setuptools_scm[toml]", - "wheel", -] # PEP 508 specifications. -build-backend = "setuptools.build_meta" - -[project] -name = "ocrd_modelfactory" -authors = [{name = "Konstantin Baierer", email = "unixprog@gmail.com"}] -license = {text = "Apache License 2.0"} -description = "OCR-D framework - wrappers to create ocrd_model instances" -requires-python = ">=3.7" -dynamic = ["version", "dependencies"] - -[project.readme] -file = "README.md" -content-type = "text/markdown" - -[tool.setuptools.dynamic.dependencies] -file = ["requirements.txt"] - -[project.urls] -Homepage = "https://ocr-d.de" -Documentation = "https://ocr-d.de/core" -Repository = "https://github.com/OCR-D/core" -Issues = "https://github.com/OCR-D/core/issues" - -[tool.setuptools] -include-package-data = true - -[tool.setuptools.package-data] -"*" = ["*.json"] # *.yml; *.xml - -[tool.setuptools.packages.find] -namespaces = false - -[tool.setuptools_scm] -root = ".." 
diff --git a/ocrd_modelfactory/requirements.txt b/ocrd_modelfactory/requirements.txt deleted file mode 100644 index c577acff93..0000000000 --- a/ocrd_modelfactory/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -lxml -ocrd_utils -ocrd_models diff --git a/ocrd_modelfactory/setup.py b/ocrd_modelfactory/setup.py deleted file mode 100644 index 929849c9f5..0000000000 --- a/ocrd_modelfactory/setup.py +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup, Command -from setuptools.command.build import build as orig_build - -class build(orig_build): - def finalize_options(self): - vers = ' == ' + self.distribution.metadata.version - self.distribution.install_requires = [ - req + vers if req.startswith('ocrd') and '==' not in req else req - for req in self.distribution.install_requires - ] - orig_build.finalize_options(self) - -setup( - cmdclass={"build": build} -) diff --git a/ocrd_models/MANIFEST.in b/ocrd_models/MANIFEST.in deleted file mode 100644 index 4fae91089a..0000000000 --- a/ocrd_models/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include requirements.txt -include README.md diff --git a/ocrd_models/ocrd_models/.doctrees/environment.pickle b/ocrd_models/ocrd_models/.doctrees/environment.pickle deleted file mode 100644 index 4ff69b98d5..0000000000 Binary files a/ocrd_models/ocrd_models/.doctrees/environment.pickle and /dev/null differ diff --git a/ocrd_models/pyproject.toml b/ocrd_models/pyproject.toml deleted file mode 100644 index 5f8e3ebb1a..0000000000 --- a/ocrd_models/pyproject.toml +++ /dev/null @@ -1,40 +0,0 @@ -[build-system] -requires = [ - "setuptools>=61", - "setuptools_scm[toml]", - "wheel", -] # PEP 508 specifications. 
-build-backend = "setuptools.build_meta" - -[project] -name = "ocrd_models" -authors = [{name = "Konstantin Baierer", email = "unixprog@gmail.com"}] -license = {text = "Apache License 2.0"} -description = "OCR-D framework - file format APIs and schemas" -requires-python = ">=3.7" -dynamic = ["version", "dependencies"] - -[project.readme] -file = "README.md" -content-type = "text/markdown" - -[tool.setuptools.dynamic.dependencies] -file = ["requirements.txt"] - -[project.urls] -Homepage = "https://ocr-d.de" -Documentation = "https://ocr-d.de/core" -Repository = "https://github.com/OCR-D/core" -Issues = "https://github.com/OCR-D/core/issues" - -[tool.setuptools] -include-package-data = true - -[tool.setuptools.package-data] -"*" = ["*.json"] # *.yml; *.xml - -[tool.setuptools.packages.find] -namespaces = false - -[tool.setuptools_scm] -root = ".." diff --git a/ocrd_models/requirements.txt b/ocrd_models/requirements.txt deleted file mode 100644 index 299120a695..0000000000 --- a/ocrd_models/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -lxml -ocrd_utils diff --git a/ocrd_models/setup.py b/ocrd_models/setup.py deleted file mode 100644 index 929849c9f5..0000000000 --- a/ocrd_models/setup.py +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup, Command -from setuptools.command.build import build as orig_build - -class build(orig_build): - def finalize_options(self): - vers = ' == ' + self.distribution.metadata.version - self.distribution.install_requires = [ - req + vers if req.startswith('ocrd') and '==' not in req else req - for req in self.distribution.install_requires - ] - orig_build.finalize_options(self) - -setup( - cmdclass={"build": build} -) diff --git a/ocrd_network/MANIFEST.in b/ocrd_network/MANIFEST.in deleted file mode 100644 index 4fae91089a..0000000000 --- a/ocrd_network/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include requirements.txt -include README.md diff --git a/ocrd_network/pyproject.toml 
b/ocrd_network/pyproject.toml deleted file mode 100644 index 778038ab8d..0000000000 --- a/ocrd_network/pyproject.toml +++ /dev/null @@ -1,37 +0,0 @@ -[build-system] -requires = [ - "setuptools>=61", - "setuptools_scm[toml]", - "wheel", -] # PEP 508 specifications. -build-backend = "setuptools.build_meta" - -[project] -name = "ocrd_network" -authors = [{name = "Mehmed Mustafa", email = "unixprog@gmail.com"}, {name = "Jonas Schrewe"}, {name = "Triet Doan"}] -license = {text = "Apache License 2.0"} -description = "OCR-D framework - network" -requires-python = ">=3.7" -dynamic = ["version", "dependencies"] - -[project.readme] -file = "README.md" -content-type = "text/markdown" - -[tool.setuptools.dynamic.dependencies] -file = ["requirements.txt"] - -[project.urls] -Homepage = "https://ocr-d.de" -Documentation = "https://ocr-d.de/core" -Repository = "https://github.com/OCR-D/core" -Issues = "https://github.com/OCR-D/core/issues" - -[tool.setuptools] -include-package-data = false - -[tool.setuptools.packages.find] -namespaces = false - -[tool.setuptools_scm] -root = ".." 
diff --git a/ocrd_network/requirements.txt b/ocrd_network/requirements.txt deleted file mode 100644 index 60b66e5383..0000000000 --- a/ocrd_network/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -uvicorn>=0.17.6 -fastapi>=0.78.0 -pydantic==1.* -docker -paramiko -pika>=1.2.0 -beanie~=1.7 -httpx>=0.22.0 -ocrd_validators -ocrd_utils -# ocrd # commented to avoid circular dependency diff --git a/ocrd_network/setup.py b/ocrd_network/setup.py deleted file mode 100644 index 929849c9f5..0000000000 --- a/ocrd_network/setup.py +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup, Command -from setuptools.command.build import build as orig_build - -class build(orig_build): - def finalize_options(self): - vers = ' == ' + self.distribution.metadata.version - self.distribution.install_requires = [ - req + vers if req.startswith('ocrd') and '==' not in req else req - for req in self.distribution.install_requires - ] - orig_build.finalize_options(self) - -setup( - cmdclass={"build": build} -) diff --git a/ocrd_utils/MANIFEST.in b/ocrd_utils/MANIFEST.in deleted file mode 100644 index b6ae33acad..0000000000 --- a/ocrd_utils/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -include README.md -include ocrd_logging.conf -include requirements.txt diff --git a/ocrd_utils/pyproject.toml b/ocrd_utils/pyproject.toml deleted file mode 100644 index dec419a9bf..0000000000 --- a/ocrd_utils/pyproject.toml +++ /dev/null @@ -1,40 +0,0 @@ -[build-system] -requires = [ - "setuptools>=61", - "setuptools_scm[toml]", - "wheel", -] # PEP 508 specifications. 
-build-backend = "setuptools.build_meta" - -[project] -name = "ocrd_utils" -authors = [{name = "Konstantin Baierer", email = "unixprog@gmail.com"}] -license = {text = "Apache License 2.0"} -description = "OCR-D framework - shared code, helpers, constants" -requires-python = ">=3.7" -dynamic = ["version", "dependencies"] - -[project.readme] -file = "README.md" -content-type = "text/markdown" - -[tool.setuptools.dynamic.dependencies] -file = ["requirements.txt"] - -[project.urls] -Homepage = "https://ocr-d.de" -Documentation = "https://ocr-d.de/core" -Repository = "https://github.com/OCR-D/core" -Issues = "https://github.com/OCR-D/core/issues" - -[tool.setuptools] -include-package-data = true - -[tool.setuptools.package-data] -"*" = ["*.json"] # *.yml; *.xml - -[tool.setuptools.packages.find] -namespaces = false - -[tool.setuptools_scm] -root = ".." diff --git a/ocrd_utils/requirements.txt b/ocrd_utils/requirements.txt deleted file mode 100644 index 328783adda..0000000000 --- a/ocrd_utils/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -Pillow >= 7.2.0 -# XXX explicitly do not restrict the numpy version because different -# tensorflow versions might require different versions -numpy -atomicwrites >= 1.3.0 -frozendict>=2.3.4 -filetype diff --git a/ocrd_utils/setup.py b/ocrd_utils/setup.py deleted file mode 100644 index 53d3ffceba..0000000000 --- a/ocrd_utils/setup.py +++ /dev/null @@ -1,4 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup - -setup() diff --git a/ocrd_validators/MANIFEST.in b/ocrd_validators/MANIFEST.in deleted file mode 100644 index 4fae91089a..0000000000 --- a/ocrd_validators/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include requirements.txt -include README.md diff --git a/ocrd_validators/ocrd_validators/.doctrees/environment.pickle b/ocrd_validators/ocrd_validators/.doctrees/environment.pickle deleted file mode 100644 index cb19bf22c1..0000000000 Binary files a/ocrd_validators/ocrd_validators/.doctrees/environment.pickle and /dev/null 
differ diff --git a/ocrd_validators/pyproject.toml b/ocrd_validators/pyproject.toml deleted file mode 100644 index e039baeff4..0000000000 --- a/ocrd_validators/pyproject.toml +++ /dev/null @@ -1,40 +0,0 @@ -[build-system] -requires = [ - "setuptools>=61", - "setuptools_scm[toml]", - "wheel", -] # PEP 508 specifications. -build-backend = "setuptools.build_meta" - -[project] -name = "ocrd_validators" -authors = [{name = "Konstantin Baierer", email = "unixprog@gmail.com"}] -license = {text = "Apache License 2.0"} -description = "OCR-D framework - data validators" -requires-python = ">=3.7" -dynamic = ["version", "dependencies"] - -[project.readme] -file = "README.md" -content-type = "text/markdown" - -[tool.setuptools.dynamic.dependencies] -file = ["requirements.txt"] - -[project.urls] -Homepage = "https://ocr-d.de" -Documentation = "https://ocr-d.de/core" -Repository = "https://github.com/OCR-D/core" -Issues = "https://github.com/OCR-D/core/issues" - -[tool.setuptools] -include-package-data = true - -[tool.setuptools.package-data] -"*" = ["*.yml"] # *.xsd - -[tool.setuptools.packages.find] -namespaces = false - -[tool.setuptools_scm] -root = ".." 
diff --git a/ocrd_validators/requirements.txt b/ocrd_validators/requirements.txt deleted file mode 100644 index 2d5bff4ad5..0000000000 --- a/ocrd_validators/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -bagit >= 1.7.0 -bagit_profile >= 1.3.0 -click >=7 -jsonschema -pyyaml -shapely -ocrd_utils -ocrd_models -ocrd_modelfactory diff --git a/ocrd_validators/setup.py b/ocrd_validators/setup.py deleted file mode 100644 index 929849c9f5..0000000000 --- a/ocrd_validators/setup.py +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup, Command -from setuptools.command.build import build as orig_build - -class build(orig_build): - def finalize_options(self): - vers = ' == ' + self.distribution.metadata.version - self.distribution.install_requires = [ - req + vers if req.startswith('ocrd') and '==' not in req else req - for req in self.distribution.install_requires - ] - orig_build.finalize_options(self) - -setup( - cmdclass={"build": build} -) diff --git a/ocrd/pyproject.toml b/pyproject.toml similarity index 78% rename from ocrd/pyproject.toml rename to pyproject.toml index fb8dd79ecc..4e6d12cd89 100644 --- a/ocrd/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,6 @@ [build-system] requires = [ - "setuptools>=61", - "setuptools_scm[toml]", + "setuptools>=44", "wheel", ] # PEP 508 specifications. 
build-backend = "setuptools.build_meta" @@ -12,7 +11,7 @@ authors = [{name = "Konstantin Baierer", email = "unixprog@gmail.com"}] license = {text = "Apache License 2.0"} description = "OCR-D framework" requires-python = ">=3.7" -dynamic = ["version", "dependencies"] +dynamic = ['version', 'dependencies'] [project.readme] file = "README.md" @@ -21,6 +20,9 @@ content-type = "text/markdown" [tool.setuptools.dynamic.dependencies] file = ["requirements.txt"] +[tool.setuptools.dynamic.version] +file = ["VERSION"] + [project.urls] Homepage = "https://ocr-d.de" Documentation = "https://ocr-d.de/core" @@ -34,9 +36,9 @@ ocrd-dummy = "ocrd.processor.builtin.dummy_processor:cli" [tool.setuptools] include-package-data = true +[tool.setuptools.package-data] +"*" = ["*.json", '*.yml', '*.xml', '*.conf', '*.bash', '*.xsd'] + [tool.setuptools.packages.find] -exclude = ["tests"] # docs +where = ["src"] namespaces = false - -[tool.setuptools_scm] -root = ".." diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000..2190ec648d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,35 @@ +atomicwrites >= 1.3.0 +beanie~=1.7 +click >=7 +Deprecated == 1.2.0 +docker +fastapi>=0.78.0 +filetype +Flask +frozendict>=2.3.4 +gdown +httpx>=0.22.0 +importlib_metadata ; python_version < '3.8' +importlib_resources ; python_version < '3.9' +jsonschema +lxml +memory-profiler >= 0.58.0 +# XXX explicitly do not restrict the numpy version because different +# tensorflow versions might require different versions +numpy +ocrd-fork-bagit >= 1.8.1.post2 +ocrd-fork-bagit_profile >= 1.3.0.post1 +opencv-python-headless +paramiko +pika>=1.2.0 +Pillow >= 7.2.0 +pydantic==1.* +python-magic +python-multipart +pyyaml +requests < 2.30 +requests_unixsocket +shapely +uvicorn +uvicorn>=0.17.6 + diff --git a/ocrd/ocrd/__init__.py b/src/ocrd/__init__.py similarity index 100% rename from ocrd/ocrd/__init__.py rename to src/ocrd/__init__.py diff --git a/ocrd/ocrd/cli/__init__.py 
b/src/ocrd/cli/__init__.py similarity index 100% rename from ocrd/ocrd/cli/__init__.py rename to src/ocrd/cli/__init__.py diff --git a/ocrd/ocrd/cli/bashlib.py b/src/ocrd/cli/bashlib.py similarity index 100% rename from ocrd/ocrd/cli/bashlib.py rename to src/ocrd/cli/bashlib.py diff --git a/ocrd/ocrd/cli/log.py b/src/ocrd/cli/log.py similarity index 100% rename from ocrd/ocrd/cli/log.py rename to src/ocrd/cli/log.py diff --git a/ocrd/ocrd/cli/network.py b/src/ocrd/cli/network.py similarity index 100% rename from ocrd/ocrd/cli/network.py rename to src/ocrd/cli/network.py diff --git a/ocrd/ocrd/cli/ocrd_tool.py b/src/ocrd/cli/ocrd_tool.py similarity index 100% rename from ocrd/ocrd/cli/ocrd_tool.py rename to src/ocrd/cli/ocrd_tool.py diff --git a/ocrd/ocrd/cli/process.py b/src/ocrd/cli/process.py similarity index 100% rename from ocrd/ocrd/cli/process.py rename to src/ocrd/cli/process.py diff --git a/ocrd/ocrd/cli/resmgr.py b/src/ocrd/cli/resmgr.py similarity index 100% rename from ocrd/ocrd/cli/resmgr.py rename to src/ocrd/cli/resmgr.py diff --git a/ocrd/ocrd/cli/validate.py b/src/ocrd/cli/validate.py similarity index 100% rename from ocrd/ocrd/cli/validate.py rename to src/ocrd/cli/validate.py diff --git a/ocrd/ocrd/cli/workspace.py b/src/ocrd/cli/workspace.py similarity index 99% rename from ocrd/ocrd/cli/workspace.py rename to src/ocrd/cli/workspace.py index 7480f5962e..e848bd4945 100644 --- a/ocrd/ocrd/cli/workspace.py +++ b/src/ocrd/cli/workspace.py @@ -287,14 +287,14 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi \b Examples: ocrd workspace bulk-add \\ - --regex '(?P[^/]+)/page_(?P.*)\.[^.]+' \\ + --regex '(?P[^/]+)/page_(?P.*)\\.[^.]+' \\ --page-id 'PHYS_{{ pageid }}' \\ --file-grp "{{ fileGrp }}" \\ path/to/files/*/*.* \b echo "path/to/src/file.xml SEG/page_p0001.xml" \\ | ocrd workspace bulk-add \\ - --regex '(?P.*?) (?P.+?)/page_(?P.*)\.(?P[^\.]*)' \\ + --regex '(?P.*?) 
(?P.+?)/page_(?P.*)\\.(?P[^\\.]*)' \\ --file-id 'FILE_{{ fileGrp }}_{{ pageid }}' \\ --page-id 'PHYS_{{ pageid }}' \\ --file-grp "{{ fileGrp }}" \\ diff --git a/ocrd/ocrd/cli/zip.py b/src/ocrd/cli/zip.py similarity index 100% rename from ocrd/ocrd/cli/zip.py rename to src/ocrd/cli/zip.py diff --git a/ocrd/ocrd/constants.py b/src/ocrd/constants.py similarity index 75% rename from ocrd/ocrd/constants.py rename to src/ocrd/constants.py index 1d436a7fa9..796aff1895 100644 --- a/ocrd/ocrd/constants.py +++ b/src/ocrd/constants.py @@ -1,7 +1,7 @@ """ Constants for ocrd. """ -from pkg_resources import resource_filename +from ocrd_utils import resource_filename __all__ = [ 'TMP_PREFIX', @@ -18,7 +18,7 @@ DEFAULT_UPLOAD_FOLDER = '/tmp/uploads-ocrd-core' DOWNLOAD_DIR = '/tmp/ocrd-core-downloads' DEFAULT_REPOSITORY_URL = 'http://localhost:5000/' -BASHLIB_FILENAME = resource_filename(__name__, 'lib.bash') -RESOURCE_LIST_FILENAME = resource_filename(__name__, 'resource_list.yml') +BASHLIB_FILENAME = resource_filename(__package__, 'lib.bash') +RESOURCE_LIST_FILENAME = resource_filename(__package__, 'resource_list.yml') RESOURCE_USER_LIST_COMMENT = "# OCR-D private resource list (consider sending a PR with your own resources to OCR-D/core)" BACKUP_DIR = '.backup' diff --git a/ocrd/ocrd/decorators/__init__.py b/src/ocrd/decorators/__init__.py similarity index 100% rename from ocrd/ocrd/decorators/__init__.py rename to src/ocrd/decorators/__init__.py diff --git a/ocrd/ocrd/decorators/loglevel_option.py b/src/ocrd/decorators/loglevel_option.py similarity index 100% rename from ocrd/ocrd/decorators/loglevel_option.py rename to src/ocrd/decorators/loglevel_option.py diff --git a/ocrd/ocrd/decorators/mets_find_options.py b/src/ocrd/decorators/mets_find_options.py similarity index 100% rename from ocrd/ocrd/decorators/mets_find_options.py rename to src/ocrd/decorators/mets_find_options.py diff --git a/ocrd/ocrd/decorators/ocrd_cli_options.py b/src/ocrd/decorators/ocrd_cli_options.py 
similarity index 100% rename from ocrd/ocrd/decorators/ocrd_cli_options.py rename to src/ocrd/decorators/ocrd_cli_options.py diff --git a/ocrd/ocrd/decorators/parameter_option.py b/src/ocrd/decorators/parameter_option.py similarity index 100% rename from ocrd/ocrd/decorators/parameter_option.py rename to src/ocrd/decorators/parameter_option.py diff --git a/ocrd/ocrd/lib.bash b/src/ocrd/lib.bash similarity index 100% rename from ocrd/ocrd/lib.bash rename to src/ocrd/lib.bash diff --git a/ocrd/ocrd/mets_server.py b/src/ocrd/mets_server.py similarity index 94% rename from ocrd/ocrd/mets_server.py rename to src/ocrd/mets_server.py index 08011557b5..be2c1333e0 100644 --- a/ocrd/ocrd/mets_server.py +++ b/src/ocrd/mets_server.py @@ -2,24 +2,24 @@ # METS server functionality """ import re -from os import environ, _exit, chmod -from io import BytesIO -from typing import Any, Dict, Optional, Union, List, Tuple +from os import _exit, chmod +from typing import Dict, Optional, Union, List, Tuple from pathlib import Path from urllib.parse import urlparse import socket +import atexit -from fastapi import FastAPI, Request, File, Form, Response +from fastapi import FastAPI, Request, Form, Response from fastapi.responses import JSONResponse -from requests import request, Session as requests_session +from requests import Session as requests_session from requests.exceptions import ConnectionError from requests_unixsocket import Session as requests_unixsocket_session from pydantic import BaseModel, Field, ValidationError import uvicorn -from ocrd_models import OcrdMets, OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent -from ocrd_utils import initLogging, getLogger, deprecated_alias +from ocrd_models import OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent +from ocrd_utils import getLogger, deprecated_alias # # Models @@ -197,9 +197,10 @@ def __init__(self, workspace, url): self.log = getLogger(f'ocrd.mets_server[{self.url}]') def shutdown(self): - 
self.log.info("Shutting down METS server") if self.is_uds: - Path(self.url).unlink() + if Path(self.url).exists(): + self.log.warning(f'UDS socket {self.url} still exists, removing it') + Path(self.url).unlink() # os._exit because uvicorn catches SystemExit raised by sys.exit _exit(0) @@ -296,7 +297,7 @@ async def stop(): """ Stop the server """ - getLogger('ocrd.models.ocrd_mets').info('Shutting down') + getLogger('ocrd.models.ocrd_mets').info(f'Shutting down METS Server {self.url}') workspace.save_mets() self.shutdown() @@ -308,6 +309,7 @@ async def stop(): self.log.debug(f"chmod 0o677 {self.url}") server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) server.bind(self.url) # creates the socket file + atexit.register(self.shutdown) server.close() chmod(self.url, 0o666) uvicorn_kwargs = {'uds': self.url} diff --git a/ocrd/ocrd/processor/__init__.py b/src/ocrd/processor/__init__.py similarity index 100% rename from ocrd/ocrd/processor/__init__.py rename to src/ocrd/processor/__init__.py diff --git a/ocrd/ocrd/processor/base.py b/src/ocrd/processor/base.py similarity index 99% rename from ocrd/ocrd/processor/base.py rename to src/ocrd/processor/base.py index 38b7848a03..6107688bc2 100644 --- a/ocrd/ocrd/processor/base.py +++ b/src/ocrd/processor/base.py @@ -9,8 +9,6 @@ 'run_processor' ] -from warnings import warn -from pkg_resources import resource_filename from os.path import exists from shutil import copyfileobj import json @@ -30,7 +28,8 @@ list_resource_candidates, pushd_popd, list_all_resources, - get_processor_resource_types + get_processor_resource_types, + resource_filename, ) from ocrd_validators import ParameterValidator from ocrd_models.ocrd_page import MetadataItemType, LabelType, LabelsType @@ -266,7 +265,7 @@ def moduledir(self): """ The filesystem path of the module directory. 
""" - return resource_filename(self.module, '') + return resource_filename(self.module, '.') @property def input_files(self): diff --git a/ocrd/ocrd/processor/builtin/__init__.py b/src/ocrd/processor/builtin/__init__.py similarity index 100% rename from ocrd/ocrd/processor/builtin/__init__.py rename to src/ocrd/processor/builtin/__init__.py diff --git a/ocrd/ocrd/processor/builtin/dummy/__init__.py b/src/ocrd/processor/builtin/dummy/__init__.py similarity index 100% rename from ocrd/ocrd/processor/builtin/dummy/__init__.py rename to src/ocrd/processor/builtin/dummy/__init__.py diff --git a/src/ocrd/processor/builtin/dummy/ocrd-tool.json b/src/ocrd/processor/builtin/dummy/ocrd-tool.json new file mode 100644 index 0000000000..30a6d99fd9 --- /dev/null +++ b/src/ocrd/processor/builtin/dummy/ocrd-tool.json @@ -0,0 +1,19 @@ +{ + "tools": { + "ocrd-dummy": { + "executable": "ocrd-dummy", + "description": "Bare-bones processor creates PAGE-XML and optionally copies file from input group to output group", + "steps": ["preprocessing/optimization"], + "categories": ["Image preprocessing"], + "input_file_grp": "DUMMY_INPUT", + "output_file_grp": "DUMMY_OUTPUT", + "parameters": { + "copy_files": { + "type": "boolean", + "default": false, + "description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)" + } + } + } + } +} diff --git a/ocrd/ocrd/processor/builtin/dummy_processor.py b/src/ocrd/processor/builtin/dummy_processor.py similarity index 94% rename from ocrd/ocrd/processor/builtin/dummy_processor.py rename to src/ocrd/processor/builtin/dummy_processor.py index c0371e2d0e..0fb00af533 100644 --- a/ocrd/ocrd/processor/builtin/dummy_processor.py +++ b/src/ocrd/processor/builtin/dummy_processor.py @@ -1,6 +1,5 @@ # pylint: disable=missing-module-docstring,invalid-name from os.path import join, basename -from pkg_resources import resource_string import click @@ -13,11 +12,12 @@ make_file_id, MIME_TO_EXT, MIMETYPE_PAGE, - 
parse_json_string_with_comments + parse_json_string_with_comments, + resource_string ) from ocrd_modelfactory import page_from_file -OCRD_TOOL = parse_json_string_with_comments(resource_string(__name__, 'dummy/ocrd-tool.json').decode('utf8')) +OCRD_TOOL = parse_json_string_with_comments(resource_string(__package__ + '.dummy', 'ocrd-tool.json')) class DummyProcessor(Processor): """ @@ -79,7 +79,7 @@ def process(self): def __init__(self, *args, **kwargs): - kwargs['ocrd_tool'] = OCRD_TOOL + kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-dummy'] kwargs['version'] = '0.0.3' super(DummyProcessor, self).__init__(*args, **kwargs) diff --git a/ocrd/ocrd/processor/helpers.py b/src/ocrd/processor/helpers.py similarity index 99% rename from ocrd/ocrd/processor/helpers.py rename to src/ocrd/processor/helpers.py index 209fa5f67d..f5b6010636 100644 --- a/ocrd/ocrd/processor/helpers.py +++ b/src/ocrd/processor/helpers.py @@ -11,7 +11,7 @@ from click import wrap_text from ocrd.workspace import Workspace -from ocrd_utils import freeze_args, getLogger, config, setOverrideLogLevel, getLevelName +from ocrd_utils import freeze_args, getLogger, config, setOverrideLogLevel, getLevelName, sparkline __all__ = [ @@ -106,7 +106,6 @@ def run_processor( if any(x in config.OCRD_PROFILE for x in ['RSS', 'PSS']): backend = 'psutil_pss' if 'PSS' in config.OCRD_PROFILE else 'psutil' from memory_profiler import memory_usage - from sparklines import sparklines try: mem_usage = memory_usage(proc=processor.process, # only run process once @@ -123,7 +122,7 @@ def run_processor( chdir(old_cwd) mem_usage_values = [mem for mem, _ in mem_usage] mem_output = 'memory consumption: ' - mem_output += ''.join(sparklines(mem_usage_values)) + mem_output += sparkline(mem_usage_values) mem_output += ' max: %.2f MiB min: %.2f MiB' % (max(mem_usage_values), min(mem_usage_values)) logProfile.info(mem_output) else: diff --git a/ocrd/ocrd/resolver.py b/src/ocrd/resolver.py similarity index 100% rename from 
ocrd/ocrd/resolver.py rename to src/ocrd/resolver.py diff --git a/ocrd/ocrd/resource_list.yml b/src/ocrd/resource_list.yml similarity index 100% rename from ocrd/ocrd/resource_list.yml rename to src/ocrd/resource_list.yml diff --git a/ocrd/ocrd/resource_manager.py b/src/ocrd/resource_manager.py similarity index 99% rename from ocrd/ocrd/resource_manager.py rename to src/ocrd/resource_manager.py index 9d1e6ac596..c668028e9c 100644 --- a/ocrd/ocrd/resource_manager.py +++ b/src/ocrd/resource_manager.py @@ -60,9 +60,9 @@ def xdg_data_home(self): @property def xdg_config_home(self): - if not self._xdg_config_home: - self._xdg_config_home = config.XDG_CONFIG_HOME - return self._xdg_config_home + if self._xdg_config_home: + return self._xdg_config_home + return config.XDG_CONFIG_HOME def save_user_list(self, database=None): if not database: diff --git a/ocrd/ocrd/task_sequence.py b/src/ocrd/task_sequence.py similarity index 100% rename from ocrd/ocrd/task_sequence.py rename to src/ocrd/task_sequence.py diff --git a/ocrd/ocrd/workspace.py b/src/ocrd/workspace.py similarity index 99% rename from ocrd/ocrd/workspace.py rename to src/ocrd/workspace.py index 7772c54d79..633e45acf5 100644 --- a/ocrd/ocrd/workspace.py +++ b/src/ocrd/workspace.py @@ -82,7 +82,7 @@ def __init__(self, resolver, directory, mets=None, mets_basename=DEFAULT_METS_BA mets = ClientSideOcrdMets(mets_server_url) if mets.workspace_path != self.directory: raise ValueError(f"METS server {mets_server_url} workspace directory {mets.workspace_path} differs " - "from local workspace directory {self.directory}. These are not the same workspaces.") + f"from local workspace directory {self.directory}. 
These are not the same workspaces.") else: mets = OcrdMets(filename=self.mets_target) self.mets = mets diff --git a/ocrd/ocrd/workspace_backup.py b/src/ocrd/workspace_backup.py similarity index 100% rename from ocrd/ocrd/workspace_backup.py rename to src/ocrd/workspace_backup.py diff --git a/ocrd/ocrd/workspace_bagger.py b/src/ocrd/workspace_bagger.py similarity index 98% rename from ocrd/ocrd/workspace_bagger.py rename to src/ocrd/workspace_bagger.py index f838a65894..5c10103ce5 100644 --- a/ocrd/ocrd/workspace_bagger.py +++ b/src/ocrd/workspace_bagger.py @@ -9,7 +9,6 @@ import sys from bagit import Bag, make_manifests, _load_tag_file, _make_tag_file, _make_tagmanifest_file # pylint: disable=no-name-in-module from distutils.dir_util import copy_tree -from pkg_resources import get_distribution from ocrd_utils import ( pushd_popd, @@ -19,6 +18,7 @@ DEFAULT_METS_BASENAME, MIMETYPE_PAGE, VERSION, + dist_version, ) from ocrd_validators.constants import BAGIT_TXT, TMP_BAGIT_PREFIX, OCRD_BAGIT_PROFILE_URL from ocrd_modelfactory import page_from_file @@ -117,8 +117,8 @@ def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_bas bag.info['BagIt-Profile-Identifier'] = OCRD_BAGIT_PROFILE_URL bag.info['Bag-Software-Agent'] = 'ocrd/core %s (bagit.py %s, bagit_profile %s) [cmdline: "%s"]' % ( VERSION, # TODO - get_distribution('bagit').version, - get_distribution('bagit_profile').version, + dist_version('ocrd-fork-bagit'), + dist_version('ocrd-fork-bagit_profile'), ' '.join(sys.argv)) bag.info['Ocrd-Identifier'] = ocrd_identifier diff --git a/ocrd_modelfactory/ocrd_modelfactory/__init__.py b/src/ocrd_modelfactory/__init__.py similarity index 100% rename from ocrd_modelfactory/ocrd_modelfactory/__init__.py rename to src/ocrd_modelfactory/__init__.py diff --git a/ocrd_models/ocrd_models/__init__.py b/src/ocrd_models/__init__.py similarity index 100% rename from ocrd_models/ocrd_models/__init__.py rename to src/ocrd_models/__init__.py diff --git 
a/ocrd_models/ocrd_models/constants.py b/src/ocrd_models/constants.py similarity index 96% rename from ocrd_models/ocrd_models/constants.py rename to src/ocrd_models/constants.py index 7bba432678..db6e51e3a2 100644 --- a/ocrd_models/ocrd_models/constants.py +++ b/src/ocrd_models/constants.py @@ -1,8 +1,8 @@ """ Constants for ocrd_models. """ -from pkg_resources import resource_string from enum import Enum, auto +from ocrd_utils import resource_string __all__ = [ 'IDENTIFIER_PRIORITY', @@ -33,7 +33,7 @@ IDENTIFIER_PRIORITY = ['purl', 'urn', 'doi', 'url'] -METS_XML_EMPTY = resource_string(__name__, 'mets-empty.xml') +METS_XML_EMPTY = resource_string(__package__, 'mets-empty.xml') NAMESPACES = { 'mets': "http://www.loc.gov/METS/", diff --git a/ocrd_models/ocrd_models/mets-empty.xml b/src/ocrd_models/mets-empty.xml similarity index 100% rename from ocrd_models/ocrd_models/mets-empty.xml rename to src/ocrd_models/mets-empty.xml diff --git a/ocrd_models/ocrd_models/ocrd_agent.py b/src/ocrd_models/ocrd_agent.py similarity index 100% rename from ocrd_models/ocrd_models/ocrd_agent.py rename to src/ocrd_models/ocrd_agent.py diff --git a/ocrd_models/ocrd_models/ocrd_exif.py b/src/ocrd_models/ocrd_exif.py similarity index 100% rename from ocrd_models/ocrd_models/ocrd_exif.py rename to src/ocrd_models/ocrd_exif.py diff --git a/ocrd_models/ocrd_models/ocrd_file.py b/src/ocrd_models/ocrd_file.py similarity index 100% rename from ocrd_models/ocrd_models/ocrd_file.py rename to src/ocrd_models/ocrd_file.py diff --git a/ocrd_models/ocrd_models/ocrd_mets.py b/src/ocrd_models/ocrd_mets.py similarity index 99% rename from ocrd_models/ocrd_models/ocrd_mets.py rename to src/ocrd_models/ocrd_mets.py index ab50694d59..95c29ade87 100644 --- a/ocrd_models/ocrd_models/ocrd_mets.py +++ b/src/ocrd_models/ocrd_mets.py @@ -69,7 +69,7 @@ def empty_mets(now=None, cache_flag=False): """ if not now: now = datetime.now().isoformat() - tpl = METS_XML_EMPTY.decode('utf-8') + tpl = METS_XML_EMPTY tpl = 
tpl.replace('{{ VERSION }}', VERSION) tpl = tpl.replace('{{ NOW }}', '%s' % now) return OcrdMets(content=tpl.encode('utf-8'), cache_flag=cache_flag) diff --git a/ocrd_models/ocrd_models/ocrd_page.py b/src/ocrd_models/ocrd_page.py similarity index 100% rename from ocrd_models/ocrd_models/ocrd_page.py rename to src/ocrd_models/ocrd_page.py diff --git a/ocrd_models/ocrd_models/ocrd_page_generateds.py b/src/ocrd_models/ocrd_page_generateds.py similarity index 100% rename from ocrd_models/ocrd_models/ocrd_page_generateds.py rename to src/ocrd_models/ocrd_page_generateds.py diff --git a/ocrd_models/ocrd_models/ocrd_xml_base.py b/src/ocrd_models/ocrd_xml_base.py similarity index 100% rename from ocrd_models/ocrd_models/ocrd_xml_base.py rename to src/ocrd_models/ocrd_xml_base.py diff --git a/ocrd_models/ocrd_models/report.py b/src/ocrd_models/report.py similarity index 100% rename from ocrd_models/ocrd_models/report.py rename to src/ocrd_models/report.py diff --git a/ocrd_models/ocrd_models/utils.py b/src/ocrd_models/utils.py similarity index 100% rename from ocrd_models/ocrd_models/utils.py rename to src/ocrd_models/utils.py diff --git a/ocrd_network/ocrd_network/__init__.py b/src/ocrd_network/__init__.py similarity index 100% rename from ocrd_network/ocrd_network/__init__.py rename to src/ocrd_network/__init__.py diff --git a/ocrd_network/ocrd_network/cli/__init__.py b/src/ocrd_network/cli/__init__.py similarity index 100% rename from ocrd_network/ocrd_network/cli/__init__.py rename to src/ocrd_network/cli/__init__.py diff --git a/ocrd_network/ocrd_network/cli/client.py b/src/ocrd_network/cli/client.py similarity index 100% rename from ocrd_network/ocrd_network/cli/client.py rename to src/ocrd_network/cli/client.py diff --git a/ocrd_network/ocrd_network/cli/processing_server.py b/src/ocrd_network/cli/processing_server.py similarity index 100% rename from ocrd_network/ocrd_network/cli/processing_server.py rename to src/ocrd_network/cli/processing_server.py diff --git 
a/ocrd_network/ocrd_network/cli/processing_worker.py b/src/ocrd_network/cli/processing_worker.py similarity index 100% rename from ocrd_network/ocrd_network/cli/processing_worker.py rename to src/ocrd_network/cli/processing_worker.py diff --git a/ocrd_network/ocrd_network/cli/processor_server.py b/src/ocrd_network/cli/processor_server.py similarity index 100% rename from ocrd_network/ocrd_network/cli/processor_server.py rename to src/ocrd_network/cli/processor_server.py diff --git a/ocrd_network/ocrd_network/client.py b/src/ocrd_network/client.py similarity index 100% rename from ocrd_network/ocrd_network/client.py rename to src/ocrd_network/client.py diff --git a/ocrd_network/ocrd_network/constants.py b/src/ocrd_network/constants.py similarity index 100% rename from ocrd_network/ocrd_network/constants.py rename to src/ocrd_network/constants.py diff --git a/ocrd_network/ocrd_network/database.py b/src/ocrd_network/database.py similarity index 85% rename from ocrd_network/ocrd_network/database.py rename to src/ocrd_network/database.py index 4945c13353..946ed95a5b 100644 --- a/ocrd_network/ocrd_network/database.py +++ b/src/ocrd_network/database.py @@ -28,17 +28,17 @@ from .utils import call_sync -async def initiate_database(db_url: str): +async def initiate_database(db_url: str, db_name: str = 'ocrd'): client = AsyncIOMotorClient(db_url) await init_beanie( - database=client.get_default_database(default='ocrd'), + database=client.get_default_database(default=db_name), document_models=[DBProcessorJob, DBWorkflowJob, DBWorkspace, DBWorkflowScript] ) @call_sync -async def sync_initiate_database(db_url: str): - await initiate_database(db_url) +async def sync_initiate_database(db_url: str, db_name: str = 'ocrd'): + await initiate_database(db_url, db_name) async def db_create_workspace(mets_path: str) -> DBWorkspace: @@ -60,6 +60,11 @@ async def db_create_workspace(mets_path: str) -> DBWorkspace: return workspace_db +@call_sync +async def sync_db_create_workspace(mets_path: 
str) -> DBWorkspace: + return await db_create_workspace(mets_path=mets_path) + + async def db_get_workspace(workspace_id: str = None, workspace_mets_path: str = None) -> DBWorkspace: workspace = None if not workspace_id and not workspace_mets_path: @@ -134,6 +139,15 @@ async def sync_db_update_workspace(workspace_id: str = None, workspace_mets_path return await db_update_workspace(workspace_id=workspace_id, workspace_mets_path=workspace_mets_path, **kwargs) +async def db_create_processing_job(db_processing_job: DBProcessorJob) -> DBProcessorJob: + return await db_processing_job.insert() + + +@call_sync +async def sync_db_create_processing_job(db_processing_job: DBProcessorJob) -> DBProcessorJob: + return await db_create_processing_job(db_processing_job=db_processing_job) + + async def db_get_processing_job(job_id: str) -> DBProcessorJob: job = await DBProcessorJob.find_one( DBProcessorJob.job_id == job_id) @@ -180,6 +194,15 @@ async def sync_db_update_processing_job(job_id: str, **kwargs) -> DBProcessorJob return await db_update_processing_job(job_id=job_id, **kwargs) +async def db_create_workflow_job(db_workflow_job: DBWorkflowJob) -> DBWorkflowJob: + return await db_workflow_job.insert() + + +@call_sync +async def sync_db_create_workflow_job(db_workflow_job: DBWorkflowJob) -> DBWorkflowJob: + return await db_create_workflow_job(db_workflow_job=db_workflow_job) + + async def db_get_workflow_job(job_id: str) -> DBWorkflowJob: job = await DBWorkflowJob.find_one(DBWorkflowJob.job_id == job_id) if not job: @@ -202,6 +225,15 @@ async def sync_db_get_processing_jobs(job_ids: List[str]) -> [DBProcessorJob]: return await db_get_processing_jobs(job_ids) +async def db_create_workflow_script(db_workflow_script: DBWorkflowScript) -> DBWorkflowScript: + return await db_workflow_script.insert() + + +@call_sync +async def sync_db_create_workflow_script(db_workflow_script: DBWorkflowScript) -> DBWorkflowScript: + return await 
db_create_workflow_script(db_workflow_script=db_workflow_script) + + async def db_get_workflow_script(workflow_id: str) -> DBWorkflowScript: workflow = await DBWorkflowScript.find_one(DBWorkflowScript.workflow_id == workflow_id) if not workflow: @@ -221,6 +253,7 @@ async def db_find_first_workflow_script_by_content(content_hash: str) -> DBWorkf return workflow +# TODO: Resolve the inconsistency between the async and sync versions of the same method @call_sync async def sync_db_find_first_workflow_script_by_content(workflow_id: str) -> DBWorkflowScript: return await db_get_workflow_script(workflow_id) diff --git a/ocrd_network/ocrd_network/deployer.py b/src/ocrd_network/deployer.py similarity index 100% rename from ocrd_network/ocrd_network/deployer.py rename to src/ocrd_network/deployer.py diff --git a/ocrd_network/ocrd_network/deployment_utils.py b/src/ocrd_network/deployment_utils.py similarity index 100% rename from ocrd_network/ocrd_network/deployment_utils.py rename to src/ocrd_network/deployment_utils.py diff --git a/ocrd_network/ocrd_network/logging.py b/src/ocrd_network/logging.py similarity index 100% rename from ocrd_network/ocrd_network/logging.py rename to src/ocrd_network/logging.py diff --git a/ocrd_network/ocrd_network/models/__init__.py b/src/ocrd_network/models/__init__.py similarity index 100% rename from ocrd_network/ocrd_network/models/__init__.py rename to src/ocrd_network/models/__init__.py diff --git a/ocrd_network/ocrd_network/models/job.py b/src/ocrd_network/models/job.py similarity index 98% rename from ocrd_network/ocrd_network/models/job.py rename to src/ocrd_network/models/job.py index e5230aa5fd..6cb31bfb92 100644 --- a/ocrd_network/ocrd_network/models/job.py +++ b/src/ocrd_network/models/job.py @@ -20,6 +20,8 @@ class StateEnum(str, Enum): success = 'SUCCESS' # Processing job failed failed = 'FAILED' + # Processing job has not been assigned yet + unset = 'UNSET' class PYJobInput(BaseModel): diff --git 
a/ocrd_network/ocrd_network/models/messages.py b/src/ocrd_network/models/messages.py similarity index 100% rename from ocrd_network/ocrd_network/models/messages.py rename to src/ocrd_network/models/messages.py diff --git a/ocrd_network/ocrd_network/models/ocrd_tool.py b/src/ocrd_network/models/ocrd_tool.py similarity index 100% rename from ocrd_network/ocrd_network/models/ocrd_tool.py rename to src/ocrd_network/models/ocrd_tool.py diff --git a/ocrd_network/ocrd_network/models/workflow.py b/src/ocrd_network/models/workflow.py similarity index 100% rename from ocrd_network/ocrd_network/models/workflow.py rename to src/ocrd_network/models/workflow.py diff --git a/ocrd_network/ocrd_network/models/workspace.py b/src/ocrd_network/models/workspace.py similarity index 100% rename from ocrd_network/ocrd_network/models/workspace.py rename to src/ocrd_network/models/workspace.py diff --git a/ocrd_network/ocrd_network/param_validators.py b/src/ocrd_network/param_validators.py similarity index 100% rename from ocrd_network/ocrd_network/param_validators.py rename to src/ocrd_network/param_validators.py diff --git a/ocrd_network/ocrd_network/process_helpers.py b/src/ocrd_network/process_helpers.py similarity index 100% rename from ocrd_network/ocrd_network/process_helpers.py rename to src/ocrd_network/process_helpers.py diff --git a/ocrd_network/ocrd_network/processing_server.py b/src/ocrd_network/processing_server.py similarity index 90% rename from ocrd_network/ocrd_network/processing_server.py rename to src/ocrd_network/processing_server.py index e6606f90f0..3c09e8a107 100644 --- a/ocrd_network/ocrd_network/processing_server.py +++ b/src/ocrd_network/processing_server.py @@ -1,3 +1,4 @@ +from datetime import datetime from hashlib import md5 import httpx import json @@ -127,6 +128,14 @@ def __init__(self, config_path: str, host: str, port: int) -> None: else: self.internal_job_callback_url = f'http://{host}:{port}/result_callback' + self.router.add_api_route( + path='/', + 
endpoint=self.home_page, + methods=['GET'], + status_code=status.HTTP_200_OK, + summary='Get information about the processing server' + ) + # Create routes self.router.add_api_route( path='/stop', @@ -219,6 +228,15 @@ def __init__(self, config_path: str, host: str, port: int) -> None: summary='Get information about a workflow run', ) + self.router.add_api_route( + path='/workflow/job-simple/{workflow_job_id}', + endpoint=self.get_workflow_info_simple, + methods=['GET'], + tags=['workflow', 'processing'], + status_code=status.HTTP_200_OK, + summary='Get simplified overall job status', + ) + self.router.add_api_route( path='/workflow', endpoint=self.upload_workflow, @@ -288,6 +306,14 @@ async def on_shutdown(self) -> None: """ await self.stop_deployed_agents() + async def home_page(self): + message = f"The home page of the {self.title}" + json_message = { + "message": message, + "time": datetime.now().strftime("%Y-%m-%d %H:%M") + } + return json_message + async def stop_deployed_agents(self) -> None: self.deployer.kill_all() @@ -322,6 +348,12 @@ def create_message_queues(self) -> None: unique_only=True ) + # TODO: Reconsider and refactor this. + # Added ocrd-dummy by default if not available for the integration tests. + # A proper Processing Worker / Processor Server registration endpoint is needed on the Processing Server side + if 'ocrd-dummy' not in queue_names: + queue_names.append('ocrd-dummy') + for queue_name in queue_names: # The existence/validity of the worker.name is not tested. # Even if an ocr-d processor does not exist, the queue is created @@ -378,6 +410,12 @@ def processing_agent_exists(self, processor_name: str, agent_type: str) -> bool: if agent_type not in [NETWORK_AGENT_SERVER, NETWORK_AGENT_WORKER]: return False if agent_type == NETWORK_AGENT_WORKER: + # TODO: Reconsider and refactor this. + # Added ocrd-dummy by default if not available for the integration tests. 
+ # A proper Processing Worker / Processor Server registration endpoint + # is needed on the Processing Server side + if processor_name == 'ocrd-dummy': + return True if not self.check_if_queue_exists(processor_name): return False if agent_type == NETWORK_AGENT_SERVER: @@ -434,15 +472,25 @@ async def push_processor_job(self, processor_name: str, data: PYJobInput) -> PYJ validate_job_input(self.log, data.processor_name, ocrd_tool, data) - db_workspace = await db_get_workspace( - workspace_id=data.workspace_id, - workspace_mets_path=data.path_to_mets - ) - if not db_workspace: - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail=f"Workspace with id: {data.workspace_id} or path: {data.path_to_mets} not found" - ) + if data.workspace_id: + try: + db_workspace = await db_get_workspace(workspace_id=data.workspace_id) + except ValueError as error: + self.log.exception(error) + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Workspace with id `{data.workspace_id}` not found in the DB." + ) + else: # data.path_to_mets provided instead + try: + # TODO: Reconsider and refactor this. Core cannot create workspaces by api, but processing-server needs + # the workspace in the database. Here the workspace is created if the path is available locally and + # not existing in the DB - since it has not been uploaded through the Workspace Server. 
+ await db_create_workspace(data.path_to_mets) + except FileNotFoundError: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, + detail=f"Mets file not existing: {data.path_to_mets}") + workspace_key = data.path_to_mets if data.path_to_mets else data.workspace_id # initialize the request counter for the workspace_key self.cache_processing_requests.update_request_counter(workspace_key=workspace_key, by_value=0) @@ -882,6 +930,41 @@ async def get_workflow_info(self, workflow_job_id) -> Dict: }) return res + """ + Simplified version of the `get_workflow_info` that returns a single state for the entire workflow. + - If a single processing job fails, the entire workflow job status is set to FAILED. + - If there are any processing jobs running, regardless of other states, such as QUEUED and CACHED, + the entire workflow job status is set to RUNNING. + - If all processing jobs has finished successfully, only then the workflow job status is set to SUCCESS + """ + async def get_workflow_info_simple(self, workflow_job_id) -> Dict[str, StateEnum]: + """ Return list of a workflow's processor jobs + """ + try: + workflow_job = await db_get_workflow_job(workflow_job_id) + except ValueError: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, + detail=f"Workflow-Job with id: {workflow_job_id} not found") + job_ids: List[str] = [job_id for lst in workflow_job.processing_job_ids.values() for job_id in lst] + jobs = await db_get_processing_jobs(job_ids) + + workflow_job_state = StateEnum.unset + success_jobs = 0 + for job in jobs: + if job.state == StateEnum.cached or job.state == StateEnum.queued: + continue + if job.state == StateEnum.failed or job.state == StateEnum.cancelled: + workflow_job_state = StateEnum.failed + break + if job.state == StateEnum.running: + workflow_job_state = StateEnum.running + if job.state == StateEnum.success: + success_jobs += 1 + # if all jobs succeeded + if len(job_ids) == success_jobs: + workflow_job_state = StateEnum.success + 
return {"state": workflow_job_state} + async def upload_workflow(self, workflow: UploadFile) -> Dict: """ Store a script for a workflow in the database """ diff --git a/ocrd_network/ocrd_network/processing_worker.py b/src/ocrd_network/processing_worker.py similarity index 100% rename from ocrd_network/ocrd_network/processing_worker.py rename to src/ocrd_network/processing_worker.py diff --git a/ocrd_network/ocrd_network/processor_server.py b/src/ocrd_network/processor_server.py similarity index 100% rename from ocrd_network/ocrd_network/processor_server.py rename to src/ocrd_network/processor_server.py diff --git a/ocrd_network/ocrd_network/rabbitmq_utils/__init__.py b/src/ocrd_network/rabbitmq_utils/__init__.py similarity index 100% rename from ocrd_network/ocrd_network/rabbitmq_utils/__init__.py rename to src/ocrd_network/rabbitmq_utils/__init__.py diff --git a/ocrd_network/ocrd_network/rabbitmq_utils/connector.py b/src/ocrd_network/rabbitmq_utils/connector.py similarity index 100% rename from ocrd_network/ocrd_network/rabbitmq_utils/connector.py rename to src/ocrd_network/rabbitmq_utils/connector.py diff --git a/ocrd_network/ocrd_network/rabbitmq_utils/constants.py b/src/ocrd_network/rabbitmq_utils/constants.py similarity index 100% rename from ocrd_network/ocrd_network/rabbitmq_utils/constants.py rename to src/ocrd_network/rabbitmq_utils/constants.py diff --git a/ocrd_network/ocrd_network/rabbitmq_utils/consumer.py b/src/ocrd_network/rabbitmq_utils/consumer.py similarity index 100% rename from ocrd_network/ocrd_network/rabbitmq_utils/consumer.py rename to src/ocrd_network/rabbitmq_utils/consumer.py diff --git a/ocrd_network/ocrd_network/rabbitmq_utils/ocrd_messages.py b/src/ocrd_network/rabbitmq_utils/ocrd_messages.py similarity index 100% rename from ocrd_network/ocrd_network/rabbitmq_utils/ocrd_messages.py rename to src/ocrd_network/rabbitmq_utils/ocrd_messages.py diff --git a/ocrd_network/ocrd_network/rabbitmq_utils/publisher.py 
b/src/ocrd_network/rabbitmq_utils/publisher.py similarity index 100% rename from ocrd_network/ocrd_network/rabbitmq_utils/publisher.py rename to src/ocrd_network/rabbitmq_utils/publisher.py diff --git a/ocrd_network/ocrd_network/runtime_data.py b/src/ocrd_network/runtime_data.py similarity index 100% rename from ocrd_network/ocrd_network/runtime_data.py rename to src/ocrd_network/runtime_data.py diff --git a/ocrd_network/ocrd_network/server_cache.py b/src/ocrd_network/server_cache.py similarity index 100% rename from ocrd_network/ocrd_network/server_cache.py rename to src/ocrd_network/server_cache.py diff --git a/ocrd_network/ocrd_network/server_utils.py b/src/ocrd_network/server_utils.py similarity index 100% rename from ocrd_network/ocrd_network/server_utils.py rename to src/ocrd_network/server_utils.py diff --git a/ocrd_network/ocrd_network/utils.py b/src/ocrd_network/utils.py similarity index 93% rename from ocrd_network/ocrd_network/utils.py rename to src/ocrd_network/utils.py index 8f92706f46..4f66554bc3 100644 --- a/ocrd_network/ocrd_network/utils.py +++ b/src/ocrd_network/utils.py @@ -3,7 +3,7 @@ from pika import URLParameters from pymongo import uri_parser as mongo_uri_parser from re import match as re_match -from requests import Session as Session_TCP +from requests import get, Session as Session_TCP from requests_unixsocket import Session as Session_UDS from typing import Dict, List from uuid import uuid4 @@ -49,6 +49,18 @@ def generate_id() -> str: return str(uuid4()) +def is_url_responsive(url: str, retries: int = 0) -> bool: + while True: + try: + response = get(url) + if response.status_code == 200: + return True + except Exception: + if retries <= 0: + return False + retries -= 1 + + def validate_and_load_config(config_path: str) -> Dict: # Load and validate the config with open(config_path) as fin: @@ -64,7 +76,7 @@ def verify_database_uri(mongodb_address: str) -> str: # perform validation check mongo_uri_parser.parse_uri(uri=mongodb_address, 
validate=True) except Exception as error: - raise ValueError(f"The database address '{mongodb_address}' is in wrong format, {error}") + raise ValueError(f"The MongoDB address '{mongodb_address}' is in wrong format, {error}") return mongodb_address diff --git a/ocrd_models/ocrd_page_user_methods.py b/src/ocrd_page_user_methods.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods.py rename to src/ocrd_page_user_methods.py diff --git a/ocrd_models/ocrd_page_user_methods/__hash__.py b/src/ocrd_page_user_methods/__hash__.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/__hash__.py rename to src/ocrd_page_user_methods/__hash__.py diff --git a/ocrd_models/ocrd_page_user_methods/clear_AllIndexed.py b/src/ocrd_page_user_methods/clear_AllIndexed.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/clear_AllIndexed.py rename to src/ocrd_page_user_methods/clear_AllIndexed.py diff --git a/ocrd_models/ocrd_page_user_methods/exportChildren_GroupType.py b/src/ocrd_page_user_methods/exportChildren_GroupType.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/exportChildren_GroupType.py rename to src/ocrd_page_user_methods/exportChildren_GroupType.py diff --git a/ocrd_models/ocrd_page_user_methods/extend_AllIndexed.py b/src/ocrd_page_user_methods/extend_AllIndexed.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/extend_AllIndexed.py rename to src/ocrd_page_user_methods/extend_AllIndexed.py diff --git a/ocrd_models/ocrd_page_user_methods/get_AllAlternativeImagePaths.py b/src/ocrd_page_user_methods/get_AllAlternativeImagePaths.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/get_AllAlternativeImagePaths.py rename to src/ocrd_page_user_methods/get_AllAlternativeImagePaths.py diff --git a/ocrd_models/ocrd_page_user_methods/get_AllAlternativeImages.py b/src/ocrd_page_user_methods/get_AllAlternativeImages.py similarity index 100% rename from 
ocrd_models/ocrd_page_user_methods/get_AllAlternativeImages.py rename to src/ocrd_page_user_methods/get_AllAlternativeImages.py diff --git a/ocrd_models/ocrd_page_user_methods/get_AllIndexed.py b/src/ocrd_page_user_methods/get_AllIndexed.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/get_AllIndexed.py rename to src/ocrd_page_user_methods/get_AllIndexed.py diff --git a/ocrd_models/ocrd_page_user_methods/get_AllRegions.py b/src/ocrd_page_user_methods/get_AllRegions.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/get_AllRegions.py rename to src/ocrd_page_user_methods/get_AllRegions.py diff --git a/ocrd_models/ocrd_page_user_methods/get_AllTextLines.py b/src/ocrd_page_user_methods/get_AllTextLines.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/get_AllTextLines.py rename to src/ocrd_page_user_methods/get_AllTextLines.py diff --git a/ocrd_models/ocrd_page_user_methods/get_UnorderedGroupChildren.py b/src/ocrd_page_user_methods/get_UnorderedGroupChildren.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/get_UnorderedGroupChildren.py rename to src/ocrd_page_user_methods/get_UnorderedGroupChildren.py diff --git a/ocrd_models/ocrd_page_user_methods/id.py b/src/ocrd_page_user_methods/id.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/id.py rename to src/ocrd_page_user_methods/id.py diff --git a/ocrd_models/ocrd_page_user_methods/invalidate_AlternativeImage.py b/src/ocrd_page_user_methods/invalidate_AlternativeImage.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/invalidate_AlternativeImage.py rename to src/ocrd_page_user_methods/invalidate_AlternativeImage.py diff --git a/ocrd_models/ocrd_page_user_methods/prune_ReadingOrder.py b/src/ocrd_page_user_methods/prune_ReadingOrder.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/prune_ReadingOrder.py rename to src/ocrd_page_user_methods/prune_ReadingOrder.py diff --git 
a/ocrd_models/ocrd_page_user_methods/set_Border.py b/src/ocrd_page_user_methods/set_Border.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/set_Border.py rename to src/ocrd_page_user_methods/set_Border.py diff --git a/ocrd_models/ocrd_page_user_methods/set_Coords.py b/src/ocrd_page_user_methods/set_Coords.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/set_Coords.py rename to src/ocrd_page_user_methods/set_Coords.py diff --git a/ocrd_models/ocrd_page_user_methods/set_orientation.py b/src/ocrd_page_user_methods/set_orientation.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/set_orientation.py rename to src/ocrd_page_user_methods/set_orientation.py diff --git a/ocrd_models/ocrd_page_user_methods/set_points.py b/src/ocrd_page_user_methods/set_points.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/set_points.py rename to src/ocrd_page_user_methods/set_points.py diff --git a/ocrd_models/ocrd_page_user_methods/sort_AllIndexed.py b/src/ocrd_page_user_methods/sort_AllIndexed.py similarity index 100% rename from ocrd_models/ocrd_page_user_methods/sort_AllIndexed.py rename to src/ocrd_page_user_methods/sort_AllIndexed.py diff --git a/ocrd_utils/ocrd_utils/__init__.py b/src/ocrd_utils/__init__.py similarity index 98% rename from ocrd_utils/ocrd_utils/__init__.py rename to src/ocrd_utils/__init__.py index 90cd554779..d03c2a920c 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/src/ocrd_utils/__init__.py @@ -158,7 +158,10 @@ from .introspect import ( freeze_args, set_json_key_value_overrides, - membername + membername, + resource_filename, + resource_string, + dist_version ) from .logging import ( @@ -198,6 +201,7 @@ partition_list, parse_json_string_or_file, parse_json_string_with_comments, + sparkline, remove_non_path_from_url, safe_filename) diff --git a/ocrd_utils/ocrd_utils/config.py b/src/ocrd_utils/config.py similarity index 99% rename from ocrd_utils/ocrd_utils/config.py 
rename to src/ocrd_utils/config.py index 7b4538223b..cc12a31152 100644 --- a/ocrd_utils/ocrd_utils/config.py +++ b/src/ocrd_utils/config.py @@ -179,10 +179,12 @@ def _ocrd_download_timeout_parser(val): config.add("XDG_DATA_HOME", description="Directory to look for `./ocrd/resources.yml` (i.e. `ocrd resmgr` user database)", + parser=lambda val: Path(val), default=(True, lambda: Path(config.HOME, '.local/share'))) config.add("XDG_CONFIG_HOME", description="Directory to look for `./ocrd-resources/*` (i.e. `ocrd resmgr` data location)", + parser=lambda val: Path(val), default=(True, lambda: Path(config.HOME, '.config'))) config.add("OCRD_LOGGING_DEBUG", diff --git a/ocrd_utils/ocrd_utils/constants.py b/src/ocrd_utils/constants.py similarity index 84% rename from ocrd_utils/ocrd_utils/constants.py rename to src/ocrd_utils/constants.py index 1fbd9bf421..ff95a9fbbb 100644 --- a/ocrd_utils/ocrd_utils/constants.py +++ b/src/ocrd_utils/constants.py @@ -1,7 +1,7 @@ """ Constants for ocrd_utils. """ -from pkg_resources import get_distribution +from .introspect import dist_version from re import compile as regex_compile __all__ = [ @@ -18,7 +18,7 @@ 'VERSION', ] -VERSION = get_distribution('ocrd_utils').version +VERSION = dist_version('ocrd') MIMETYPE_PAGE = 'application/vnd.prima.page+xml' @@ -110,3 +110,24 @@ RESOURCE_LOCATIONS = ['data', 'cwd', 'system', 'module'] DEFAULT_METS_BASENAME = 'mets.xml' + + +# 2581 ▁ LOWER ONE EIGHTH BLOCK +# 2582 ▂ LOWER ONE QUARTER BLOCK +# 2583 ▃ LOWER THREE EIGHTHS BLOCK +# 2584 ▄ LOWER HALF BLOCK +# 2585 ▅ LOWER FIVE EIGHTHS BLOCK +# 2586 ▆ LOWER THREE QUARTERS BLOCK +# 2587 ▇ LOWER SEVEN EIGHTHS BLOCK +# 2588 █ FULL BLOCK +SPARKLINE_CHARS = [ + ' ', + '\u2581', + '\u2582', + '\u2583', + '\u2584', + '\u2585', + '\u2586', + '\u2587', + '\u2588', +] diff --git a/ocrd_utils/ocrd_utils/deprecate.py b/src/ocrd_utils/deprecate.py similarity index 100% rename from ocrd_utils/ocrd_utils/deprecate.py rename to src/ocrd_utils/deprecate.py diff --git 
a/ocrd_utils/ocrd_utils/image.py b/src/ocrd_utils/image.py similarity index 100% rename from ocrd_utils/ocrd_utils/image.py rename to src/ocrd_utils/image.py diff --git a/ocrd_utils/ocrd_utils/introspect.py b/src/ocrd_utils/introspect.py similarity index 55% rename from ocrd_utils/ocrd_utils/introspect.py rename to src/ocrd_utils/introspect.py index cfd3d32b52..a86b9b8984 100644 --- a/ocrd_utils/ocrd_utils/introspect.py +++ b/src/ocrd_utils/introspect.py @@ -3,8 +3,25 @@ """ import json from functools import wraps +from pathlib import Path from frozendict import frozendict +import atexit +from contextlib import ExitStack +# cannot use importlib.resources until we move to 3.9+ forimportlib.resources.files +import sys +if sys.version_info < (3, 9): + import importlib_resources +else: + import importlib.resources as importlib_resources + +if sys.version_info < (3, 8): + import importlib_metadata +else: + import importlib.metadata as importlib_metadata + +file_manager = ExitStack() +atexit.register(file_manager.close) # Taken from https://github.com/OCR-D/core/pull/884 def freeze_args(func): @@ -32,3 +49,14 @@ def set_json_key_value_overrides(obj, *kvpairs): except json.decoder.JSONDecodeError: obj[k] = v return obj + +def resource_filename(pkg : str, fname : str) -> Path: + ref = importlib_resources.files(pkg) / fname + return file_manager.enter_context(importlib_resources.as_file(ref)) + +def resource_string(pkg : str, fname : str) -> str: + with open(resource_filename(pkg, fname), 'r', encoding='utf-8') as f: + return f.read() + +def dist_version(module : str) -> str: + return importlib_metadata.version(module) diff --git a/ocrd_utils/ocrd_utils/logging.py b/src/ocrd_utils/logging.py similarity index 100% rename from ocrd_utils/ocrd_utils/logging.py rename to src/ocrd_utils/logging.py diff --git a/ocrd_utils/ocrd_logging.conf b/src/ocrd_utils/ocrd_logging.conf similarity index 100% rename from ocrd_utils/ocrd_logging.conf rename to src/ocrd_utils/ocrd_logging.conf 
diff --git a/ocrd_utils/ocrd_utils/os.py b/src/ocrd_utils/os.py similarity index 100% rename from ocrd_utils/ocrd_utils/os.py rename to src/ocrd_utils/os.py diff --git a/ocrd_utils/ocrd_utils/str.py b/src/ocrd_utils/str.py similarity index 92% rename from ocrd_utils/ocrd_utils/str.py rename to src/ocrd_utils/str.py index cf5cd3778e..e1e77ccc5e 100644 --- a/ocrd_utils/ocrd_utils/str.py +++ b/src/ocrd_utils/str.py @@ -4,7 +4,8 @@ import re import json -from .constants import REGEX_FILE_ID +from typing import List, Union +from .constants import REGEX_FILE_ID, SPARKLINE_CHARS from .deprecate import deprecation_warning from warnings import warn from numpy import array_split @@ -237,3 +238,18 @@ def partition_list(lst, chunks, chunk_index=None): if chunk_index is not None: return [ret[chunk_index]] return ret + +def sparkline(values : List[int]) -> str: + """ + Render a list of points with block characters + """ + if any(x is None or not isinstance(x, (int, float)) or x < 0 for x in values): + # return an empty string on non-positive-int values, better not to + # output a sparkline than to cancel execution due to problematic input + return '' + max_value = max(values) + max_mapping = len(SPARKLINE_CHARS) - 1 + # normalize to 0..1 and convert to index in SPARKLINE_CHARS + mapped = [int(x / max_value * max_mapping) for x in values] + return ''.join(SPARKLINE_CHARS[x] for x in mapped) + diff --git a/ocrd_validators/ocrd_validators/__init__.py b/src/ocrd_validators/__init__.py similarity index 100% rename from ocrd_validators/ocrd_validators/__init__.py rename to src/ocrd_validators/__init__.py diff --git a/ocrd_validators/ocrd_validators/bagit-profile.yml b/src/ocrd_validators/bagit-profile.yml similarity index 100% rename from ocrd_validators/ocrd_validators/bagit-profile.yml rename to src/ocrd_validators/bagit-profile.yml diff --git a/ocrd_validators/ocrd_validators/constants.py b/src/ocrd_validators/constants.py similarity index 65% rename from 
ocrd_validators/ocrd_validators/constants.py rename to src/ocrd_validators/constants.py index fc1ff445ae..b8d145bbba 100644 --- a/ocrd_validators/ocrd_validators/constants.py +++ b/src/ocrd_validators/constants.py @@ -2,7 +2,7 @@ Constants for ocrd_validators. """ import yaml -from pkg_resources import resource_string, resource_filename +from ocrd_utils import resource_string, resource_filename __all__ = [ 'PROCESSING_SERVER_CONFIG_SCHEMA', @@ -21,10 +21,10 @@ 'XSD_PATHS', ] -PROCESSING_SERVER_CONFIG_SCHEMA = yaml.safe_load(resource_string(__name__, 'processing_server_config.schema.yml')) -MESSAGE_SCHEMA_PROCESSING = yaml.safe_load(resource_string(__name__, 'message_processing.schema.yml')) -MESSAGE_SCHEMA_RESULT = yaml.safe_load(resource_string(__name__, 'message_result.schema.yml')) -OCRD_TOOL_SCHEMA = yaml.safe_load(resource_string(__name__, 'ocrd_tool.schema.yml')) +PROCESSING_SERVER_CONFIG_SCHEMA = yaml.safe_load(resource_string(__package__, 'processing_server_config.schema.yml')) +MESSAGE_SCHEMA_PROCESSING = yaml.safe_load(resource_string(__package__, 'message_processing.schema.yml')) +MESSAGE_SCHEMA_RESULT = yaml.safe_load(resource_string(__package__, 'message_result.schema.yml')) +OCRD_TOOL_SCHEMA = yaml.safe_load(resource_string(__package__, 'ocrd_tool.schema.yml')) RESOURCE_LIST_SCHEMA = { 'type': 'object', 'additionalProperties': False, @@ -32,7 +32,7 @@ '^ocrd-.*': OCRD_TOOL_SCHEMA['properties']['tools']['patternProperties']['ocrd-.*']['properties']['resources'] } } -OCRD_BAGIT_PROFILE = yaml.safe_load(resource_string(__name__, 'bagit-profile.yml')) +OCRD_BAGIT_PROFILE = yaml.safe_load(resource_string(__package__, 'bagit-profile.yml')) BAGIT_TXT = 'BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8' FILE_GROUP_PREFIX = 'OCR-D-' @@ -42,5 +42,5 @@ XSD_METS_URL = 'https://www.loc.gov/standards/mets/mets.xsd' XSD_PAGE_URL = 'http://www.primaresearch.org/schema/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd' XSD_PATHS = {} -XSD_PATHS[XSD_METS_URL] = 
resource_filename(__name__, 'mets.xsd') -XSD_PATHS[XSD_PAGE_URL] = resource_filename(__name__, 'page.xsd') +XSD_PATHS[XSD_METS_URL] = resource_filename(__package__, 'mets.xsd') +XSD_PATHS[XSD_PAGE_URL] = resource_filename(__package__, 'page.xsd') diff --git a/ocrd_validators/ocrd_validators/json_validator.py b/src/ocrd_validators/json_validator.py similarity index 100% rename from ocrd_validators/ocrd_validators/json_validator.py rename to src/ocrd_validators/json_validator.py diff --git a/ocrd_validators/ocrd_validators/message_processing.schema.yml b/src/ocrd_validators/message_processing.schema.yml similarity index 100% rename from ocrd_validators/ocrd_validators/message_processing.schema.yml rename to src/ocrd_validators/message_processing.schema.yml diff --git a/ocrd_validators/ocrd_validators/message_result.schema.yml b/src/ocrd_validators/message_result.schema.yml similarity index 100% rename from ocrd_validators/ocrd_validators/message_result.schema.yml rename to src/ocrd_validators/message_result.schema.yml diff --git a/ocrd_validators/ocrd_validators/mets.xsd b/src/ocrd_validators/mets.xsd similarity index 100% rename from ocrd_validators/ocrd_validators/mets.xsd rename to src/ocrd_validators/mets.xsd diff --git a/ocrd_validators/ocrd_validators/ocrd_network_message_validator.py b/src/ocrd_validators/ocrd_network_message_validator.py similarity index 100% rename from ocrd_validators/ocrd_validators/ocrd_network_message_validator.py rename to src/ocrd_validators/ocrd_network_message_validator.py diff --git a/ocrd_validators/ocrd_validators/ocrd_tool.schema.yml b/src/ocrd_validators/ocrd_tool.schema.yml similarity index 100% rename from ocrd_validators/ocrd_validators/ocrd_tool.schema.yml rename to src/ocrd_validators/ocrd_tool.schema.yml diff --git a/ocrd_validators/ocrd_validators/ocrd_tool_validator.py b/src/ocrd_validators/ocrd_tool_validator.py similarity index 100% rename from ocrd_validators/ocrd_validators/ocrd_tool_validator.py rename to 
src/ocrd_validators/ocrd_tool_validator.py diff --git a/ocrd_validators/ocrd_validators/ocrd_zip_validator.py b/src/ocrd_validators/ocrd_zip_validator.py similarity index 100% rename from ocrd_validators/ocrd_validators/ocrd_zip_validator.py rename to src/ocrd_validators/ocrd_zip_validator.py diff --git a/ocrd_validators/ocrd_validators/page.xsd b/src/ocrd_validators/page.xsd similarity index 100% rename from ocrd_validators/ocrd_validators/page.xsd rename to src/ocrd_validators/page.xsd diff --git a/ocrd_validators/ocrd_validators/page_validator.py b/src/ocrd_validators/page_validator.py similarity index 100% rename from ocrd_validators/ocrd_validators/page_validator.py rename to src/ocrd_validators/page_validator.py diff --git a/ocrd_validators/ocrd_validators/parameter_validator.py b/src/ocrd_validators/parameter_validator.py similarity index 100% rename from ocrd_validators/ocrd_validators/parameter_validator.py rename to src/ocrd_validators/parameter_validator.py diff --git a/ocrd_validators/ocrd_validators/processing_server_config.schema.yml b/src/ocrd_validators/processing_server_config.schema.yml similarity index 100% rename from ocrd_validators/ocrd_validators/processing_server_config.schema.yml rename to src/ocrd_validators/processing_server_config.schema.yml diff --git a/ocrd_validators/ocrd_validators/processing_server_config_validator.py b/src/ocrd_validators/processing_server_config_validator.py similarity index 100% rename from ocrd_validators/ocrd_validators/processing_server_config_validator.py rename to src/ocrd_validators/processing_server_config_validator.py diff --git a/ocrd_validators/ocrd_validators/resource_list_validator.py b/src/ocrd_validators/resource_list_validator.py similarity index 100% rename from ocrd_validators/ocrd_validators/resource_list_validator.py rename to src/ocrd_validators/resource_list_validator.py diff --git a/ocrd_validators/ocrd_validators/workspace_validator.py b/src/ocrd_validators/workspace_validator.py similarity 
index 100% rename from ocrd_validators/ocrd_validators/workspace_validator.py rename to src/ocrd_validators/workspace_validator.py diff --git a/ocrd_validators/ocrd_validators/xlink.xsd b/src/ocrd_validators/xlink.xsd similarity index 100% rename from ocrd_validators/ocrd_validators/xlink.xsd rename to src/ocrd_validators/xlink.xsd diff --git a/ocrd_validators/ocrd_validators/xsd_mets_validator.py b/src/ocrd_validators/xsd_mets_validator.py similarity index 100% rename from ocrd_validators/ocrd_validators/xsd_mets_validator.py rename to src/ocrd_validators/xsd_mets_validator.py diff --git a/ocrd_validators/ocrd_validators/xsd_page_validator.py b/src/ocrd_validators/xsd_page_validator.py similarity index 100% rename from ocrd_validators/ocrd_validators/xsd_page_validator.py rename to src/ocrd_validators/xsd_page_validator.py diff --git a/ocrd_validators/ocrd_validators/xsd_validator.py b/src/ocrd_validators/xsd_validator.py similarity index 100% rename from ocrd_validators/ocrd_validators/xsd_validator.py rename to src/ocrd_validators/xsd_validator.py diff --git a/tests/cli/test_bashlib.py b/tests/cli/test_bashlib.py index fc043b5fd7..74a623d1b6 100644 --- a/tests/cli/test_bashlib.py +++ b/tests/cli/test_bashlib.py @@ -1,6 +1,5 @@ from tests.base import CapturingTestCase as TestCase, main, assets, copy_of_directory -from pkg_resources import parse_version import os, sys import traceback import subprocess @@ -103,8 +102,8 @@ def test_bashlib_minversion(self): exit_code, out, err = self.invoke_bash( "source $(ocrd bashlib filename) && ocrd__minversion 2.29.0") assert exit_code == 0 - version = parse_version(VERSION) - version = "%d.%d.%d" % (version.major, version.minor+1, 0) + (major, minor, patch) = map(int, str(VERSION).split('.')) + version = "%d.%d.%d" % (major, minor + 1, patch) exit_code, out, err = self.invoke_bash( "source $(ocrd bashlib filename) && ocrd__minversion " + version) assert exit_code > 0 diff --git a/tests/conftest.py b/tests/conftest.py new file 
mode 100644 index 0000000000..1f35071075 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,15 @@ +from tests.network.config import test_config + +pytest_plugins = [ + "tests.network.fixtures_mongodb", + "tests.network.fixtures_rabbitmq" +] + +test_config.add("DB_NAME", description="...", default=(True, 'ocrd_network_test')) +test_config.add("DB_URL", description="...", default=(True, 'mongodb://network_test:network_test@0.0.0.0:27017')) + +test_config.add('DEFAULT_EXCHANGER_NAME', description="...", default=(True, 'ocrd-network-default')) +test_config.add('DEFAULT_QUEUE', description="...", default=(True, 'ocrd-network-default')) + +test_config.add('PROCESSING_SERVER_URL', description="...", default=(True, "http://0.0.0.0:8000")) +test_config.add('RABBITMQ_URL', description="...", default=(True, "amqp://network_test:network_test@0.0.0.0:5672/")) diff --git a/tests/network/.env b/tests/network/.env new file mode 100644 index 0000000000..7e96308c06 --- /dev/null +++ b/tests/network/.env @@ -0,0 +1,23 @@ +DOCKER_OCRD_NETWORK_NAME=ocrd_network_test +DOCKER_OCRD_NETWORK_MTU=1450 + +OCRD_NETWORK_LOGS_ROOT=/tmp/ocrd_network_logs +OCRD_NETWORK_SOCKETS_ROOT=/tmp/ocrd_network_sockets + +OCRD_PS_HOST=ps-docker-host +OCRD_PS_PORT=8000 +OCRD_PS_URL=http://${OCRD_PS_HOST}.${DOCKER_OCRD_NETWORK_NAME}:${OCRD_PS_PORT} + +MONGODB_NAME=ocrd_network_test +MONGODB_USER=network_test +MONGODB_PASS=network_test +MONGODB_HOST=mongodb-docker-host +MONGODB_PORT=27017 +MONGODB_URL=mongodb://${MONGODB_USER}:${MONGODB_PASS}@${MONGODB_HOST}.${DOCKER_OCRD_NETWORK_NAME}:${MONGODB_PORT} + +RABBITMQ_FEATURE_FLAGS=quorum_queue,implicit_default_bindings,classic_mirrored_queue_version +RABBITMQ_USER=network_test +RABBITMQ_PASS=network_test +RABBITMQ_HOST=rabbitmq-docker-host +RABBITMQ_PORT=5672 +RABBITMQ_URL=amqp://${RABBITMQ_USER}:${RABBITMQ_PASS}@${RABBITMQ_HOST}.${DOCKER_OCRD_NETWORK_NAME}:${RABBITMQ_PORT} diff --git a/tests/network/__init__.py b/tests/network/__init__.py new file mode 100644 index 
0000000000..e69de29bb2 diff --git a/tests/network/config.py b/tests/network/config.py new file mode 100644 index 0000000000..646833aeea --- /dev/null +++ b/tests/network/config.py @@ -0,0 +1,134 @@ +from pathlib import Path +from tempfile import gettempdir +from src.ocrd_utils.config import OcrdEnvConfig +from src.ocrd_utils.config import _ocrd_download_timeout_parser + +test_config = OcrdEnvConfig() + +test_config.add( + name='OCRD_METS_CACHING', + description='If set to `true`, access to the METS file is cached, speeding in-memory search and modification.', + validator=lambda val: val in ('true', 'false', '0', '1'), + parser=lambda val: val in ('true', '1') +) + +test_config.add( + name='OCRD_MAX_PROCESSOR_CACHE', + description=""" + Maximum number of processor instances (for each set of parameters) to be kept in memory (including loaded models) + for processing workers or processor servers. + """, + parser=int, + default=(True, 128) +) + +test_config.add( + name='OCRD_PROFILE', + description=""" + Whether to enable gathering runtime statistics + on the `ocrd.profile` logger (comma-separated): + - `CPU`: yields CPU and wall-time, + - `RSS`: also yields peak memory (resident set size) + - `PSS`: also yields peak memory (proportional set size) + """, + validator=lambda val: all(t in ('', 'CPU', 'RSS', 'PSS') for t in val.split(',')), + default=(True, '') +) + +test_config.add( + name="OCRD_PROFILE_FILE", + description=""" + If set, then the CPU profile is written to this file for later peruse with a analysis tools like snakeviz + """ +) + +test_config.add( + name="OCRD_DOWNLOAD_RETRIES", + description="Number of times to retry failed attempts for downloads of workspace files.", + validator=int, + parser=int, +) + +test_config.add( + name="OCRD_DOWNLOAD_TIMEOUT", + description="Timeout in seconds for connecting or reading (comma-separated) when downloading.", + parser=_ocrd_download_timeout_parser +) + +test_config.add( + name="OCRD_NETWORK_SERVER_ADDR_PROCESSING", 
+ description="Default address of Processing Server to connect to (for `ocrd network client processing`).", + default=(True, '') +) + +test_config.add( + name="OCRD_NETWORK_SERVER_ADDR_WORKFLOW", + description="Default address of Workflow Server to connect to (for `ocrd network client workflow`).", + default=(True, '') +) + +test_config.add( + name="OCRD_NETWORK_SERVER_ADDR_WORKSPACE", + description="Default address of Workspace Server to connect to (for `ocrd network client workspace`).", + default=(True, '') +) + +test_config.add( + name="OCRD_NETWORK_WORKER_QUEUE_CONNECT_ATTEMPTS", + description=""" + Number of attempts for a worker to create its queue. Helpful if the rabbitmq-server needs time to be fully started + """, + parser=int, + default=(True, 1) +) + +test_config.add( + name="OCRD_NETWORK_SOCKETS_ROOT_DIR", + description="The root directory where all mets server related socket files are created", + parser=lambda val: Path(val), + default=(True, Path(gettempdir(), "ocrd_network_sockets")) +) +test_config.OCRD_NETWORK_SOCKETS_ROOT_DIR.mkdir(parents=True, exist_ok=True) +# Remove socket files left from previous integration tests +for path in test_config.OCRD_NETWORK_SOCKETS_ROOT_DIR.iterdir(): + if path.is_socket() and path.suffix == '.sock': + path.unlink() + +test_config.add( + name="OCRD_NETWORK_LOGS_ROOT_DIR", + description="The root directory where all ocrd_network related file logs are stored", + parser=lambda val: Path(val), + default=(True, Path(gettempdir(), "ocrd_network_logs")) +) +test_config.OCRD_NETWORK_LOGS_ROOT_DIR.mkdir(parents=True, exist_ok=True) + +test_config.add( + name="HOME", + description="Directory to look for `ocrd_logging.conf`, fallback for unset XDG variables.", + # description="HOME directory, cf. 
https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html", + validator=lambda val: Path(val).is_dir(), + parser=lambda val: Path(val), + default=(True, lambda: Path.home()) +) + +test_config.add( + name="XDG_DATA_HOME", + description="Directory to look for `./ocrd/resources.yml` (i.e. `ocrd resmgr` user database)", + parser=lambda val: Path(val), + default=(True, lambda: Path(test_config.HOME, '.local/share')) +) + +test_config.add( + name="XDG_CONFIG_HOME", + description="Directory to look for `./ocrd-resources/*` (i.e. `ocrd resmgr` data location)", + parser=lambda val: Path(val), + default=(True, lambda: Path(test_config.HOME, '.config')) +) + +test_config.add( + name="OCRD_LOGGING_DEBUG", + description="Print information about the logging setup to STDERR", + default=(True, False), + validator=lambda val: isinstance(val, bool) or val in ('true', 'false', '0', '1'), + parser=lambda val: val in ('true', '1') +) diff --git a/tests/network/docker-compose.yml b/tests/network/docker-compose.yml new file mode 100644 index 0000000000..a5cef49e23 --- /dev/null +++ b/tests/network/docker-compose.yml @@ -0,0 +1,120 @@ +networks: + ocrd_network_test: + name: ${DOCKER_OCRD_NETWORK_NAME} + driver: bridge + driver_opts: + com.docker.network.driver.mtu: ${DOCKER_OCRD_NETWORK_MTU} + +services: + + ocrd_network_mongo_db: + image: "mongo" + hostname: ${MONGODB_HOST} + container_name: ocrd_network_mongo_db + networks: + - ${DOCKER_OCRD_NETWORK_NAME} + ports: + - ${MONGODB_PORT}:27017 + environment: + - MONGO_INITDB_ROOT_USERNAME=${MONGODB_USER} + - MONGO_INITDB_ROOT_PASSWORD=${MONGODB_PASS} + healthcheck: + test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet + interval: 1s + timeout: 3s + retries: 30 + + ocrd_network_rabbit_mq: + image: "rabbitmq:3.12-management" + hostname: ${RABBITMQ_HOST} + container_name: ocrd_network_rabbit_mq + networks: + - ${DOCKER_OCRD_NETWORK_NAME} + ports: + - ${RABBITMQ_PORT}:5672 + - 15672:15672 + - 
25672:25672 + environment: + - RABBITMQ_DEFAULT_USER=${RABBITMQ_USER} + - RABBITMQ_DEFAULT_PASS=${RABBITMQ_PASS} + - RABBITMQ_FEATURE_FLAGS=${RABBITMQ_FEATURE_FLAGS} + healthcheck: + test: rabbitmq-diagnostics check_port_connectivity + interval: 1s + timeout: 3s + retries: 30 + + ocrd_network_processing_server: + image: "ocrd_core_test" + build: + context: ../../ + dockerfile: Dockerfile + args: + BASE_IMAGE: 'ubuntu:22.04' + target: ocrd_core_test + hostname: ${OCRD_PS_HOST} + container_name: ocrd_network_processing_server + depends_on: + ocrd_network_mongo_db: + condition: service_healthy + ocrd_network_rabbit_mq: + condition: service_healthy + networks: + - ${DOCKER_OCRD_NETWORK_NAME} + ports: + - ${OCRD_PS_PORT}:8000 + environment: + DB_NAME: ${MONGODB_NAME} + DB_URL: ${MONGODB_URL} + RABBITMQ_URL: ${RABBITMQ_URL} + OCRD_NETWORK_LOGS_ROOT_DIR: /ocrd-data/ocrd_network_logs + OCRD_NETWORK_SOCKETS_ROOT_DIR: /ocrd-data/ocrd_network_sockets + healthcheck: + test: curl -f ${OCRD_PS_URL}/ + interval: 1s + timeout: 3s + retries: 30 + volumes: + - ${OCRD_NETWORK_LOGS_ROOT}:/ocrd-data/ocrd_network_logs + - ${OCRD_NETWORK_SOCKETS_ROOT}:/ocrd-data/ocrd_network_sockets + - "./dummy-workflow.txt:/ocrd-data/assets/dummy-workflow.txt" + - "./ocrd_logging.conf:/etc/ocrd_logging.conf" + - "./ps_config.yml:/ocrd-data/ps_config.yml" + command: ocrd network processing-server -a 0.0.0.0:8000 /ocrd-data/ps_config.yml + + ocrd_dummy_processing_worker: + image: "ocrd_core_test" + depends_on: + ocrd_network_processing_server: + condition: service_healthy + networks: + - ${DOCKER_OCRD_NETWORK_NAME} + environment: + OCRD_NETWORK_LOGS_ROOT_DIR: /ocrd-data/ocrd_network_logs + volumes: + - ${OCRD_NETWORK_LOGS_ROOT}:/ocrd-data/ocrd_network_logs + - ${OCRD_NETWORK_SOCKETS_ROOT}:/ocrd-data/ocrd_network_sockets + - "./dummy-workflow.txt:/ocrd-data/assets/dummy-workflow.txt" + - "./ocrd_logging.conf:/etc/ocrd_logging.conf" + command: ocrd-dummy worker --database ${MONGODB_URL} --queue 
${RABBITMQ_URL} + + ocrd_network_core_test: + image: "ocrd_core_test" + container_name: core_test + depends_on: + ocrd_network_processing_server: + condition: service_healthy + networks: + - ${DOCKER_OCRD_NETWORK_NAME} + environment: + DB_NAME: ${MONGODB_NAME} + DB_URL: ${MONGODB_URL} + PROCESSING_SERVER_URL: ${OCRD_PS_URL} + RABBITMQ_URL: ${RABBITMQ_URL} + OCRD_NETWORK_LOGS_ROOT_DIR: /ocrd-data/ocrd_network_logs + OCRD_NETWORK_SOCKETS_ROOT_DIR: /ocrd-data/ocrd_network_sockets + volumes: + - ${OCRD_NETWORK_LOGS_ROOT}:/ocrd-data/ocrd_network_logs + - ${OCRD_NETWORK_SOCKETS_ROOT}:/ocrd-data/ocrd_network_sockets + - "./dummy-workflow.txt:/ocrd-data/assets/dummy-workflow.txt" + - "./ocrd_logging.conf:/etc/ocrd_logging.conf" diff --git a/tests/network/dummy-workflow.txt b/tests/network/dummy-workflow.txt new file mode 100644 index 0000000000..8d4e04d651 --- /dev/null +++ b/tests/network/dummy-workflow.txt @@ -0,0 +1,3 @@ +dummy -I OCR-D-IMG -O OCR-D-DUMMY1 +dummy -I OCR-D-DUMMY1 -O OCR-D-DUMMY2 +dummy -I OCR-D-DUMMY2 -O OCR-D-DUMMY3 diff --git a/tests/network/fixtures_mongodb.py b/tests/network/fixtures_mongodb.py new file mode 100644 index 0000000000..1409bafe72 --- /dev/null +++ b/tests/network/fixtures_mongodb.py @@ -0,0 +1,10 @@ +from pytest import fixture +from src.ocrd_network.database import sync_initiate_database +from src.ocrd_network.utils import verify_database_uri +from tests.network.config import test_config + + +@fixture(scope="package", name="mongo_client") +def fixture_mongo_client(): + verify_database_uri(test_config.DB_URL) + sync_initiate_database(test_config.DB_URL, test_config.DB_NAME) diff --git a/tests/network/fixtures_rabbitmq.py b/tests/network/fixtures_rabbitmq.py new file mode 100644 index 0000000000..a3b1300cf2 --- /dev/null +++ b/tests/network/fixtures_rabbitmq.py @@ -0,0 +1,75 @@ +from pika.credentials import PlainCredentials +from pytest import fixture +from src.ocrd_network.rabbitmq_utils import RMQConnector, RMQConsumer, RMQPublisher 
+from src.ocrd_network.utils import verify_and_parse_mq_uri +from tests.network.config import test_config + + +RABBITMQ_URL = test_config.RABBITMQ_URL +DEFAULT_EXCHANGER_NAME = test_config.DEFAULT_EXCHANGER_NAME +DEFAULT_QUEUE = test_config.DEFAULT_QUEUE + + +@fixture(scope="package", name="rabbitmq_defaults") +def fixture_rabbitmq_defaults(): + rmq_data = verify_and_parse_mq_uri(RABBITMQ_URL) + rmq_username = rmq_data["username"] + rmq_password = rmq_data["password"] + rmq_host = rmq_data["host"] + rmq_port = rmq_data["port"] + rmq_vhost = rmq_data["vhost"] + + test_connection = RMQConnector.open_blocking_connection( + credentials=PlainCredentials(rmq_username, rmq_password), + host=rmq_host, + port=rmq_port, + vhost=rmq_vhost + ) + test_channel = RMQConnector.open_blocking_channel(test_connection) + assert test_channel + RMQConnector.exchange_declare( + channel=test_channel, + exchange_name=DEFAULT_EXCHANGER_NAME, + exchange_type='direct', + durable=False + ) + RMQConnector.queue_declare(channel=test_channel, queue_name=DEFAULT_QUEUE, durable=False) + RMQConnector.queue_bind( + channel=test_channel, + exchange_name=DEFAULT_EXCHANGER_NAME, + queue_name=DEFAULT_QUEUE, + routing_key=DEFAULT_QUEUE + ) + # Clean all messages inside if any from previous tests + RMQConnector.queue_purge(channel=test_channel, queue_name=DEFAULT_QUEUE) + + +@fixture(scope="package", name="rabbitmq_publisher") +def fixture_rabbitmq_publisher(rabbitmq_defaults): + rmq_data = verify_and_parse_mq_uri(RABBITMQ_URL) + rmq_publisher = RMQPublisher( + host=rmq_data["host"], + port=rmq_data["port"], + vhost=rmq_data["vhost"] + ) + rmq_publisher.authenticate_and_connect( + username=rmq_data["username"], + password=rmq_data["password"] + ) + rmq_publisher.enable_delivery_confirmations() + yield rmq_publisher + + +@fixture(scope="package", name="rabbitmq_consumer") +def fixture_rabbitmq_consumer(rabbitmq_defaults): + rmq_data = verify_and_parse_mq_uri(RABBITMQ_URL) + rmq_consumer = RMQConsumer( + 
host=rmq_data["host"], + port=rmq_data["port"], + vhost=rmq_data["vhost"] + ) + rmq_consumer.authenticate_and_connect( + username=rmq_data["username"], + password=rmq_data["password"] + ) + yield rmq_consumer diff --git a/tests/network/ocrd_logging.conf b/tests/network/ocrd_logging.conf new file mode 100644 index 0000000000..ee081b7b56 --- /dev/null +++ b/tests/network/ocrd_logging.conf @@ -0,0 +1,150 @@ +# This is a template configuration file which allows customizing +# format and destination of log messages with OCR-D. +# It is meant as an example, and should be customized. +# To get into effect, you must put a copy (under the same name) +# into your CWD, HOME or /etc. These directories are searched +# in said order, and the first find wins. When no config file +# is found, the default logging configuration applies (cf. ocrd.logging.py). +# +# mandatory loggers section +# configure loggers with corresponding keys "root", "" +# each logger requires a corresponding configuration section below +# +[loggers] +keys=root,ocrd,ocrd_network,ocrd_tensorflow,ocrd_shapely_geos,ocrd_PIL,uvicorn,uvicorn_access,uvicorn_error,multipart + +# +# mandatory handlers section +# handle output for each logging "channel" +# i.e. console, file, smtp, syslog, http, ... 
+# each handler requires a corresponding configuration section below +# +[handlers] +keys=consoleHandler,fileHandler,processingServerHandler + +# +# optional custom formatters section +# format message fields, to be used differently by logging handlers +# each formatter requires a corresponding formatter section below +# +[formatters] +keys=defaultFormatter,detailedFormatter + +# +# default logger "root" using consoleHandler +# +[logger_root] +level=DEBUG +handlers=consoleHandler,fileHandler + + +# +# additional logger configurations can be added +# as separate configuration sections like below +# +# example logger "ocrd_workspace" uses fileHandler and overrides +# default log level "INFO" with custom level "DEBUG" +# "qualname" must match the logger label used in the corresponding +# ocrd module +# see in the module-of-interest (moi) +# +#[logger_ocrd_workspace] +#level=DEBUG +#handlers=fileHandler +#qualname=ocrd.workspace + +# ocrd loggers +[logger_ocrd] +level=DEBUG +handlers=consoleHandler,fileHandler +qualname=ocrd +propagate=0 + +[logger_ocrd_network] +level=DEBUG +handlers=consoleHandler,processingServerHandler +qualname=ocrd_network +propagate=0 + +# +# logger tensorflow +# +[logger_ocrd_tensorflow] +level=DEBUG +handlers=consoleHandler +qualname=tensorflow + +# +# logger shapely.geos +# +[logger_ocrd_shapely_geos] +level=DEBUG +handlers=consoleHandler +qualname=shapely.geos + + +# +# logger PIL +# +[logger_ocrd_PIL] +level=DEBUG +handlers=consoleHandler +qualname=PIL + +# +# uvicorn loggers +# +[logger_uvicorn] +level=INFO +handlers=consoleHandler +qualname=uvicorn +[logger_uvicorn_access] +level=DEBUG +handlers=consoleHandler +qualname=uvicorn.access +[logger_uvicorn_error] +level=DEBUG +handlers=consoleHandler +qualname=uvicorn.error +[logger_multipart] +level=DEBUG +handlers=consoleHandler +qualname=multipart + + + +# +# handle stderr output +# +[handler_consoleHandler] +class=StreamHandler +formatter=defaultFormatter +args=(sys.stderr,) + +# +# 
example logfile handler +# handle output with logfile +# +[handler_fileHandler] +class=FileHandler +formatter=defaultFormatter +args=('ocrd.log','a+') + +[handler_processingServerHandler] +class=FileHandler +formatter=defaultFormatter +args=('/ocrd-data/ocrd_processing_server_conf.log','a+') + +# +# default log format conforming to OCR-D (https://ocr-d.de/en/spec/cli#logging) +# +[formatter_defaultFormatter] +format=%(asctime)s.%(msecs)03d %(levelname)s %(name)s - %(message)s +datefmt=%H:%M:%S + +# +# store more logging context information +# +[formatter_detailedFormatter] +format=%(asctime)s.%(msecs)03d %(levelname)-8s (%(name)s)[%(filename)s:%(lineno)d] - %(message)s +datefmt=%H:%M:%S diff --git a/tests/network/ps_config.yml b/tests/network/ps_config.yml new file mode 100644 index 0000000000..655a847b8f --- /dev/null +++ b/tests/network/ps_config.yml @@ -0,0 +1,17 @@ +# the content of this config file is based on the .env +internal_callback_url: http://ps-docker-host.ocrd_network_test:8000 +process_queue: + address: rabbitmq-docker-host.ocrd_network_test + port: 5672 + skip_deployment: true + credentials: + username: network_test + password: network_test +database: + address: mongodb-docker-host.ocrd_network_test + port: 27017 + skip_deployment: true + credentials: + username: network_test + password: network_test +hosts: [] diff --git a/tests/network/test_db.py b/tests/network/test_db.py new file mode 100644 index 0000000000..6a69822883 --- /dev/null +++ b/tests/network/test_db.py @@ -0,0 +1,166 @@ +from datetime import datetime +from hashlib import md5 +from pathlib import Path +from pytest import raises +from tests.base import assets +from src.ocrd_network.models import DBProcessorJob, DBWorkflowScript, StateEnum +from src.ocrd_network.database import ( + sync_db_create_processing_job, + sync_db_get_processing_job, + sync_db_update_processing_job, + sync_db_create_workspace, + sync_db_get_workspace, + sync_db_update_workspace, + sync_db_create_workflow_script, 
+ sync_db_get_workflow_script, + sync_db_find_first_workflow_script_by_content +) + + +def test_db_processing_job_create(mongo_client): + job_id = f'test_job_id_{datetime.now()}' + db_created_processing_job = sync_db_create_processing_job( + db_processing_job=DBProcessorJob( + job_id=job_id, + processor_name='ocrd-dummy', + state=StateEnum.cached, + path_to_mets='/ocrd/dummy/path', + input_file_grps=['DEFAULT'], + output_file_grps=['OCR-D-DUMMY'] + ) + ) + assert db_created_processing_job + db_found_processing_job = sync_db_get_processing_job(job_id=job_id) + assert db_found_processing_job + assert db_found_processing_job.job_id == job_id + assert db_found_processing_job.processor_name == 'ocrd-dummy' + assert db_found_processing_job.state == StateEnum.cached + assert db_found_processing_job.path_to_mets == '/ocrd/dummy/path' + assert db_found_processing_job.input_file_grps == ['DEFAULT'] + assert db_found_processing_job.output_file_grps == ['OCR-D-DUMMY'] + + with raises(ValueError): + sync_db_get_processing_job(job_id='non-existing-id') + + +def test_db_processing_job_update(mongo_client): + job_id = f'test_job_id_{datetime.now()}' + db_created_processing_job = sync_db_create_processing_job( + db_processing_job=DBProcessorJob( + job_id=job_id, + processor_name='ocrd-dummy', + state=StateEnum.cached, + path_to_mets='/ocrd/dummy/path', + input_file_grps=['DEFAULT'], + output_file_grps=['OCR-D-DUMMY'] + ) + ) + assert db_created_processing_job + db_found_processing_job = sync_db_get_processing_job(job_id=job_id) + assert db_found_processing_job + db_updated_processing_job = sync_db_update_processing_job(job_id=job_id, state=StateEnum.running) + assert db_found_processing_job != db_updated_processing_job + db_found_updated_processing_job = sync_db_get_processing_job(job_id=job_id) + assert db_found_updated_processing_job + assert db_found_updated_processing_job == db_updated_processing_job + assert db_found_updated_processing_job.state == StateEnum.running + + with 
raises(ValueError): + sync_db_update_processing_job(job_id='non-existing', state=StateEnum.running) + sync_db_update_processing_job(job_id=job_id, non_existing_field='dummy_value') + sync_db_update_processing_job(job_id=job_id, processor_name='non-updatable-field') + + +def test_db_workspace_create(mongo_client): + mets_path = assets.path_to('kant_aufklaerung_1784/data/mets.xml') + db_created_workspace = sync_db_create_workspace(mets_path=mets_path) + assert db_created_workspace + assert db_created_workspace.workspace_mets_path == mets_path + db_found_workspace = sync_db_get_workspace(workspace_mets_path=mets_path) + assert db_found_workspace + assert db_found_workspace == db_created_workspace + + with raises(ValueError): + sync_db_get_workspace(workspace_id='non-existing-id') + sync_db_get_workspace(workspace_mets_path='non-existing-mets') + + with raises(FileNotFoundError): + sync_db_create_workspace(mets_path='non-existing-mets') + + +def test_db_workspace_update(mongo_client): + mets_path = assets.path_to('kant_aufklaerung_1784-binarized/data/mets.xml') + dummy_mets_server_url = '/tmp/dummy.sock' + + db_created_workspace = sync_db_create_workspace(mets_path=mets_path) + assert db_created_workspace + db_found_workspace = sync_db_get_workspace(workspace_mets_path=mets_path) + assert db_found_workspace + assert db_created_workspace == db_found_workspace + + db_updated_workspace = sync_db_update_workspace( + workspace_mets_path=mets_path, + mets_server_url=dummy_mets_server_url + ) + assert db_updated_workspace + assert db_updated_workspace != db_created_workspace + + db_found_updated_workspace = sync_db_get_workspace(workspace_mets_path=mets_path) + assert db_found_updated_workspace + assert db_found_updated_workspace.workspace_mets_path == mets_path + assert db_found_updated_workspace.mets_server_url == dummy_mets_server_url + assert db_found_updated_workspace == db_updated_workspace + + +# TODO: There is no db wrapper implemented due to direct access in the 
processing server... +# TODO2: Should be refactored with proper asset access +def create_db_model_workflow_script( + workflow_id: str, + script_path: Path = Path(Path(__file__).parent, "dummy-workflow.txt") +) -> DBWorkflowScript: + workflow_id = workflow_id + with open(script_path, 'rb') as fp: + content = (fp.read()).decode("utf-8") + content_hash = md5(content.encode("utf-8")).hexdigest() + return DBWorkflowScript(workflow_id=workflow_id, content=content, content_hash=content_hash) + + +def test_db_workflow_script_create(mongo_client): + workflow_id = f'test_workflow_{datetime.now()}' + db_model_workflow_script = create_db_model_workflow_script(workflow_id=workflow_id) + db_created_workflow_script = sync_db_create_workflow_script( + db_workflow_script=db_model_workflow_script + ) + assert db_created_workflow_script + db_found_workflow_script = sync_db_get_workflow_script(workflow_id=workflow_id) + assert db_found_workflow_script + assert db_found_workflow_script == db_created_workflow_script + + with raises(ValueError): + sync_db_get_workflow_script(workflow_id='non-existing-id') + + +def test_db_find_workflow_script_by_content(mongo_client): + workflow_id = f'test_workflow_{datetime.now()}' + db_model_workflow_script = create_db_model_workflow_script(workflow_id=workflow_id) + db_created_workflow_script = sync_db_create_workflow_script( + db_workflow_script=db_model_workflow_script + ) + assert db_created_workflow_script + db_found_workflow_script = sync_db_find_first_workflow_script_by_content( + workflow_id=db_model_workflow_script.workflow_id + ) + assert db_found_workflow_script + assert db_found_workflow_script == db_created_workflow_script + + +# TODO: hard to implement without some refactoring in the ocrd_network +# and providing proper db wrappers +def test_db_workflow_job_create(): + pass + + +# TODO: hard to implement without some refactoring in the ocrd_network +# and providing proper db wrappers +def test_db_workflow_job_update(): + pass diff --git 
from time import sleep
from requests import get, post
from src.ocrd_network import NETWORK_AGENT_WORKER
from src.ocrd_network.models import StateEnum
from tests.base import assets
from tests.network.config import test_config

PROCESSING_SERVER_URL = test_config.PROCESSING_SERVER_URL


def poll_till_timeout_fail_or_success(test_url: str, tries: int, wait: int) -> StateEnum:
    """Poll a job-status endpoint until it reaches a terminal state.

    Sleeps ``wait`` seconds between at most ``tries`` polls of ``test_url``
    and returns the last observed job state — ``success``/``failed`` on early
    exit, otherwise whatever the final poll reported (possibly ``unset``).
    Raises AssertionError if any poll does not return HTTP 200.
    """
    job_state = StateEnum.unset
    while tries > 0:
        sleep(wait)
        response = get(url=test_url)
        assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}"
        job_state = response.json()["state"]
        # Stop polling as soon as the job reaches a terminal state.
        if job_state in (StateEnum.success, StateEnum.failed):
            break
        tries -= 1
    return job_state


def test_processing_server_connectivity():
    """The processing server root endpoint must be reachable and greet us."""
    test_url = f'{PROCESSING_SERVER_URL}/'
    response = get(test_url)
    assert response.status_code == 200, \
        f'Processing server is not reachable on: {test_url}, {response.status_code}'
    message = response.json()['message']
    # No placeholders needed here, so a plain string literal suffices (F541 fix).
    assert message.startswith('The home page of'), \
        'Processing server home page message is corrupted'


# TODO: The processing workers are still not registered when deployed separately.
#  Fix that by extending the processing server.
def test_processing_server_deployed_processors():
    """Separately deployed workers are not auto-registered (yet), so the list is empty."""
    test_url = f'{PROCESSING_SERVER_URL}/processor'
    response = get(test_url)
    processors = response.json()
    assert response.status_code == 200, \
        f'Processing server: {test_url}, {response.status_code}'
    # Plain string literal — the previous f-string had no placeholders (F541 fix).
    assert processors == [], 'Mismatch in deployed processors'


def test_processing_server_processing_request():
    """Submit a single-processor (ocrd-dummy) job and wait for it to succeed."""
    path_to_mets = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
    test_processing_job_input = {
        "path_to_mets": path_to_mets,
        "input_file_grps": ['OCR-D-IMG'],
        "output_file_grps": ['OCR-D-DUMMY'],
        "agent_type": NETWORK_AGENT_WORKER,
        "parameters": {}
    }
    test_processor = 'ocrd-dummy'
    test_url = f'{PROCESSING_SERVER_URL}/processor/run/{test_processor}'
    response = post(
        url=test_url,
        headers={"accept": "application/json"},
        json=test_processing_job_input
    )
    assert response.status_code == 200, \
        f'Processing server: {test_url}, {response.status_code}'
    processing_job_id = response.json()["job_id"]
    assert processing_job_id

    job_state = poll_till_timeout_fail_or_success(
        test_url=f"{PROCESSING_SERVER_URL}/processor/job/{processing_job_id}",
        tries=10,
        wait=10
    )
    assert job_state == StateEnum.success


def test_processing_server_workflow_request():
    """Submit a page-wise workflow job and wait for it to succeed."""
    # Note: the used workflow path is volume mapped
    path_to_dummy_wf = "/ocrd-data/assets/dummy-workflow.txt"
    path_to_mets = assets.path_to('kant_aufklaerung_1784/data/mets.xml')

    # Submit the workflow job; the context manager guarantees the file
    # handle is closed (the previous inline open() leaked it).
    test_url = f"{PROCESSING_SERVER_URL}/workflow/run?mets_path={path_to_mets}&page_wise=True"
    with open(path_to_dummy_wf, 'rb') as workflow_fp:
        response = post(
            url=test_url,
            headers={"accept": "application/json"},
            files={"workflow": workflow_fp}
        )
    assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}"
    wf_job_id = response.json()["job_id"]
    assert wf_job_id

    job_state = poll_till_timeout_fail_or_success(
        test_url=f"{PROCESSING_SERVER_URL}/workflow/job-simple/{wf_job_id}",
        tries=30,
        wait=10
    )
    assert job_state == StateEnum.success


from pika import BasicProperties
from pickle import dumps, loads
from tests.network.config import test_config

DEFAULT_EXCHANGER_NAME = test_config.DEFAULT_EXCHANGER_NAME
DEFAULT_QUEUE = test_config.DEFAULT_QUEUE


def test_rmq_publish_then_consume_2_messages(rabbitmq_publisher, rabbitmq_consumer):
    """Two published messages must be consumed in FIFO order with correct metadata."""
    test_headers = {"Test Header": "Test Value"}
    test_properties = BasicProperties(
        app_id='webapi-processing-broker',
        content_type='application/json',
        headers=test_headers
    )
    rabbitmq_publisher.publish_to_queue(
        queue_name=DEFAULT_QUEUE,
        message="RabbitMQ test 123",
        exchange_name=DEFAULT_EXCHANGER_NAME,
        properties=test_properties
    )
    rabbitmq_publisher.publish_to_queue(
        queue_name=DEFAULT_QUEUE,
        message="RabbitMQ test 456",
        exchange_name=DEFAULT_EXCHANGER_NAME,
        properties=test_properties
    )
    assert rabbitmq_publisher.message_counter == 2

    # Consume the 1st message
    method_frame, header_frame, message = rabbitmq_consumer.get_one_message(
        queue_name=DEFAULT_QUEUE,
        auto_ack=True
    )
    assert method_frame.delivery_tag == 1  # 1st delivered message to this queue
    assert method_frame.message_count == 1  # messages left in the queue
    assert method_frame.redelivered is False
    assert method_frame.exchange == DEFAULT_EXCHANGER_NAME
    assert method_frame.routing_key == DEFAULT_QUEUE
    # It's possible to assert header_frame the same way
    assert message.decode() == "RabbitMQ test 123"

    # Consume the 2nd message
    method_frame, header_frame, message = rabbitmq_consumer.get_one_message(
        queue_name=DEFAULT_QUEUE,
        auto_ack=True
    )
    assert method_frame.delivery_tag == 2  # 2nd delivered message to this queue
    assert method_frame.message_count == 0  # messages left in the queue
    assert method_frame.redelivered is False
    assert method_frame.exchange == DEFAULT_EXCHANGER_NAME
    assert method_frame.routing_key == DEFAULT_QUEUE
    # It's possible to assert header_frame the same way
    assert message.decode() == "RabbitMQ test 456"


def test_rmq_publish_then_consume_ocrd_message(rabbitmq_publisher, rabbitmq_consumer):
    """A pickled processing-message dict must round-trip through the queue intact.

    NOTE(review): pickle is only safe here because both ends are this test;
    never unpickle data from an untrusted queue.
    """
    ocrd_processing_message = {
        "job_id": "Test_job_id",
        "workflow_id": "Test_workflow_id",
        "workspace_id": "Test_workspace_id"
    }
    message_bytes = dumps(ocrd_processing_message)
    rabbitmq_publisher.publish_to_queue(
        queue_name=DEFAULT_QUEUE,
        message=message_bytes,
        exchange_name=DEFAULT_EXCHANGER_NAME,
        properties=None
    )

    method_frame, header_frame, message = rabbitmq_consumer.get_one_message(
        queue_name=DEFAULT_QUEUE,
        auto_ack=True
    )
    assert method_frame.message_count == 0  # messages left in the queue
    decoded_message = loads(message)
    assert decoded_message["job_id"] == "Test_job_id"
    assert decoded_message["workflow_id"] == "Test_workflow_id"
    assert decoded_message["workspace_id"] == "Test_workspace_id"
import OcrdResourceManager +from ocrd_utils import config from ocrd_utils.os import get_ocrd_tool_json from pytest import raises, fixture @@ -20,6 +21,8 @@ def test_resources_manager_config_default(monkeypatch, tmp_path): # arrange monkeypatch.setenv('HOME', str(tmp_path)) + if 'XDG_CONFIG_HOME' in os.environ: + monkeypatch.delenv('XDG_CONFIG_HOME', raising=False) # act mgr = OcrdResourceManager() @@ -27,8 +30,11 @@ def test_resources_manager_config_default(monkeypatch, tmp_path): # assert default_config_dir = os.path.join(os.environ['HOME'], '.config', 'ocrd') f = Path(default_config_dir) / CONST_RESOURCE_YML - assert f.exists() + assert os.environ['HOME'] == str(tmp_path) + assert config.HOME == tmp_path + assert Path.home() == tmp_path assert f == mgr.user_list + assert f.exists() assert mgr.add_to_user_database('ocrd-foo', f) # pdb.set_trace() @@ -43,9 +49,9 @@ def test_resources_manager_config_default(monkeypatch, tmp_path): def test_resources_manager_from_environment(tmp_path, monkeypatch): # arrange - monkeypatch.setenv('XDG_CONFIG_HOME', tmp_path) - monkeypatch.setenv('XDG_DATA_HOME', tmp_path) - monkeypatch.setenv('HOME', tmp_path) + monkeypatch.setenv('XDG_CONFIG_HOME', str(tmp_path)) + monkeypatch.setenv('XDG_DATA_HOME', str(tmp_path)) + monkeypatch.setenv('HOME', str(tmp_path)) # act mgr = OcrdResourceManager() diff --git a/tests/test_utils.py b/tests/test_utils.py index 2dac359c46..89ff6d90f3 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -21,6 +21,7 @@ is_string, membername, generate_range, + sparkline, nth_url_segment, remove_non_path_from_url, @@ -331,6 +332,12 @@ def test_partition_list(): assert partition_list(lst_13, chunks=4) == [[1, 2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 13]] assert partition_list(lst_13, chunks=4, chunk_index=1) == [[5, 6, 7]] +def test_sparkline(): + assert sparkline([5, 2, 3]) == '█▃▄' + assert sparkline([1000, 1, 2222]) == '▃ █' + assert sparkline([8, 7, 6, 5, 4, 3, 2, 1, 0]) == '█▇▆▅▄▃▂▁ ' + assert 
sparkline([-1, None, 'forty-two']) == '' + if __name__ == '__main__': main(__file__) diff --git a/tests/validator/test_xsd_validator.py b/tests/validator/test_xsd_validator.py index 4d4deadc15..d0150338dd 100644 --- a/tests/validator/test_xsd_validator.py +++ b/tests/validator/test_xsd_validator.py @@ -25,7 +25,7 @@ def test_constructor(self): def test_mets_empty(self): with TemporaryDirectory() as tempdir: mets_path = Path(tempdir, 'mets.xml') - mets_path.write_bytes(METS_XML_EMPTY) + mets_path.write_text(METS_XML_EMPTY) report = XsdMetsValidator.validate(mets_path) self.assertEqual(len(report.errors), 2) self.assertEqual(report.errors[0],