Skip to content

Commit

Permalink
Merge branch 'master' into ocrd-tool-json-root
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Jan 30, 2024
2 parents 9326404 + 69d9757 commit e07c5f2
Show file tree
Hide file tree
Showing 43 changed files with 1,273 additions and 81 deletions.
4 changes: 2 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version: 2
version: 2.1

orbs:
python: circleci/python@2.0.3
python: circleci/python@2.1.1

jobs:

Expand Down
3 changes: 3 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@
!LICENSE
!README.md
!.git
!tests
!requirements_test.txt
!.gitmodules
58 changes: 58 additions & 0 deletions .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
name: Run ocrd network integration tests

on:
push:
branches: [ "master" ]
pull_request:
branches: [ "master" ]

jobs:
build:

runs-on: ${{ matrix.os }}

strategy:
fail-fast: false
matrix:
python-version:
- '3.7'
- '3.8'
- '3.9'
- '3.10'
- '3.11'
os:
- ubuntu-22.04
# - macos-latest

steps:
- uses: actions/checkout@v3
- name: Set up Homebrew
id: set-up-homebrew
uses: Homebrew/actions/setup-homebrew@master
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
if [[ "${{ matrix.os }}" == "ubuntu"* ]];then
sudo apt-get -y update
sudo make deps-ubuntu
else
HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 \
HOMEBREW_NO_AUTO_UPDATE=1 \
brew install imagemagick geos bash # opencv
fi
make install deps-test
- name: Install Docker on macOS
if: runner.os == 'macos'
run: |
brew install docker docker-compose
colima start
- name: Test network integration with pytest
run: |
if [[ "${{ matrix.os }}" == "macos"* ]];then
make integration-test DOCKER_COMPOSE=docker-compose
else
make integration-test
fi
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@ Versioned according to [Semantic Versioning](http://semver.org/).

## Unreleased

Added:

* Basic integration test for `ocrd_network`, #1164

Fixed:

* METS Server: UDS sockets are removed on process exit, #117

Changed:

* replace license-incompatible sparkline library with a simpler implementation, #1176
* replace all pkg_resources calls with modern alternatives, no more run-time setuptools dependency, #1174

## [2.61.2] - 2024-01-24

Fixed:
Expand Down
22 changes: 17 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ARG BASE_IMAGE
FROM $BASE_IMAGE
FROM $BASE_IMAGE as ocrd_core_base
ARG FIXUP=echo
MAINTAINER OCR-D
ENV DEBIAN_FRONTEND noninteractive
Expand Down Expand Up @@ -33,13 +33,25 @@ RUN apt-get update && apt-get -y install software-properties-common \
curl \
sudo \
git \
&& make deps-ubuntu \
&& python3 -m venv /usr/local \
&& make deps-ubuntu
RUN python3 -m venv /usr/local \
&& hash -r \
&& make install \
&& eval $FIXUP \
&& rm -rf /build-ocrd
&& eval $FIXUP

WORKDIR /data

CMD ["/usr/local/bin/ocrd", "--help"]

FROM ocrd_core_base as ocrd_core_test
WORKDIR /build-ocrd
COPY Makefile .
RUN make assets
COPY tests ./tests
COPY .gitmodules .
COPY requirements_test.txt .
RUN pip install -r requirements_test.txt
RUN mkdir /ocrd-data && chmod 777 /ocrd-data

CMD ["yes"]
# CMD ["make", "test", "integration-test"]
2 changes: 1 addition & 1 deletion Dockerfile.cuda
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ARG BASE_IMAGE
FROM $BASE_IMAGE
FROM $BASE_IMAGE AS ocrd_core_base

ENV MAMBA_EXE=/usr/local/bin/conda
ENV MAMBA_ROOT_PREFIX=/conda
Expand Down
14 changes: 12 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ TESTDIR = $(CURDIR)/tests
PYTEST_ARGS = --continue-on-collection-errors
VERSION = $(shell cat VERSION)

DOCKER_COMPOSE = docker compose

SPHINX_APIDOC =

BUILD_ORDER = ocrd_utils ocrd_models ocrd_modelfactory ocrd_validators ocrd_network ocrd
Expand Down Expand Up @@ -129,7 +131,7 @@ build:
# (Re)install the tool
install: #build
# not strictly necessary but a precaution against outdated python build tools, https://github.com/OCR-D/core/pull/1166
$(PIP) install -U pip wheel setuptools
$(PIP) install -U pip wheel
$(PIP_INSTALL) . $(PIP_INSTALL_CONFIG_OPTION)
@# workaround for shapely#1598
$(PIP) config set global.no-binary shapely
Expand Down Expand Up @@ -213,9 +215,16 @@ test: assets
$(PYTHON) \
-m pytest $(PYTEST_ARGS) --durations=10\
--ignore-glob="$(TESTDIR)/**/*bench*.py" \
--ignore-glob="$(TESTDIR)/network/*.py" \
$(TESTDIR)
cd ocrd_utils ; $(PYTHON) -m pytest --continue-on-collection-errors -k TestLogging -k TestDecorators $(TESTDIR)

INTEGRATION_TEST_IN_DOCKER = docker exec core_test
integration-test:
$(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d
-$(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_rmq or test_db or test_processing_server' -v
$(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans

benchmark:
$(PYTHON) -m pytest $(TESTDIR)/model/test_ocrd_mets_bench.py

Expand Down Expand Up @@ -271,6 +280,7 @@ pyclean:
rm -rf ./dist
rm -rf htmlcov
rm -rf .benchmarks
rm -rf **/*.egg-info
rm -f **/*.pyc
-find . -name '__pycache__' -exec rm -rf '{}' \;
rm -rf .pytest_cache
Expand All @@ -296,7 +306,7 @@ docker-cuda: DOCKER_FILE = Dockerfile.cuda
docker-cuda: docker

docker docker-cuda:
docker build --progress=plain -f $(DOCKER_FILE) -t $(DOCKER_TAG) --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) .
docker build --progress=plain -f $(DOCKER_FILE) -t $(DOCKER_TAG) --target ocrd_core_base --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) .

# Build wheels and source dist and twine upload them
pypi: build
Expand Down
13 changes: 7 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
atomicwrites >= 1.3.0
bagit >= 1.7.0
bagit_profile >= 1.3.0
beanie~=1.7
click >=7
Deprecated == 1.2.0
docker
fastapi
fastapi>=0.78.0
filetype
Flask
frozendict>=2.3.4
gdown
httpx>=0.22.0
importlib_metadata ; python_version < '3.8'
importlib_resources ; python_version < '3.9'
jsonschema
lxml
memory-profiler >= 0.58.0
# XXX explicitly do not restrict the numpy version because different
# tensorflow versions might require different versions
numpy
ocrd-fork-bagit >= 1.8.1.post2
ocrd-fork-bagit_profile >= 1.3.0.post1
opencv-python-headless
paramiko
pika>=1.2.0
Expand All @@ -27,8 +30,6 @@ pyyaml
requests < 2.30
requests_unixsocket
shapely
sparklines >= 0.4.2
# tensorflow versions might require different versions
uvicorn
uvicorn>=0.17.6
# XXX explicitly do not restrict the numpy version because different

4 changes: 2 additions & 2 deletions src/ocrd/cli/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,14 +286,14 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
\b
Examples:
ocrd workspace bulk-add \\
--regex '(?P<fileGrp>[^/]+)/page_(?P<pageid>.*)\.[^.]+' \\
--regex '(?P<fileGrp>[^/]+)/page_(?P<pageid>.*)\\.[^.]+' \\
--page-id 'PHYS_{{ pageid }}' \\
--file-grp "{{ fileGrp }}" \\
path/to/files/*/*.*
\b
echo "path/to/src/file.xml SEG/page_p0001.xml" \\
| ocrd workspace bulk-add \\
--regex '(?P<src>.*?) (?P<fileGrp>.+?)/page_(?P<pageid>.*)\.(?P<ext>[^\.]*)' \\
--regex '(?P<src>.*?) (?P<fileGrp>.+?)/page_(?P<pageid>.*)\\.(?P<ext>[^\\.]*)' \\
--file-id 'FILE_{{ fileGrp }}_{{ pageid }}' \\
--page-id 'PHYS_{{ pageid }}' \\
--file-grp "{{ fileGrp }}" \\
Expand Down
6 changes: 3 additions & 3 deletions src/ocrd/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Constants for ocrd.
"""
from pkg_resources import resource_filename
from ocrd_utils import resource_filename

__all__ = [
'TMP_PREFIX',
Expand All @@ -18,7 +18,7 @@
DEFAULT_UPLOAD_FOLDER = '/tmp/uploads-ocrd-core'
DOWNLOAD_DIR = '/tmp/ocrd-core-downloads'
DEFAULT_REPOSITORY_URL = 'http://localhost:5000/'
BASHLIB_FILENAME = resource_filename(__name__, 'lib.bash')
RESOURCE_LIST_FILENAME = resource_filename(__name__, 'resource_list.yml')
BASHLIB_FILENAME = resource_filename(__package__, 'lib.bash')
RESOURCE_LIST_FILENAME = resource_filename(__package__, 'resource_list.yml')
RESOURCE_USER_LIST_COMMENT = "# OCR-D private resource list (consider sending a PR with your own resources to OCR-D/core)"
BACKUP_DIR = '.backup'
22 changes: 12 additions & 10 deletions src/ocrd/mets_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,24 @@
# METS server functionality
"""
import re
from os import environ, _exit, chmod
from io import BytesIO
from typing import Any, Dict, Optional, Union, List, Tuple
from os import _exit, chmod
from typing import Dict, Optional, Union, List, Tuple
from pathlib import Path
from urllib.parse import urlparse
import socket
import atexit

from fastapi import FastAPI, Request, File, Form, Response
from fastapi import FastAPI, Request, Form, Response
from fastapi.responses import JSONResponse
from requests import request, Session as requests_session
from requests import Session as requests_session
from requests.exceptions import ConnectionError
from requests_unixsocket import Session as requests_unixsocket_session
from pydantic import BaseModel, Field, ValidationError

import uvicorn

from ocrd_models import OcrdMets, OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent
from ocrd_utils import initLogging, getLogger, deprecated_alias
from ocrd_models import OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent
from ocrd_utils import getLogger, deprecated_alias

#
# Models
Expand Down Expand Up @@ -197,9 +197,10 @@ def __init__(self, workspace, url):
self.log = getLogger(f'ocrd.mets_server[{self.url}]')

def shutdown(self):
self.log.info("Shutting down METS server")
if self.is_uds:
Path(self.url).unlink()
if Path(self.url).exists():
self.log.warning(f'UDS socket {self.url} still exists, removing it')
Path(self.url).unlink()
# os._exit because uvicorn catches SystemExit raised by sys.exit
_exit(0)

Expand Down Expand Up @@ -296,7 +297,7 @@ async def stop():
"""
Stop the server
"""
getLogger('ocrd.models.ocrd_mets').info('Shutting down')
getLogger('ocrd.models.ocrd_mets').info(f'Shutting down METS Server {self.url}')
workspace.save_mets()
self.shutdown()

Expand All @@ -308,6 +309,7 @@ async def stop():
self.log.debug(f"chmod 0o677 {self.url}")
server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
server.bind(self.url) # creates the socket file
atexit.register(self.shutdown)
server.close()
chmod(self.url, 0o666)
uvicorn_kwargs = {'uds': self.url}
Expand Down
7 changes: 3 additions & 4 deletions src/ocrd/processor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
'run_processor'
]

from warnings import warn
from pkg_resources import resource_filename
from os.path import exists
from shutil import copyfileobj
import json
Expand All @@ -30,7 +28,8 @@
list_resource_candidates,
pushd_popd,
list_all_resources,
get_processor_resource_types
get_processor_resource_types,
resource_filename,
)
from ocrd_validators import ParameterValidator
from ocrd_models.ocrd_page import MetadataItemType, LabelType, LabelsType
Expand Down Expand Up @@ -266,7 +265,7 @@ def moduledir(self):
"""
The filesystem path of the module directory.
"""
return resource_filename(self.module, '')
return resource_filename(self.module, '.')

@property
def input_files(self):
Expand Down
6 changes: 3 additions & 3 deletions src/ocrd/processor/builtin/dummy_processor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# pylint: disable=missing-module-docstring,invalid-name
from os.path import join, basename
from pkg_resources import resource_string

import click

Expand All @@ -13,11 +12,12 @@
make_file_id,
MIME_TO_EXT,
MIMETYPE_PAGE,
parse_json_string_with_comments
parse_json_string_with_comments,
resource_string
)
from ocrd_modelfactory import page_from_file

OCRD_TOOL = parse_json_string_with_comments(resource_string(__name__, 'dummy/ocrd-tool.json').decode('utf8'))
OCRD_TOOL = parse_json_string_with_comments(resource_string(__package__ + '.dummy', 'ocrd-tool.json'))

class DummyProcessor(Processor):
"""
Expand Down
Loading

0 comments on commit e07c5f2

Please sign in to comment.