Skip to content

Commit

Permalink
Merge branch 'master' into new-processor-api
Browse files Browse the repository at this point in the history
  • Loading branch information
kba authored Aug 14, 2024
2 parents 5ffe3cb + 6041785 commit dd3046e
Show file tree
Hide file tree
Showing 37 changed files with 547 additions and 254 deletions.
53 changes: 20 additions & 33 deletions .github/workflows/docker-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ on:
workflow_dispatch: # run manually

env:
GHCRIO_DOCKER_TAG: ghcr.io/ocr-d/core
DOCKERIO_DOCKER_TAG: docker.io/ocrd/core
# FIXME: linux/arm/v7 disabled as long as scikit-build/cmake-python-distributions#503 is unresolved
# PLATFORMS: linux/amd64,linux/arm/v7,linux/arm64/v8,linux/ppc64le
PLATFORMS: linux/amd64,linux/arm64/v8,linux/ppc64le

jobs:

Expand All @@ -17,8 +18,18 @@ jobs:
permissions:
packages: write
contents: read

env:
DOCKER_BASE_TAG: ghcr.io/ocr-d docker.io/ocrd
# TODO(kba): make the interpolation work correctly
# DOCKER_BUILD: docker buildx build --progress=plain --platform ${{ env.PLATFORMS }} --push
# TODO(kba): Investigate why ppc64le build hangs on "Installing build dependencies"
# TODO(kba): Investigate why arm64 fails with .buildkit_qemu_emulator: /usr/local/bin/conda: Invalid ELF image for this architecture
DOCKER_BUILD: docker buildx build --progress=plain --platform linux/amd64 --push
steps:
- name: Export variables
run: |
echo "DOCKER_BASE_TAG=${{ env.DOCKER_BASE_TAG }}" >> $GITHUB_ENV
echo "DOCKER_BUILD=${{ env.DOCKER_BUILD }}" >> $GITHUB_ENV
- name: Checkout
uses: actions/checkout@v4
with:
Expand All @@ -28,19 +39,6 @@ jobs:
- # Activate cache export feature to reduce build time of images
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build the Docker image
run: make docker
- name: Build the Docker image with GPU support
run: make docker-cuda
- name: Alias Docker images
# default tag uses docker.io, so tag post-hoc
run: |
docker tag ocrd/core ${{ env.GHCRIO_DOCKER_TAG }}
docker tag ocrd/core-cuda ${{ env.GHCRIO_DOCKER_TAG }}-cuda
- name: Smoke Test that ocrd --help works
run: |
docker run --rm ${{ env.GHCRIO_DOCKER_TAG }} ocrd --version
docker run --rm ${{ env.GHCRIO_DOCKER_TAG }}-cuda ocrd --version
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
Expand All @@ -52,20 +50,9 @@ jobs:
with:
username: ${{ secrets.DOCKERIO_USERNAME }}
password: ${{ secrets.DOCKERIO_PASSWORD }}
- name: Push images to Github Container Registry
run: |
docker push ${{ env.GHCRIO_DOCKER_TAG }}:latest
docker push ${{ env.GHCRIO_DOCKER_TAG }}-cuda:latest
- name: Push images to Docker Hub
run: |
docker tag ${{ env.GHCRIO_DOCKER_TAG }} ${{ env.DOCKERIO_DOCKER_TAG }}
docker tag ${{ env.GHCRIO_DOCKER_TAG }}-cuda ${{ env.DOCKERIO_DOCKER_TAG }}-cuda
docker push ${{ env.DOCKERIO_DOCKER_TAG }}:latest
docker push ${{ env.DOCKERIO_DOCKER_TAG }}-cuda:latest
docker push ${{ env.DOCKERIO_DOCKER_TAG }}:latest
docker push ${{ env.DOCKERIO_DOCKER_TAG }}-cuda:latest
docker tag ${{ env.DOCKERIO_DOCKER_TAG }} ${{ env.DOCKERIO_DOCKER_TAG }}:$(git describe --tags --abbrev=0)
docker tag ${{ env.DOCKERIO_DOCKER_TAG }}-cuda ${{ env.DOCKERIO_DOCKER_TAG }}-cuda:$(git describe --tags --abbrev=0)
docker push ${{ env.DOCKERIO_DOCKER_TAG }}:$(git describe --tags --abbrev=0)
docker push ${{ env.DOCKERIO_DOCKER_TAG }}-cuda:$(git describe --tags --abbrev=0)
- name: Build the Docker image
run: make docker
- name: Build the Docker image with GPU support
run: make docker-cuda
- name: Build the Docker images with GPU support and ML frameworks
run: make docker-cuda-tf1 docker-cuda-tf2 docker-cuda-torch
12 changes: 8 additions & 4 deletions .github/workflows/unit-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,18 @@ jobs:
make install deps-test
- name: Test with pytest
run: |
make test benchmark
mkdir -p test-results
make test benchmark PYTEST_ARGS=--junitxml=test-results/test.xml
- uses: test-summary/action@v2
with:
paths: "test-results/test.xml"
- name: test to ensure that --editable install works
run: |
make install-dev; ocrd --version
- name: Lint with flake8
run: |
python -m pip install flake8
python -m pip install flake8 flake8-github
# stop the build if there are Python syntax errors or undefined names
flake8 src --count --exit-zero --select=E9,F63,F7,F82 --show-source --statistics
flake8 src --count --exit-zero --select=E9,F63,F7,F82 --show-source --statistics --format=github
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 src --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
flake8 src --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --format=github
8 changes: 6 additions & 2 deletions .scrutinizer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@ build:
python:
version: 3.8.2
virtualenv: true
variables:
DEBIAN_FRONTEND: noninteractive
nodes:
analysis:
dependencies:
override:
- sudo make deps-ubuntu
- make install
- echo "Skipped"
# - command: sudo make deps-ubuntu
# idle_timeout: 600
# - make install
tests:
override:
- py-scrutinizer-run
Expand Down
58 changes: 57 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,58 @@ Versioned according to [Semantic Versioning](http://semver.org/).

## Unreleased

Changed:

* ocrd_network: Use `ocrd-all-tool.json` bundled by core instead of download from website, #1257, #1260
* `ocrd workspace clone`/`Resolver.workspace_from_url`: with `clobber_mets=False`, raise a FileExistsError for existing mets.xml on disk, #563, #1268
* `ocrd workspace find --download`: print the correct, up-to-date field, not `None`, #1202, #1266

## [2.67.2] - 2024-07-19

Fixed:

* Run `multiprocessing.set_start_method('fork')` only for OSX, #1261
* Broken PyPI release, #1262

## [2.67.1] - 2024-07-17

Fixed:

- Build and tests fixed, no functional changes from #1258

## [2.67.0] - 2024-07-16

Changed:

- Additional docker base images with preinstalled tensorflow 1 (`core-cuda-tf1`), tensorflow 2 (`core-cuda-tf2`) and torch (`core-cuda-torch`), #1239
- Resource Manager: Skip download instead of raising an exception if target file already exists (unless `--overwrite`), #1246
- Resource Manager: Try to use bundled `ocrd-all-tool.json` if available, #1250, OCR-D/all#444

Added:

- `ocrd process` now supports `-U/--mets-server-url`, #1243

Fixed:

- `ocrd process`-derived tasks are not run in a temporary directory when not called from within workspace, #1243
- regression from #1238 where processors that had required parameters failed, #1255, #1256
- METS Server: Unlink UDS socket file if it exists before startup, #1244
- Resource Manager: Do not create zero-size files for failing downloads, #1201, #1246
- Workspace.add_file: Allow multiple processors to create file group folders simultaneously, #1203, #1253
- Resource Manager: Do not try to run `--dump-json` for known non-processors `ocrd-{cis-data,import,make}`, #1218, #1249
- Resource Manager: Properly handle copying of directories, #1237, #1248
- bashlib: regression in parsing JSON from introducing parameter preset files, #1258

Removed:

- Defaults for `-I/--input-file-grp`/`-O/--output-file-grp`, #1256, #274

## [2.66.1] - 2024-06-26

Fixed:

* GHA Docker: build docker.io first, then tag ghcr.io

## [2.66.0] - 2024-06-07

Fixed:
Expand Down Expand Up @@ -2092,8 +2144,12 @@ Fixed
## [0.0.1] - 2018-04-17

Initial Release
]

<!-- link-labels -->
[2.67.2]: ../../compare/v2.67.2..v2.67.1
[2.67.1]: ../../compare/v2.67.1..v2.67.0
[2.67.0]: ../../compare/v2.67.0..v2.66.1
[2.66.1]: ../../compare/v2.66.1..v2.66.0
[2.66.0]: ../../compare/v2.66.0..v2.65.0
[2.65.0]: ../../compare/v2.65.0..v2.64.1
[2.64.1]: ../../compare/v2.64.1..v2.64.0
Expand Down
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ RUN python3 -m venv /usr/local \
&& hash -r \
&& make install-dev \
&& eval $FIXUP
# Smoke Test
RUN ocrd --version

WORKDIR /data

Expand Down
32 changes: 17 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ deps-tf1:
fi

deps-tf2:
if $(PYTHON) -c 'import sys; print("%u.%u" % (sys.version_info.major, sys.version_info.minor))' | fgrep 3.8 && \
if $(PYTHON) -c 'import sys; print("%u.%u" % (sys.version_info.major, sys.version_info.minor))' | fgrep 3.8; then \
$(PIP) install tensorflow; \
else \
$(PIP) install "tensorflow[and-cuda]"; \
Expand All @@ -162,7 +162,7 @@ deps-torch:

# Dependencies for deployment in an ubuntu/debian linux
deps-ubuntu:
apt-get install -y python3 imagemagick libgeos-dev
apt-get install -y python3 imagemagick libgeos-dev libxml2-dev libxslt-dev libssl-dev

# Install test python deps via pip
deps-test:
Expand Down Expand Up @@ -361,44 +361,46 @@ pyclean:
.PHONY: docker docker-cuda

# Additional arguments to docker build. Default: '$(DOCKER_ARGS)'
DOCKER_ARGS =
DOCKER_ARGS ?=
DOCKER_BASE_TAG ?= ocrd
DOCKER_BUILD ?= docker build --progress=plain

# Build docker image
docker: DOCKER_BASE_IMAGE = ubuntu:20.04
docker: DOCKER_TAG = ocrd/core
docker: DOCKER_TAG = $(DOCKER_BASE_TAG:%=%/core)
docker: DOCKER_FILE = Dockerfile

# Build extended sets for maximal layer sharing
docker-cuda: DOCKER_BASE_IMAGE = ocrd/core
docker-cuda: DOCKER_TAG = ocrd/core-cuda
docker-cuda: DOCKER_BASE_IMAGE = $(DOCKER_BASE_TAG)/core
docker-cuda: DOCKER_TAG = $(DOCKER_BASE_TAG:%=%/core-cuda)
docker-cuda: DOCKER_FILE = Dockerfile.cuda

docker-cuda: docker

docker-cuda-tf1: DOCKER_BASE_IMAGE = ocrd/core-cuda
docker-cuda-tf1: DOCKER_TAG = ocrd/core-cuda-tf1
docker-cuda-tf1: DOCKER_BASE_IMAGE = $(DOCKER_BASE_TAG)/core-cuda
docker-cuda-tf1: DOCKER_TAG = $(DOCKER_BASE_TAG:%=%/core-cuda-tf1)
docker-cuda-tf1: DOCKER_FILE = Dockerfile.cuda-tf1

docker-cuda-tf1: docker-cuda

docker-cuda-tf2: DOCKER_BASE_IMAGE = ocrd/core-cuda
docker-cuda-tf2: DOCKER_TAG = ocrd/core-cuda-tf2
docker-cuda-tf2: DOCKER_BASE_IMAGE = $(DOCKER_BASE_TAG)/core-cuda
docker-cuda-tf2: DOCKER_TAG = $(DOCKER_BASE_TAG:%=%/core-cuda-tf2)
docker-cuda-tf2: DOCKER_FILE = Dockerfile.cuda-tf2

docker-cuda-tf2: docker-cuda

docker-cuda-torch: DOCKER_BASE_IMAGE = ocrd/core-cuda
docker-cuda-torch: DOCKER_TAG = ocrd/core-cuda-torch
docker-cuda-torch: DOCKER_BASE_IMAGE = $(DOCKER_BASE_TAG)/core-cuda
docker-cuda-torch: DOCKER_TAG = $(DOCKER_BASE_TAG:%=%/core-cuda-torch)
docker-cuda-torch: DOCKER_FILE = Dockerfile.cuda-torch

docker-cuda-torch: docker-cuda

docker docker-cuda docker-cuda-tf1 docker-cuda-tf2 docker-cuda-torch:
docker build --progress=plain -f $(DOCKER_FILE) -t $(DOCKER_TAG) --target ocrd_core_base --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) .
docker docker-cuda docker-cuda-tf1 docker-cuda-tf2 docker-cuda-torch:
$(DOCKER_BUILD) -f $(DOCKER_FILE) $(DOCKER_TAG:%=-t %) --target ocrd_core_base --build-arg BASE_IMAGE=$(lastword $(DOCKER_BASE_IMAGE)) $(DOCKER_ARGS) .

# Build wheels and source dist and twine upload them
pypi: build
twine upload dist/ocrd-$(VERSION)*{tar.gz,whl}
twine upload --verbose dist/ocrd-$(VERSION)*{tar.gz,whl}

pypi-workaround: build-workaround
for dist in $(BUILD_ORDER);do twine upload dist/$$dist-$(VERSION)*{tar.gz,whl};done
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.0.0a1
3.0.0a1
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
atomicwrites >= 1.3.0
beanie~=1.7
click >=7
cryptography < 43.0.0
Deprecated == 1.2.0
docker
fastapi>=0.78.0
Expand All @@ -27,9 +28,8 @@ pydantic==1.*
python-magic
python-multipart
pyyaml
requests < 2.30
requests_unixsocket
requests
requests_unixsocket2
shapely
uvicorn
uvicorn>=0.17.6

1 change: 1 addition & 0 deletions requirements_test.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
autopep8
cryptography < 43.0.0
pytest >= 4.0.0
generateDS == 2.35.20
pytest-benchmark >= 3.2.3
Expand Down
5 changes: 3 additions & 2 deletions src/ocrd/cli/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,15 @@
@click.command('process')
@ocrd_loglevel
@click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME)
@click.option('-U', '--mets-server-url', help="TCP host URI or UDS path of METS server")
@click.option('-g', '--page-id', help="ID(s) of the pages to process")
@click.option('--overwrite', is_flag=True, default=False, help="Remove output pages/images if they already exist")
@click.argument('tasks', nargs=-1, required=True)
def process_cli(log_level, mets, page_id, tasks, overwrite):
def process_cli(log_level, mets, mets_server_url, page_id, tasks, overwrite):
"""
Process a series of tasks
"""
initLogging()
log = getLogger('ocrd.cli.process')
run_tasks(mets, log_level, page_id, tasks, overwrite)
run_tasks(mets, log_level, page_id, tasks, overwrite=overwrite, mets_server_url=mets_server_url)
log.info("Finished")
Loading

0 comments on commit dd3046e

Please sign in to comment.