diff --git a/.pylintrc b/.pylintrc
index b2125d824c..2e3af4288b 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,19 +1,22 @@
 [MASTER]
-extension-pkg-whitelist=lxml
-ignored-modules=cv2,tesserocr,ocrd.model
+extension-pkg-whitelist=lxml,pydantic
+ignored-modules=cv2,tesserocr,ocrd_models.ocrd_page_generateds
+ignore-paths=ocrd_page_generateds.py
+ignore-patterns=.*generateds.*
 
 [MESSAGES CONTROL]
-ignore-patterns='.*generateds.*'
 disable =
     fixme,
-    E501,
+    line-too-long,
+    consider-using-f-string,
+    logging-fstring-interpolation,
     trailing-whitespace,
     logging-not-lazy,
     inconsistent-return-statements,
+    disallowed-name,
     invalid-name,
     line-too-long,
     missing-docstring,
-    no-self-use,
     wrong-import-order,
     too-many-nested-blocks,
     superfluous-parens,
@@ -25,13 +28,9 @@ disable =
     ungrouped-imports,
     useless-object-inheritance,
     useless-import-alias,
-    bad-continuation,
     no-else-return,
     logging-not-lazy
 
-[FORMAT]
-no-space-check=empty-line
-
 [DESIGN]
 # Maximum number of arguments for function / method
 max-args=12
@@ -40,7 +39,7 @@ max-locals=30
 # Maximum number of return / yield for function / method body
 max-returns=12
 # Maximum number of branch for function / method body
-max-branchs=30
+max-branches=30
 # Maximum number of statements in function / method body
 max-statements=60
 # Maximum number of parents for a class (see R0901).
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 351f5a56aa..04ea2d42a1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,164 @@ Versioned according to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+## [3.0.0b7] - 2024-11-12
+
+Fixed:
+ - `initLogging`: only add root handler instead of multiple redundant handlers with `propagate=false`
+ - `setOverrideLogLevel`: override all currently active loggers' level
+
+Changed:
+ - :fire: logging: increase default root (not `ocrd`) level from `INFO` to `WARNING`
+ - :fire: `initLogging`: do not remove any previous handlers/levels, unless `force_reinit`
+ - :fire: `disableLogging`: remove all handlers, reset all levels - instead of being selective
+ - :fire: Processor: replace `weakref` with `__del__` to trigger `shutdown`
+ - :fire: `OCRD_MAX_PARALLEL_PAGES>1`: log via `QueueHandler` in subprocess, `QueueListener` in main
+
+## [3.0.0b6] - 2024-10-30
+
+Fixed:
+ - `OcrdMets.get_physical_pages`: cover `return_divs` w/o `for_fileIds` and `for_pageIds`
+
+Changed:
+ - :fire: `ocrd_utils.initLogging`: also add handler to root logger (as in file config),
+   but disable message propagation to avoid duplication
+ - only import `ocrd_network` in `src/ocrd/decorators/__init__.py` once needed
+ - `Processor.process_page_file`: skip computing `process_page_pcgts` if output already exists,
+   but `OCRD_EXISTING_OUTPUT!=OVERWRITE`
+ - :fire: `OCRD_MAX_PARALLEL_PAGES>1`: switch from multithreading to multiprocessing, depend on
+   `loky` instead of stdlib `concurrent.futures`
+ - `OCRD_PROCESSING_PAGE_TIMEOUT>0`: actually enforce timeout within worker
+ - `OCRD_MAX_MISSING_OUTPUTS>0`: abort early if too many failures already, prospectively
+ - `Processor.process_workspace`: split up into overridable sub-methods:
+   - `process_workspace_submit_tasks` (iterate input file group and schedule page tasks)
+   - `process_workspace_submit_page_task` (download input files and submit single page task)
+   - `process_workspace_handle_tasks` (monitor page tasks and aggregate results)
+   - `process_workspace_handle_page_task` (await single page task and handle errors)
+
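The net effect of the 3.0.0b6/3.0.0b7 logging changes can be summarised in a few calls — a minimal sketch using only functions from `ocrd_utils`, with the levels as documented in the entries above:

```python
from ocrd_utils import initLogging, setOverrideLogLevel, disableLogging, getLogger

initLogging()                 # one handler on the root logger; root at WARNING, 'ocrd.*' at INFO
log = getLogger('ocrd.example')
log.info("shown: 'ocrd' loggers default to INFO")
setOverrideLogLevel('DEBUG')  # overrides the level of all currently active loggers
disableLogging()              # removes all handlers and resets all levels
```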
+## [3.0.0b5] - 2024-09-16
+
+Fixed:
+ - tests: ensure `ocrd_utils.config` gets reset whenever changing it globally
+ - `OcrdMetsServer.add_file`: pass on `force` kwarg
+ - `ocrd.cli.workspace`: consistently pass on `--mets-server-url` and `--backup`
+ - `ocrd.cli.validate "tasks"`: pass on `--mets-server-url`
+ - `ocrd.cli.bashlib "input-files"`: pass on `--mets-server-url`
+ - `lib.bash input-files`: pass on `--mets-server-url`, `--overwrite`, and parameters
+ - `lib.bash`: fix `errexit` handling
+ - `ocrd.cli.ocrd-tool "resolve-resource"`: forgot to actually print result
+
+Changed:
+ - :fire: `Processor` / `Workspace.add_file`: always `force` if `OCRD_EXISTING_OUTPUT==OVERWRITE`
+ - :fire: `Processor.verify`: revert 3.0.0b1 enforcing cardinality checks (stay backwards compatible)
+ - :fire: `Processor.verify`: check output fileGrps, too
+   (must not exist unless `OCRD_EXISTING_OUTPUT=OVERWRITE|SKIP` or disjoint `--page-id` range)
+ - lib.bash `input-files`: do not try to validate tasks here (now covered by `Processor.verify()`)
+ - `run_processor`: be robust if `ocrd_tool` is missing `steps`
+ - `PcGtsType.PageType.id` via `make_xml_id`: replace `/` with `_`
+
+Added:
+ - `OcrdPage`: new `PageType.get_ReadingOrderGroups()` to retrieve recursive RO as dict
+ - ocrd.cli.workspace `server`: add subcommands `reload` and `save`
+ - METS Server: export and delegate `physical_pages`
+ - processor CLI: delegate `--resolve-resource`, too
+ - `Processor.process_page_file` / `OcrdPageResultImage`: allow `None` besides `AlternativeImageType`
+
+## [3.0.0b4] - 2024-09-02
+
+Fixed:
+
+ * `Processor.metadata_location`: `src` workaround respects namespace packages, qurator-spk/eynollah#134
+ * `Workspace.reload_mets`: handle ClientSideOcrdMets as well
+
+## [3.0.0b3] - 2024-08-30
+
+Added:
+
+ * `OcrdConfig.reset_defaults` to reset config variables to their defaults
+
+## [3.0.0b2] - 2024-08-30
+
+Added:
+ - `Processor.max_workers`: class attribute to control per-page parallelism of this implementation
+ - `Processor.max_page_seconds`: class attribute to control per-page timeout of this implementation
+ - `OCRD_MAX_PARALLEL_PAGES` for whether and how many workers should process pages in parallel
+ - `OCRD_PROCESSING_PAGE_TIMEOUT` for whether and how long processors should wait for single pages
+ - `OCRD_MAX_MISSING_OUTPUTS` for maximum rate (fraction) of pages before making `OCRD_MISSING_OUTPUT=abort`
+
+Fixed:
+ - `disableLogging`: also re-instate root logger to Python defaults
+
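These limits have both a global, operator-facing side (the `OCRD_*` variables) and a per-implementation side (the new class attributes). A minimal sketch of the latter, with a hypothetical processor and the attribute semantics as described above:

```python
from ocrd import Processor

class MyProcessor(Processor):  # hypothetical implementation
    max_workers = 2          # never run more than 2 page workers, however high OCRD_MAX_PARALLEL_PAGES is
    max_page_seconds = 300   # per-page timeout of this implementation, cf. OCRD_PROCESSING_PAGE_TIMEOUT
    max_instances = 1        # cap instance caching in workers, cf. OCRD_MAX_PROCESSOR_CACHE (from 3.0.0b1)
```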
+## [3.0.0b1] - 2024-08-26
+
+Fixed:
+ - actually apply CLI `--log-filename`, and show in `--help`
+ - adapt to Pillow changes
+ - `ocrd workspace clone`: do pass on `--file-grp` (for download filtering)
+
+Changed:
+ - :fire: `ocrd_utils`, `ocrd_models`, `ocrd_modelfactory`, `ocrd_validators` and `ocrd_network` are not published
+   as separate packages anymore, everything is contained in `ocrd` - you should adapt your `requirements.txt` accordingly
+ - :fire: `Processor.parameter` now a property (attribute always exists, but `None` for non-processing contexts)
+ - :fire: `Processor.parameter` is now a `frozendict` (contents immutable)
+ - :fire: `Processor.parameter` validate when(ever) set instead of (just) the constructor
+ - setting `Processor.parameter` will also trigger (`Processor.shutdown()` and) `Processor.setup()`
+ - `get_processor(... instance_caching=True)`: use `min(max_instances, OCRD_MAX_PROCESSOR_CACHE)`
+ - :fire: `Processor.verify` always validates fileGrp cardinalities (because we have `ocrd-tool.json` defaults now)
+ - :fire: `OcrdMets.add_agent` without positional arguments
+ - `ocrd bashlib input-files` now uses normal Processor decorator, and gets passed actual `ocrd-tool.json` and tool name
+   from bashlib's `ocrd__wrap`
+
+Added:
+ - `Processor.metadata_filename`: expose to make local path of `ocrd-tool.json` in Python distribution reusable+overridable
+ - `Processor.metadata_location`: expose to make absolute path of `ocrd-tool.json` reusable+overridable
+ - `Processor.metadata_rawdict`: expose to make in-memory contents of `ocrd-tool.json` reusable+overridable
+ - `Processor.metadata`: expose to make validated and default-expanded contents of `ocrd-tool.json` reusable+overridable
+ - `Processor.shutdown`: to shut down processor after processing, optional
+ - `Processor.max_instances`: class attribute to control instance caching of this implementation
+
+## [3.0.0a2] - 2024-08-22
+
+Changed:
+ - :fire: `OcrdPage` as proxy of `PcGtsType` instead of alias; also contains `etree` and `mapping` now
+ - :fire: `page_from_file`: removed kwarg `with_tree` - use `OcrdPage.etree` and `OcrdPage.mapping` instead
+ - :fire: `Processor.zip_input_files` now can throw `ocrd.NonUniqueInputFile` and `ocrd.MissingInputFile`
+   (the latter only if `OCRD_MISSING_INPUT=ABORT`)
+ - :fire: `Processor.zip_input_files` does not by default use `require_first` anymore
+   (so the first file in any input file tuple per page can be `None` as well)
+ - :fire: no more `Workspace.overwrite_mode`, merely delegate to `OCRD_EXISTING_OUTPUT=OVERWRITE`
+ - :art: improve on docs result for `ocrd_utils.config`
+
+Added:
+ - :point_right: `OCRD_DOWNLOAD_INPUT` for whether input files should be downloaded before processing
+ - :point_right: `OCRD_MISSING_INPUT` for how to handle missing input files (**`SKIP`** or `ABORT`)
+ - :point_right: `OCRD_MISSING_OUTPUT` for how to handle processing failures (**`SKIP`** or `ABORT` or `COPY`)
+   the latter behaves like ocrd-dummy for the failed page(s)
+ - :point_right: `OCRD_EXISTING_OUTPUT` for how to handle existing output files (**`SKIP`** or `ABORT` or `OVERWRITE`)
+ - new CLI option `--debug` as short-hand for `ABORT` choices above
+ - `Processor.logger` set up by constructor already (for re-use by processor implementors)
+ - `default`-expand and validate `ocrd_tool.json` in `Processor` constructor, log invalidities
+ - handle JSON `deprecation` in `ocrd_tool.json` by reporting warnings
+
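The `OcrdPage` proxy change means the parsed XML and the object model now travel together. A sketch, assuming `input_file` is a PAGE-XML `OcrdFile`; the exact `mapping` semantics (generateds object to XML node, keyed by `id()`) are inferred from the former `with_tree` return values:

```python
from ocrd_modelfactory import page_from_file

pcgts = page_from_file(input_file)  # returns the OcrdPage proxy (no with_tree kwarg anymore)
page = pcgts.get_Page()             # the PcGtsType API is still available through the proxy
tree = pcgts.etree                  # the parsed XML tree behind the object model
node = pcgts.mapping[id(page)]      # assumed: look up the XML node for a generateds object
```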
+## [3.0.0a1] - 2024-08-15
+
+Changed:
+ - :fire: Deprecate `Processor.process`
+ - update spec to v3.25.0, which requires annotating fileGrp cardinality in `ocrd-tool.json`
+ - :fire: Remove passing non-processing kwargs to `Processor` constructor, add as members
+   (i.e. `show_help`, `dump_json`, `dump_module_dir`, `list_resources`, `show_resource`, `resolve_resource`)
+ - :fire: Deprecate passing processing arg / kwargs to `Processor` constructor
+   (i.e. `workspace`, `page_id`, `input_file_grp`, `output_file_grp`; now all set by `run_processor`)
+ - :fire: Deprecate passing `ocrd-tool.json` metadata to `Processor` constructor
+ - `ocrd.processor`: Handle loading of bundled `ocrd-tool.json` generically
+
+Added:
+ - `Processor.process_workspace`: process a complete workspace, with default implementation
+ - `Processor.process_page_file`: process an OcrdFile, with default implementation
+ - `Processor.process_page_pcgts`: process a single OcrdPage, produce a single OcrdPage, required to implement
+ - `Processor.verify`: handle fileGrp cardinality verification, with default implementation
+ - `Processor.setup`: to set up processor before processing, optional
+
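Taken together, these entries define the shape of a v3 processor: only `process_page_pcgts` must be implemented, everything else has defaults. A minimal sketch with a hypothetical tool name and model-loading helper, the signature following the entries above:

```python
from typing import Optional
from ocrd import Processor, OcrdPage, OcrdPageResult

class MyOcrProcessor(Processor):  # hypothetical example
    @property
    def executable(self):
        return 'ocrd-my-ocr'  # used to look up the tool in the bundled ocrd-tool.json

    def setup(self):
        # optional: runs once before processing, e.g. to load models
        self.model = load_model(self.parameter['model'])  # hypothetical helper

    def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage],
                           page_id: Optional[str] = None) -> OcrdPageResult:
        pcgts = input_pcgts[0]
        # ... annotate recognition results into pcgts here ...
        return OcrdPageResult(pcgts)
```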
 ## [2.68.0] - 2024-08-23
 
 Changed:
@@ -2164,6 +2322,14 @@ Fixed
 
 Initial Release
 
+[3.0.0b6]: ../../compare/v3.0.0b6..v3.0.0b5
+[3.0.0b5]: ../../compare/v3.0.0b5..v3.0.0b4
+[3.0.0b4]: ../../compare/v3.0.0b4..v3.0.0b3
+[3.0.0b3]: ../../compare/v3.0.0b3..v3.0.0b2
+[3.0.0b2]: ../../compare/v3.0.0b2..v3.0.0b1
+[3.0.0b1]: ../../compare/v3.0.0b1..v3.0.0a2
+[3.0.0a2]: ../../compare/v3.0.0a2..v3.0.0a1
+[3.0.0a1]: ../../compare/v3.0.0a1..v2.67.2
 [2.68.0]: ../../compare/v2.68.0..v2.67.2
 [2.67.2]: ../../compare/v2.67.2..v2.67.1
 [2.67.1]: ../../compare/v2.67.1..v2.67.0
diff --git a/Dockerfile b/Dockerfile
index 144ae774dc..77c24bf77e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -50,9 +50,9 @@ FROM ocrd_core_base as ocrd_core_test
 ARG SKIP_ASSETS
 WORKDIR /build/core
 COPY Makefile .
+COPY .gitmodules .
 RUN if test -z "$SKIP_ASSETS" || test $SKIP_ASSETS -eq 0 ; then make assets ; fi
 COPY tests ./tests
-COPY .gitmodules .
 COPY requirements_test.txt .
 RUN pip install -r requirements_test.txt
 RUN mkdir /ocrd-data && chmod 777 /ocrd-data
diff --git a/Makefile b/Makefile
index 4997066d1b..1a4a6bbdb8 100644
--- a/Makefile
+++ b/Makefile
@@ -238,9 +238,9 @@ repo/assets repo/spec: always-update
 
 .PHONY: spec
 # Copy JSON Schema, OpenAPI from OCR-D/spec
-spec: repo/spec
-	cp repo/spec/ocrd_tool.schema.yml ocrd_validators/ocrd_validators/ocrd_tool.schema.yml
-	cp repo/spec/bagit-profile.yml ocrd_validators/ocrd_validators/bagit-profile.yml
+spec: # repo/spec
+	cp repo/spec/ocrd_tool.schema.yml src/ocrd_validators/ocrd_tool.schema.yml
+	cp repo/spec/bagit-profile.yml src/ocrd_validators/bagit-profile.yml
 
 #
 # Assets
@@ -273,7 +273,7 @@ test-logging: assets
 	cp src/ocrd_utils/ocrd_logging.conf $$tempdir; \
 	cd $$tempdir; \
 	$(PYTHON) -m pytest --continue-on-collection-errors -k TestLogging -k TestDecorators $(TESTDIR); \
-	rm -r $$tempdir/ocrd_logging.conf $$tempdir/.benchmarks; \
+	rm -r $$tempdir/ocrd_logging.conf $$tempdir/ocrd.log $$tempdir/.benchmarks; \
 	rm -rf $$tempdir/.coverage; \
 	rmdir $$tempdir
 
@@ -401,41 +401,3 @@ docker docker-cuda docker-cuda-tf1 docker-cuda-tf2 docker-cuda-torch:
 # Build wheels and source dist and twine upload them
 pypi: build
 	twine upload --verbose dist/ocrd-$(VERSION)*{tar.gz,whl}
-
-pypi-workaround: build-workaround
-	for dist in $(BUILD_ORDER);do twine upload dist/$$dist-$(VERSION)*{tar.gz,whl};done
-
-# Only in place until v3 so we don't break existing installations
-build-workaround: pyclean
-	cp pyproject.toml pyproject.toml.BAK
-	cp src/ocrd_utils/constants.py src/ocrd_utils/constants.py.BAK
-	cp src/ocrd/cli/__init__.py src/ocrd/cli/__init__.py.BAK
-	for dist in $(BUILD_ORDER);do \
-		cat pyproject.toml.BAK | sed "s,^name =.*,name = \"$$dist\"," > pyproject.toml; \
-		cat src/ocrd_utils/constants.py.BAK | sed "s,dist_version('ocrd'),dist_version('$$dist')," > src/ocrd_utils/constants.py; \
-		cat src/ocrd/cli/__init__.py.BAK | sed "s,package_name='ocrd',package_name='$$dist'," > src/ocrd/cli/__init__.py; \
-		$(MAKE) build; \
-	done
-	rm pyproject.toml.BAK
-	rm src/ocrd_utils/constants.py.BAK
-	rm src/ocrd/cli/__init__.py.BAK
-
-# test that the aliased packages work in isolation and combined
-test-workaround: build-workaround
-	$(MAKE) uninstall-workaround
-	for dist in $(BUILD_ORDER);do \
-		pip install dist/$$dist-*.whl ;\
-		ocrd --version ;\
-		make test ;\
-		pip uninstall --yes $$dist ;\
-	done
-	for dist in $(BUILD_ORDER);do \
-		pip install dist/$$dist-*.whl ;\
-	done
-	ocrd --version ;\
-	make test ;\
-	for dist in $(BUILD_ORDER);do pip uninstall --yes $$dist;done
-
-uninstall-workaround:
-	for dist in $(BUILD_ORDER);do $(PIP) uninstall --yes $$dist;done
-
diff --git a/README.md b/README.md
index b401428ee0..d41a2dddb6 100644
--- a/README.md
+++ b/README.md
@@ -47,17 +47,12 @@ complete stack of OCR-D-related software.
 
 The easiest way to install is via `pip`:
 
-```sh
-pip install ocrd
+    pip install ocrd
 
-# or just the functionality you need, e.g.
-
-pip install ocrd_modelfactory
-```
 
 All Python software released by [OCR-D](https://github.com/OCR-D) requires Python 3.8 or higher.
 
-**NOTE** Some OCR-D-Tools (or even test cases) _might_ reveal an unintended behavior if you have specific environment modifications, like:
+> **NOTE** Some OCR-D tools (or even test cases) _might_ reveal an unintended behavior if you have specific environment modifications, like:
 
 * using a custom build of [ImageMagick](https://github.com/ImageMagick/ImageMagick), whose format delegates are different from what OCR-D supposes
 * custom Python logging configurations in your personal account
@@ -82,7 +77,6 @@ Almost all behaviour of the OCR-D/core software is configured via CLI options an
 
 Some parts of the software are configured via environment variables:
 
-* `OCRD_METS_CACHING`: If set to `true`, access to the METS file is cached, speeding in-memory search and modification.
 * `OCRD_PROFILE`: This variable configures the built-in CPU and memory profiling. If empty, no profiling is done. Otherwise expected to contain any of the following tokens:
   * `CPU`: Enable CPU profiling of processor runs
   * `RSS`: Enable RSS memory profiling
@@ -95,18 +89,46 @@ Some parts of the software are configured via environment variables:
 
 * `XDG_CONFIG_HOME`: Directory to look for `./ocrd/resources.yml` (i.e. `ocrd resmgr` user database) – defaults to `$HOME/.config`.
 * `XDG_DATA_HOME`: Directory to look for `./ocrd-resources/*` (i.e. `ocrd resmgr` data location) – defaults to `$HOME/.local/share`.
 
-* `OCRD_DOWNLOAD_RETRIES`: Number of times to retry failed attempts for downloads of workspace files.
+* `OCRD_DOWNLOAD_RETRIES`: Number of times to retry failed attempts for downloads of resources or workspace files.
 * `OCRD_DOWNLOAD_TIMEOUT`: Timeout in seconds for connecting or reading (comma-separated) when downloading.
 
+* `OCRD_MISSING_INPUT`: How to deal with missing input files (for some fileGrp/pageId) during processing:
+  * `SKIP`: ignore and proceed with next page's input
+  * `ABORT`: throw `MissingInputFile` exception
+
+* `OCRD_MISSING_OUTPUT`: How to deal with missing output files (for some fileGrp/pageId) during processing:
+  * `SKIP`: ignore and proceed processing next page
+  * `COPY`: fall back to copying input PAGE to output fileGrp for page
+  * `ABORT`: re-throw whatever caused processing to fail
+
+* `OCRD_MAX_MISSING_OUTPUTS`: Maximum rate of skipped/fallback pages among all processed pages before aborting (decimal fraction, ignored if negative).
+
+* `OCRD_EXISTING_OUTPUT`: How to deal with already existing output files (for some fileGrp/pageId) during processing:
+  * `SKIP`: ignore and proceed processing next page
+  * `OVERWRITE`: force writing result to output fileGrp for page
+  * `ABORT`: re-throw `FileExistsError` exception
+
+* `OCRD_METS_CACHING`: Whether to enable in-memory storage of OcrdMets data structures for speedup during processing or workspace operations.
 
 * `OCRD_MAX_PROCESSOR_CACHE`: Maximum number of processor instances (for each set of parameters) to be kept in memory (including loaded models) for processing workers or processor servers.
 
+* `OCRD_MAX_PARALLEL_PAGES`: Maximum number of processor threads for page-parallel processing (within each Processor's selected page range, independent of the number of Processing Workers or Processor Servers). If set `>1`, then a METS Server must be used for METS synchronisation.
+
+* `OCRD_PROCESSING_PAGE_TIMEOUT`: Timeout in seconds for processing a single page. If set `>0`, when exceeded, the same as `OCRD_MISSING_OUTPUT` applies.
+
 * `OCRD_NETWORK_SERVER_ADDR_PROCESSING`: Default address of Processing Server to connect to (for `ocrd network client processing`).
 * `OCRD_NETWORK_SERVER_ADDR_WORKFLOW`: Default address of Workflow Server to connect to (for `ocrd network client workflow`).
 * `OCRD_NETWORK_SERVER_ADDR_WORKSPACE`: Default address of Workspace Server to connect to (for `ocrd network client workspace`).
 
 * `OCRD_NETWORK_RABBITMQ_CLIENT_CONNECT_ATTEMPTS`: Number of attempts for a worker to create its queue. Helpful if the rabbitmq-server needs time to be fully started.
 
+* `OCRD_NETWORK_CLIENT_POLLING_SLEEP`: How many seconds to sleep before trying `ocrd network client` again.
+* `OCRD_NETWORK_CLIENT_POLLING_TIMEOUT`: Timeout for a blocking `ocrd network client` (in seconds).
+
+* `OCRD_NETWORK_SOCKETS_ROOT_DIR`: The root directory where all mets server related socket files are created.
+* `OCRD_NETWORK_LOGS_ROOT_DIR`: The root directory where all ocrd_network related file logs are stored.
+
+
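From Python, the same variables are read (and documented) through `ocrd_utils.config`, which is also what renders the descriptions for `ocrd --help`. A small sketch — assuming `reset_defaults`, added in 3.0.0b3, is available on this `config` instance:

```python
from ocrd_utils import config

print(config.describe('OCRD_MISSING_OUTPUT'))  # the same self-documentation used by `ocrd --help`
if config.OCRD_MAX_PARALLEL_PAGES > 1:
    print("page-parallel processing: a METS Server is required for synchronisation")
config.reset_defaults()  # reset all variables to their documented defaults
```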
 
 ## Packages
 
diff --git a/README_bashlib.md b/README_bashlib.md
index 09199468cc..20379c3c92 100644
--- a/README_bashlib.md
+++ b/README_bashlib.md
@@ -21,6 +21,9 @@ For example:
 * [`ocrd__log`](#ocrd__log)
 * [`ocrd__minversion`](#ocrd__minversion)
 * [`ocrd__dumpjson`](#ocrd__dumpjson)
+* [`ocrd__resolve_resource`](#ocrd__resolve_resource)
+* [`ocrd__show_resource`](#ocrd__show_resource)
+* [`ocrd__list_resources`](#ocrd__list_resources)
 * [`ocrd__usage`](#ocrd__usage)
 * [`ocrd__parse_argv`](#ocrd__parse_argv)
 
@@ -56,6 +59,10 @@ export OCRD_TOOL_NAME=ocrd-foo-bar
 
 (Which you automatically get from [`ocrd__wrap`](#ocrd__wrap).)
 
+### `ocrd__resolve_resource`
+
+Output given resource file's path.
+
 ### `ocrd__show_resource`
 
 Output given resource file's content.
@@ -88,6 +95,7 @@ This will be filled by the parser along the following keys:
 - `profile`: whether `--profile` is enabled
 - `profile_file`: the argument of `--profile-file`
 - `log_level`: the argument of `--log-level`
+- `mets_server_url`: the argument of `--mets-server-url`
 - `mets_file`: absolute path of the `--mets` argument
 - `working_dir`: absolute path of the `--working-dir` argument or the parent of `mets_file`
 - `page_id`: the argument of `--page-id`
@@ -95,7 +103,7 @@ This will be filled by the parser along the following keys:
 - `output_file_grp`: the argument of `--output-file-grp`
 
 Moreover, there will be an associative array **`params`**
-with the fully expanded runtime values of the ocrd-tool.json parameters.
+with the fully validated and default-expanded runtime values of the `ocrd-tool.json` parameters.
 
 ### `ocrd__wrap`
 
diff --git a/VERSION b/VERSION
index 0f1ddc8105..1129dfd443 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.68.0
+3.0.0b7
diff --git a/docs/api/ocrd/ocrd.processor.ocrd_page_result.rst b/docs/api/ocrd/ocrd.processor.ocrd_page_result.rst
new file mode 100644
index 0000000000..e13d50e155
--- /dev/null
+++ b/docs/api/ocrd/ocrd.processor.ocrd_page_result.rst
@@ -0,0 +1,7 @@
+ocrd.processor.ocrd\_page\_result module
+========================================
+
+.. automodule:: ocrd.processor.ocrd_page_result
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/api/ocrd/ocrd.processor.rst b/docs/api/ocrd/ocrd.processor.rst
index 801114d2a3..7507d8439b 100644
--- a/docs/api/ocrd/ocrd.processor.rst
+++ b/docs/api/ocrd/ocrd.processor.rst
@@ -22,3 +22,4 @@ Submodules
 
    ocrd.processor.base
    ocrd.processor.helpers
+   ocrd.processor.ocrd_page_result
diff --git a/docs/api/ocrd_network/ocrd_network.deployer.rst b/docs/api/ocrd_network/ocrd_network.deployer.rst
deleted file mode 100644
index 205a331ba2..0000000000
--- a/docs/api/ocrd_network/ocrd_network.deployer.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-ocrd\_network.deployer module
-=============================
-
-.. automodule:: ocrd_network.deployer
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/api/ocrd_network/ocrd_network.deployment_utils.rst b/docs/api/ocrd_network/ocrd_network.deployment_utils.rst
deleted file mode 100644
index cc1f315ac5..0000000000
--- a/docs/api/ocrd_network/ocrd_network.deployment_utils.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-ocrd\_network.deployment\_utils module
-======================================
-
-.. automodule:: ocrd_network.deployment_utils
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/api/ocrd_network/ocrd_network.logging.rst b/docs/api/ocrd_network/ocrd_network.logging.rst
deleted file mode 100644
index d2ac721d14..0000000000
--- a/docs/api/ocrd_network/ocrd_network.logging.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-ocrd\_network.logging module
-============================
-
-.. automodule:: ocrd_network.logging
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/docs/api/ocrd_network/ocrd_network.logging_utils.rst b/docs/api/ocrd_network/ocrd_network.logging_utils.rst
new file mode 100644
index 0000000000..561ce00193
--- /dev/null
+++ b/docs/api/ocrd_network/ocrd_network.logging_utils.rst
@@ -0,0 +1,7 @@
+ocrd\_network.logging\_utils module
+===================================
+
+.. automodule:: ocrd_network.logging_utils
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/api/ocrd_network/ocrd_network.rabbitmq_utils.helpers.rst b/docs/api/ocrd_network/ocrd_network.rabbitmq_utils.helpers.rst
new file mode 100644
index 0000000000..e13ff897a9
--- /dev/null
+++ b/docs/api/ocrd_network/ocrd_network.rabbitmq_utils.helpers.rst
@@ -0,0 +1,7 @@
+ocrd\_network.rabbitmq\_utils.helpers module
+============================================
+
+.. automodule:: ocrd_network.rabbitmq_utils.helpers
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/api/ocrd_network/ocrd_network.rabbitmq_utils.rst b/docs/api/ocrd_network/ocrd_network.rabbitmq_utils.rst
index 36b581a337..63fd6f89aa 100644
--- a/docs/api/ocrd_network/ocrd_network.rabbitmq_utils.rst
+++ b/docs/api/ocrd_network/ocrd_network.rabbitmq_utils.rst
@@ -15,5 +15,6 @@ Submodules
    ocrd_network.rabbitmq_utils.connector
    ocrd_network.rabbitmq_utils.constants
    ocrd_network.rabbitmq_utils.consumer
+   ocrd_network.rabbitmq_utils.helpers
    ocrd_network.rabbitmq_utils.ocrd_messages
    ocrd_network.rabbitmq_utils.publisher
diff --git a/docs/api/ocrd_network/ocrd_network.rst b/docs/api/ocrd_network/ocrd_network.rst
index ae12ae1f5d..4497702751 100644
--- a/docs/api/ocrd_network/ocrd_network.rst
+++ b/docs/api/ocrd_network/ocrd_network.rst
@@ -15,6 +15,7 @@ Subpackages
 
    ocrd_network.cli
    ocrd_network.models
    ocrd_network.rabbitmq_utils
+   ocrd_network.runtime_data
 
 Submodules
 ----------
@@ -25,15 +26,13 @@ Submodules
 
    ocrd_network.client
    ocrd_network.constants
    ocrd_network.database
-   ocrd_network.deployer
-   ocrd_network.deployment_utils
-   ocrd_network.logging
+   ocrd_network.logging_utils
    ocrd_network.param_validators
    ocrd_network.process_helpers
    ocrd_network.processing_server
    ocrd_network.processing_worker
    ocrd_network.processor_server
-   ocrd_network.runtime_data
    ocrd_network.server_cache
    ocrd_network.server_utils
+   ocrd_network.tcp_to_uds_mets_proxy
    ocrd_network.utils
diff --git a/docs/api/ocrd_network/ocrd_network.runtime_data.config_parser.rst b/docs/api/ocrd_network/ocrd_network.runtime_data.config_parser.rst
new file mode 100644
index 0000000000..e56ad31f89
--- /dev/null
+++ b/docs/api/ocrd_network/ocrd_network.runtime_data.config_parser.rst
@@ -0,0 +1,7 @@
+ocrd\_network.runtime\_data.config\_parser module
+=================================================
+
+.. automodule:: ocrd_network.runtime_data.config_parser
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/api/ocrd_network/ocrd_network.runtime_data.connection_clients.rst b/docs/api/ocrd_network/ocrd_network.runtime_data.connection_clients.rst
new file mode 100644
index 0000000000..2fd62e5ef2
--- /dev/null
+++ b/docs/api/ocrd_network/ocrd_network.runtime_data.connection_clients.rst
@@ -0,0 +1,7 @@
+ocrd\_network.runtime\_data.connection\_clients module
+======================================================
+
+.. automodule:: ocrd_network.runtime_data.connection_clients
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/api/ocrd_network/ocrd_network.runtime_data.deployer.rst b/docs/api/ocrd_network/ocrd_network.runtime_data.deployer.rst
new file mode 100644
index 0000000000..62abe20db3
--- /dev/null
+++ b/docs/api/ocrd_network/ocrd_network.runtime_data.deployer.rst
@@ -0,0 +1,7 @@
+ocrd\_network.runtime\_data.deployer module
+===========================================
+
+.. automodule:: ocrd_network.runtime_data.deployer
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/api/ocrd_network/ocrd_network.runtime_data.hosts.rst b/docs/api/ocrd_network/ocrd_network.runtime_data.hosts.rst
new file mode 100644
index 0000000000..8f9001c381
--- /dev/null
+++ b/docs/api/ocrd_network/ocrd_network.runtime_data.hosts.rst
@@ -0,0 +1,7 @@
+ocrd\_network.runtime\_data.hosts module
+========================================
+
+.. automodule:: ocrd_network.runtime_data.hosts
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/api/ocrd_network/ocrd_network.runtime_data.network_agents.rst b/docs/api/ocrd_network/ocrd_network.runtime_data.network_agents.rst
new file mode 100644
index 0000000000..1a597caad1
--- /dev/null
+++ b/docs/api/ocrd_network/ocrd_network.runtime_data.network_agents.rst
@@ -0,0 +1,7 @@
+ocrd\_network.runtime\_data.network\_agents module
+==================================================
+
+.. automodule:: ocrd_network.runtime_data.network_agents
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/api/ocrd_network/ocrd_network.runtime_data.network_services.rst b/docs/api/ocrd_network/ocrd_network.runtime_data.network_services.rst
new file mode 100644
index 0000000000..d72e67c9d6
--- /dev/null
+++ b/docs/api/ocrd_network/ocrd_network.runtime_data.network_services.rst
@@ -0,0 +1,7 @@
+ocrd\_network.runtime\_data.network\_services module
+====================================================
+
+.. automodule:: ocrd_network.runtime_data.network_services
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/api/ocrd_network/ocrd_network.runtime_data.rst b/docs/api/ocrd_network/ocrd_network.runtime_data.rst
index fefa00b492..cdf45f6b6e 100644
--- a/docs/api/ocrd_network/ocrd_network.runtime_data.rst
+++ b/docs/api/ocrd_network/ocrd_network.runtime_data.rst
@@ -1,7 +1,20 @@
-ocrd\_network.runtime\_data module
-==================================
+ocrd\_network.runtime\_data package
+===================================
 
 .. automodule:: ocrd_network.runtime_data
    :members:
    :undoc-members:
    :show-inheritance:
+
+Submodules
+----------
+
+.. toctree::
+   :maxdepth: 4
+
+   ocrd_network.runtime_data.config_parser
+   ocrd_network.runtime_data.connection_clients
+   ocrd_network.runtime_data.deployer
+   ocrd_network.runtime_data.hosts
+   ocrd_network.runtime_data.network_agents
+   ocrd_network.runtime_data.network_services
diff --git a/docs/api/ocrd_network/ocrd_network.tcp_to_uds_mets_proxy.rst b/docs/api/ocrd_network/ocrd_network.tcp_to_uds_mets_proxy.rst
new file mode 100644
index 0000000000..fa6e607f94
--- /dev/null
+++ b/docs/api/ocrd_network/ocrd_network.tcp_to_uds_mets_proxy.rst
@@ -0,0 +1,7 @@
+ocrd\_network.tcp\_to\_uds\_mets\_proxy module
+==============================================
+
+.. automodule:: ocrd_network.tcp_to_uds_mets_proxy
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/conf.py b/docs/conf.py
index 3ab2e1826f..917c5c62ca 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -15,7 +15,7 @@
 # import os
 # import sys
 # # sys.path.insert(0, os.path.abspath('..'))
-with open('VERSION', encoding='utf-8') as f:
+with open('../VERSION', encoding='utf-8') as f:
     VERSION = f.read()
 
 
@@ -72,7 +72,7 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path .
-exclude_patterns = [u'build', 'Thumbs.db', '.DS_Store', 'src', 'venv']
+exclude_patterns = [u'build', 'Thumbs.db', '.DS_Store', 'tests', 'venv']
 
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = 'sphinx'
diff --git a/docs/index.rst b/docs/index.rst
index 96a4e98360..67bba66fe0 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -7,9 +7,10 @@ OCR-D/core
 
    ocrd
    ocrd_utils
+   ocrd_modelfactory
    ocrd_models
    ocrd_validators
-   ocrd_modelfactory
+   ocrd_network
 
 
 Indices and tables
diff --git a/requirements.txt b/requirements.txt
index ed5fd56d59..05d4e9aa44 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,7 +12,8 @@ gdown
 httpx>=0.22.0
 importlib_metadata ; python_version < '3.8'
 importlib_resources ; python_version < '3.10'
-jsonschema
+jsonschema>=4
+loky
 lxml
 memory-profiler >= 0.58.0
 # XXX explicitly do not restrict the numpy version because different
diff --git a/requirements_test.txt b/requirements_test.txt
index d8cef1dae7..a6a87918fc 100644
--- a/requirements_test.txt
+++ b/requirements_test.txt
@@ -3,6 +3,7 @@ cryptography < 43.0.0
 pytest >= 4.0.0
 generateDS == 2.35.20
 pytest-benchmark >= 3.2.3
+pytest-timeout
 coverage >= 4.5.2
 sphinx
 sphinx_click
diff --git a/src/ocrd/__init__.py b/src/ocrd/__init__.py
index 62b6ffbc0a..e4c782685b 100644
--- a/src/ocrd/__init__.py
+++ b/src/ocrd/__init__.py
@@ -14,8 +14,9 @@
 """
-from ocrd.processor.base import run_processor, run_cli, Processor
-from ocrd_models import OcrdMets, OcrdExif, OcrdFile, OcrdAgent
+from ocrd.processor.base import run_processor, run_cli, Processor, ResourceNotFoundError
+from ocrd.processor.ocrd_page_result import OcrdPageResult, OcrdPageResultImage
+from ocrd_models import OcrdMets, OcrdPage, OcrdExif, OcrdFile, OcrdAgent
 from ocrd.resolver import Resolver
 from ocrd_validators import *
 from ocrd.workspace import Workspace
diff --git a/src/ocrd/cli/__init__.py b/src/ocrd/cli/__init__.py
index 70d738f083..9e8a37b8bf 100644
--- a/src/ocrd/cli/__init__.py
+++ b/src/ocrd/cli/__init__.py
@@ -10,6 +10,34 @@
 from ocrd_utils import config
 
+# pylint: disable=wrong-import-position
+
+def command_with_replaced_help(*replacements):
+
+    class CommandWithReplacedHelp(click.Command):
+        def get_help(self, ctx):
+            newhelp = super().get_help(ctx)
+            for replacement in replacements:
+                newhelp = re.sub(*replacement, newhelp)
+            # print(newhelp)
+            return newhelp
+
+    return CommandWithReplacedHelp
+
+# pylint: enable=wrong-import-position
+
+from ..decorators import ocrd_loglevel
+from .ocrd_tool import ocrd_tool_cli
+from .workspace import workspace_cli
+from .process import process_cli
+from .bashlib import bashlib_cli
+from .validate import validate_cli
+from .resmgr import resmgr_cli
+from .zip import zip_cli
+from .log import log_cli
+from .network import network_cli
+
+
 __all__ = ['cli']
 
 _epilog = f"""
@@ -31,6 +59,14 @@
 \b
 {config.describe('OCRD_DOWNLOAD_TIMEOUT')}
 \b
+{config.describe('OCRD_DOWNLOAD_INPUT')}
+\b
+{config.describe('OCRD_MISSING_INPUT', wrap_text=False)}
+\b
+{config.describe('OCRD_MISSING_OUTPUT', wrap_text=False)}
+\b
+{config.describe('OCRD_EXISTING_OUTPUT', wrap_text=False)}
+\b
 {config.describe('OCRD_METS_CACHING')}
 \b
 {config.describe('OCRD_MAX_PROCESSOR_CACHE')}
@@ -58,30 +94,6 @@
 {config.describe('OCRD_LOGGING_DEBUG')}
 """
 
-def command_with_replaced_help(*replacements):
-
-    class CommandWithReplacedHelp(click.Command):
-        def get_help(self, ctx):
-            help = super().get_help(ctx)
-            for replacement in replacements:
-                help = re.sub(*replacement, help)
-            # print(help)
-            return help
-
-    return CommandWithReplacedHelp
-
-from ocrd.cli.ocrd_tool import ocrd_tool_cli
-from ocrd.cli.workspace import workspace_cli
-from ocrd.cli.process import process_cli
-from ocrd.cli.bashlib import bashlib_cli
-from ocrd.cli.validate import validate_cli
-from ocrd.cli.resmgr import resmgr_cli
-from ocrd.decorators import ocrd_loglevel
-from .zip import zip_cli
-from .log import log_cli
-from .network import network_cli
-
-
 @click.group(epilog=_epilog)
 @click.version_option(package_name='ocrd')
 @ocrd_loglevel
diff --git a/src/ocrd/cli/bashlib.py b/src/ocrd/cli/bashlib.py
index 1def4638c7..b6817abe91 100644
--- a/src/ocrd/cli/bashlib.py
+++ b/src/ocrd/cli/bashlib.py
@@ -8,7 +8,6 @@
 """
 from __future__ import print_function
 import sys
-from os.path import isfile
 import click
 
 from ocrd.constants import BASHLIB_FILENAME
@@ -20,15 +19,10 @@
 from ocrd.decorators import (
     parameter_option,
     parameter_override_option,
-    ocrd_loglevel
+    ocrd_loglevel,
+    ocrd_cli_wrap_processor
 )
-from ocrd_utils import (
-    is_local_filename,
-    get_local_filename,
-    initLogging,
-    make_file_id
-)
-from ocrd.resolver import Resolver
+from ocrd_utils import make_file_id
 from ocrd.processor import Processor
 
 # ----------------------------------------------------------------------
@@ -79,17 +73,23 @@ def bashlib_constants(name):
     print(val)
 
 @bashlib_cli.command('input-files')
+@click.option('--ocrd-tool', help="path to ocrd-tool.json of processor to feed", default=None)
+@click.option('--executable', help="name of processor executable in ocrd-tool.json", default=None)
 @click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME)
-@click.option('-w', '--working-dir', help="Working Directory")
-@click.option('-I', '--input-file-grp', help='File group(s) used as input.', default='INPUT')
-@click.option('-O', '--output-file-grp', help='File group(s) used as output.', default='OUTPUT')
-# repeat some other processor options for convenience (will be ignored here)
+@click.option('-U', '--mets-server-url', help='TCP host URI or UDS path of METS server', default=None)
+@click.option('-d', '--working-dir', help="Working Directory")
+@click.option('-I', '--input-file-grp', help='File group(s) used as input.', default=None)
+@click.option('-O', '--output-file-grp', help='File group(s) used as output.', default=None)
 @click.option('-g', '--page-id', help="ID(s) of the pages to process")
-@click.option('--overwrite', is_flag=True, default=False, help="Remove output pages/images if they already exist")
+@click.option('--overwrite', is_flag=True, default=False, help="Remove output pages/images if they already exist\n"
+              "(with '--page-id', remove only those).\n"
+              "Short-hand for OCRD_EXISTING_OUTPUT=OVERWRITE")
+@click.option('--debug', is_flag=True, default=False, help="Abort on any errors with full stack trace.\n"
+              "Short-hand for OCRD_MISSING_OUTPUT=ABORT")
 @parameter_option
 @parameter_override_option
 @ocrd_loglevel
-def bashlib_input_files(**kwargs):
+def bashlib_input_files(ocrd_tool, executable, **kwargs):
     """
     List input files for processing
 
@@ -100,29 +100,49 @@ def bashlib_input_files(ocrd_tool, executable, **kwargs):
     (The printing format is one associative array initializer per line.)
     """
-    initLogging()
-    mets = kwargs.pop('mets')
-    working_dir = kwargs.pop('working_dir')
-    if is_local_filename(mets) and not isfile(get_local_filename(mets)):
-        msg = "File does not exist: %s" % mets
-        raise FileNotFoundError(msg)
-    resolver = Resolver()
-    workspace = resolver.workspace_from_url(mets, working_dir)
-    processor = Processor(workspace,
-                          ocrd_tool=None,
-                          page_id=kwargs['page_id'],
-                          input_file_grp=kwargs['input_file_grp'],
-                          output_file_grp=kwargs['output_file_grp'])
-    for input_files in processor.zip_input_files(mimetype=None, on_error='abort'):
-        # ensure all input files exist locally (without persisting them in the METS)
-        # - this mimics the default behaviour of all Pythonic processors
-        input_files = [workspace.download_file(input_file) if input_file else None
-                       for input_file in input_files]
-        for field in ['url', 'local_filename', 'ID', 'mimetype', 'pageId']:
-            # make this bash-friendly (show initialization for associative array)
-            if len(input_files) > 1:
-                # single quotes allow us to preserve the list value inside the alist
-                print("[%s]='%s'" % (field, ' '.join(str(getattr(res, field)) for res in input_files)), end=' ')
-            else:
-                print("[%s]='%s'" % (field, str(getattr(input_files[0], field))), end=' ')
-        print("[outputFileId]='%s'" % make_file_id(input_files[0], kwargs['output_file_grp']))
+    class BashlibProcessor(Processor):
+        # go half way of the normal run_processor / process_workspace call tree
+        # by just delegating to process_workspace, overriding process_page_file
+        # to ensure all input files exist locally (without persisting them in the METS)
+        # and print what needs to be acted on in bash-friendly way
+        def process_page_file(self, *input_files):
+            for field in ['url', 'local_filename', 'ID', 'mimetype', 'pageId']:
+                # make this bash-friendly (show initialization for associative array)
+                if len(input_files) > 1:
+                    # single quotes allow us to preserve the list value inside the alist
+                    value = ' '.join(str(getattr(res, field)) for res in input_files)
+                else:
+                    value = str(getattr(input_files[0], field))
+                print(f"[{field}]='{value}'", end=' ')
+            output_file_id = make_file_id(input_files[0], kwargs['output_file_grp'])
+            print(f"[outputFileId]='{output_file_id}'")
+    if ocrd_tool and executable:
+        class FullBashlibProcessor(BashlibProcessor):
+            @property
+            def metadata_location(self):
+                # needed for metadata loading and validation mechanism
+                return ocrd_tool
+            @property
+            def executable(self):
+                # needed for ocrd_tool lookup
+                return executable
+        processor_class = FullBashlibProcessor
+    else:
+        # we have no true metadata file, so fill in just to make it work
+        class UnknownBashlibProcessor(BashlibProcessor):
+            @property
+            def ocrd_tool(self):
+                # needed to satisfy the validator
+                return {'executable': '',
+                        # required now
+                        'input_file_grp_cardinality': 1,
+                        'output_file_grp_cardinality': 1,
+                        'steps': ['']
+                        }
+            @property
+            def version(self):
+                # needed to satisfy the validator and wrapper
+                return '1.0'
+        processor_class = UnknownBashlibProcessor
+
+    ocrd_cli_wrap_processor(processor_class, **kwargs)
diff --git a/src/ocrd/cli/ocrd_tool.py b/src/ocrd/cli/ocrd_tool.py
index 2a7fa99ec9..3ceaba40c5 100644
--- a/src/ocrd/cli/ocrd_tool.py
+++ b/src/ocrd/cli/ocrd_tool.py
@@ -17,7 +17,6 @@
 from ocrd.processor import Processor
 from ocrd_utils import (
     set_json_key_value_overrides,
-    VERSION as OCRD_VERSION,
     parse_json_string_or_file,
     parse_json_string_with_comments as loads
 )
@@ -29,7 +28,29 @@ def __init__(self, filename):
         self.filename = filename
         with codecs.open(filename, encoding='utf-8') as f:
             self.content = f.read()
+            # perhaps the validator should _always_ run (for default expansion)
+            # so validate command only for the report?
             self.json = loads(self.content)
         self.tool_name = ''
+
+        class BashProcessor(Processor):
+            @property
+            def metadata(inner_self): # pylint: disable=no-self-argument,arguments-renamed
+                return self.json
+            @property
+            def executable(inner_self): # pylint: disable=no-self-argument,arguments-renamed
+                return self.tool_name
+            @property
+            def moduledir(inner_self): # pylint: disable=no-self-argument,arguments-renamed
+                return os.path.dirname(self.filename)
+            # set docstrings to empty
+            __doc__ = None
+            # HACK: override the module-level docstring, too
+            getmodule(OcrdToolCtx).__doc__ = None
+            def process(inner_self): # pylint: disable=no-self-argument,arguments-renamed
+                return super()
+
+        self.processor = BashProcessor
 
 pass_ocrd_tool = click.make_pass_decorator(OcrdToolCtx)
 
@@ -98,48 +119,25 @@ def ocrd_tool_tool_description(ctx):
 @ocrd_tool_tool.command('list-resources', help="List tool's file resources")
 @pass_ocrd_tool
 def ocrd_tool_tool_list_resources(ctx):
-    class BashProcessor(Processor):
-        @property
-        def moduledir(self):
-            return os.path.dirname(ctx.filename)
-    BashProcessor(None, ocrd_tool=ctx.json['tools'][ctx.tool_name],
-                  list_resources=True)
+    ctx.processor(None).list_resources()
 
 @ocrd_tool_tool.command('resolve-resource', help="Get a tool's file resource full path name")
 @click.argument('res_name')
 @pass_ocrd_tool
 def ocrd_tool_tool_resolve_resource(ctx, res_name):
-    class BashProcessor(Processor):
-        @property
-        def moduledir(self):
-            return os.path.dirname(ctx.filename)
-    BashProcessor(None, ocrd_tool=ctx.json['tools'][ctx.tool_name],
-                  resolve_resource=res_name)
+    print(ctx.processor(None).resolve_resource(res_name))
 
 @ocrd_tool_tool.command('show-resource', help="Dump a tool's file resource")
 @click.argument('res_name')
 @pass_ocrd_tool
 def ocrd_tool_tool_show_resource(ctx, res_name):
-    class BashProcessor(Processor):
-        @property
-        def moduledir(self):
-            return os.path.dirname(ctx.filename)
-    BashProcessor(None, ocrd_tool=ctx.json['tools'][ctx.tool_name],
-                  show_resource=res_name)
+    ctx.processor(None).show_resource(res_name)
 
 @ocrd_tool_tool.command('help', help="Generate help for processors")
 @click.argument('subcommand', required=False)
 @pass_ocrd_tool
 def ocrd_tool_tool_params_help(ctx, subcommand):
-    class BashProcessor(Processor):
-        # set docstrings to empty
-        __doc__ = None
-        # HACK: override the module-level docstring, too
-        getmodule(OcrdToolCtx).__doc__ = None
-        def process(self):
-            return super()
-    BashProcessor(None, ocrd_tool=ctx.json['tools'][ctx.tool_name],
-                  show_help=True, subcommand=subcommand)
+    ctx.processor(None).show_help(subcommand=subcommand)
 
 # ----------------------------------------------------------------------
 # ocrd ocrd-tool tool categories
diff --git a/src/ocrd/cli/validate.py b/src/ocrd/cli/validate.py
index b26803d053..a1ec8fafd6 100644
--- a/src/ocrd/cli/validate.py
+++ b/src/ocrd/cli/validate.py
@@ -40,7 +40,7 @@ def validate_cli():
 @click.argument('ocrd_tool', required=False, nargs=1)
 def validate_ocrd_tool(ocrd_tool):
     '''
-    Validate OCRD_TOOL as an ocrd-tool.json file.
+    Validate OCRD_TOOL as an `ocrd-tool.json` file.
     '''
     if not ocrd_tool:
         ocrd_tool = 'ocrd-tool.json'
@@ -102,16 +102,19 @@ def validate_page(page, **kwargs):
 @validate_cli.command('tasks')
 @click.option('--workspace', nargs=1, required=False, help='Workspace directory these tasks are to be run. If omitted, only validate syntax')
 @click.option('-M', '--mets-basename', nargs=1, default=DEFAULT_METS_BASENAME, help='Basename of the METS file, used in conjunction with --workspace')
+@click.option('-U', '--mets-server-url', help='TCP host URI or UDS path of METS server')
 @click.option('--overwrite', is_flag=True, default=False, help='When checking against a concrete workspace, simulate overwriting output or page range.')
 @click.option('-g', '--page-id', help="ID(s) of the pages to process")
 @click.argument('tasks', nargs=-1, required=True)
-def validate_process(tasks, workspace, mets_basename, overwrite, page_id):
+def validate_process(tasks, workspace, mets_basename, mets_server_url, overwrite, page_id):
     '''
-    Validate a sequence of tasks passable to 'ocrd process'
+    Validate a sequence of tasks passable to `ocrd process`
     '''
     if workspace:
-        _inform_of_result(validate_tasks([ProcessorTask.parse(t) for t in tasks],
-                                         Workspace(Resolver(), directory=workspace, mets_basename=mets_basename), page_id=page_id, overwrite=overwrite))
+        _inform_of_result(validate_tasks(
+            [ProcessorTask.parse(t) for t in tasks],
+            Workspace(Resolver(), directory=workspace, mets_basename=mets_basename, mets_server_url=mets_server_url),
+            page_id=page_id, overwrite=overwrite))
     else:
         for t in [ProcessorTask.parse(t) for t in tasks]:
             _inform_of_result(t.validate())
diff --git a/src/ocrd/cli/workspace.py b/src/ocrd/cli/workspace.py
index 0c70fd3a36..77797b3037 100644
--- a/src/ocrd/cli/workspace.py
+++ b/src/ocrd/cli/workspace.py
@@ -6,7 +6,7 @@
     :nested: full
 """
 import os
-from os import getcwd, rmdir, unlink
+from os import rmdir, unlink
 from os.path import dirname, relpath, normpath, exists, join, isabs, isdir
 from pathlib import Path
 from json import loads, dumps
@@ -14,7 +14,6 @@
 from glob import glob   # XXX pathlib.Path.glob does not support absolute globs
 import re
 import time
-import numpy as np
 
 import click
 
@@ -37,6 +36,17 @@ def __init__(self, directory, mets_url, mets_basename=DEFAULT_METS_BASENAME, met
             = self.resolver.resolve_mets_arguments(directory, mets_url, mets_basename, mets_server_url)
         self.automatic_backup = automatic_backup
 
+    def workspace(self):
+        return Workspace(
+            self.resolver,
+            directory=self.directory,
+            mets_basename=self.mets_basename,
+            automatic_backup=self.automatic_backup,
+            mets_server_url=self.mets_server_url,
+        )
+
+    def backup_manager(self):
+        return WorkspaceBackupManager(self.workspace())
+
 pass_workspace = click.make_pass_decorator(WorkspaceCtx)
 
@@ -118,7 +128,7 @@ def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency
 @workspace_cli.command('clone', cls=command_with_replaced_help(
     (r' \[WORKSPACE_DIR\]', ''))) # XXX deprecated argument
 @click.option('-f', '--clobber-mets', help="Overwrite existing METS file", default=False, is_flag=True)
-@click.option('-a', '--download', is_flag=True, help="Download all files and change location in METS file after cloning")
+@click.option('-a', '--download', is_flag=True, help="Download all selected files and add local path references in METS file afterwards")
 @click.argument('mets_url')
 @mets_find_options
 # XXX deprecated
@@ -129,20 +139,25 @@ def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mim
     Create a workspace from METS_URL and return the directory
 
     METS_URL can be a URL, an absolute path or a path relative to $PWD.
-    If METS_URL is not provided, use --mets accordingly.
 
     METS_URL can also be an OAI-PMH GetRecord URL wrapping a METS file.
+
+    Additional options pertain to the selection of files / fileGrps / pages
+    to be downloaded, if --download is used.
     """
     LOG = getLogger('ocrd.cli.workspace.clone')
     if workspace_dir:
         LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR clone' instead of argument 'WORKSPACE_DIR' ('%s')" % workspace_dir))
         ctx.directory = workspace_dir
+    assert not ctx.mets_server_url, \
+        f"clone cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
     workspace = ctx.resolver.workspace_from_url(
         mets_url,
         dst_dir=ctx.directory,
         mets_basename=ctx.mets_basename,
         clobber_mets=clobber_mets,
         download=download,
+        fileGrp=file_grp,
         ID=file_id,
         pageId=page_id,
         mimetype=mimetype,
@@ -171,10 +186,12 @@ def workspace_init(ctx, clobber_mets, directory):
     if directory:
         LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR init' instead of argument 'DIRECTORY' ('%s')" % directory))
         ctx.directory = directory
+    assert not ctx.mets_server_url, \
+        f"init cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
     workspace = ctx.resolver.workspace_from_nothing(
         directory=ctx.directory,
         mets_basename=ctx.mets_basename,
-        clobber_mets=clobber_mets
+        clobber_mets=clobber_mets,
     )
     workspace.save_mets()
     print(workspace.directory)
@@ -198,13 +215,7 @@ def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, ignore, check_
     Add a file or http(s) URL FNAME to METS in a workspace.
     If FNAME is not an http(s) URL and is not a workspace-local existing file, try to copy to workspace.
     """
-    workspace = Workspace(
-        ctx.resolver,
-        directory=ctx.directory,
-        mets_basename=ctx.mets_basename,
-        automatic_backup=ctx.automatic_backup,
-        mets_server_url=ctx.mets_server_url,
-    )
+    workspace = ctx.workspace()
 
     log = getLogger('ocrd.cli.workspace.add')
     if not mimetype:
@@ -308,15 +319,10 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
         echo PHYS_0002 BIN FILE_0002_BIN BIN/FILE_0002_BIN.xml; \
       } | ocrd workspace bulk-add -r '(?P<pageid>.*) (?P<filegrp>.*) (?P<fileid>.*) (?P<local_filename>.*)' \
           -G '{{ filegrp }}' -g '{{ pageid }}' -i '{{ fileid }}' -S '{{ local_filename }}'
-    
+
     """
     log = getLogger('ocrd.cli.workspace.bulk-add') # pylint: disable=redefined-outer-name
-    workspace = Workspace(
-        ctx.resolver,
-        directory=ctx.directory,
-        mets_basename=ctx.mets_basename,
-        automatic_backup=ctx.automatic_backup,
-        mets_server_url=ctx.mets_server_url,
-    )
+    workspace = ctx.workspace()
 
     try:
         pat = re.compile(regex)
@@ -407,7 +413,7 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
             if dry_run:
                 log.info('workspace.add_file(%s)' % file_dict)
             else:
-                workspace.add_file(fileGrp, ignore=ignore, force=force, **file_dict)
+                workspace.add_file(fileGrp, ignore=ignore, force=force, **file_dict) # pylint: disable=redundant-keyword-arg
 
     # save changes to disk
     workspace.save_mets()
@@ -451,13 +457,8 @@ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, incl
     snake_to_camel = {"file_id": "ID", "page_id": "pageId", "file_grp": "fileGrp"}
     output_field = [snake_to_camel.get(x, x) for x in output_field]
     modified_mets = False
-    ret = list()
-    workspace = Workspace(
-        ctx.resolver,
-        directory=ctx.directory,
-        mets_basename=ctx.mets_basename,
-        mets_server_url=ctx.mets_server_url,
-    )
+    ret = []
+    workspace = ctx.workspace()
     with pushd_popd(workspace.directory):
         for f in workspace.find_files(
             file_id=file_id,
@@ -507,7 +508,9 @@ def workspace_remove_file(ctx, id, force, keep_file): # pylint: disable=redefin
 
     (If any ``ID`` starts with ``//``, then its remainder will be interpreted as a regular expression.)
     """
-    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
+    assert not ctx.mets_server_url, \
+        f"remove cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
+    workspace = ctx.workspace()
     for i in id:
         workspace.remove_file(i, force=force, keep_file=keep_file)
     workspace.save_mets()
@@ -525,7 +528,9 @@ def rename_group(ctx, old, new):
     """
     Rename fileGrp (USE attribute ``NEW`` to ``OLD``).
     """
-    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
+    assert not ctx.mets_server_url, \
+        f"rename-group cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
+    workspace = ctx.workspace()
     workspace.rename_file_group(old, new)
     workspace.save_mets()
 
@@ -546,7 +551,9 @@ def remove_group(ctx, group, recursive, force, keep_files):
     (If any ``GROUP`` starts with ``//``, then its remainder will be interpreted as a regular expression.)
     """
-    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
+    assert not ctx.mets_server_url, \
+        f"remove-group cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
+    workspace = ctx.workspace()
     for g in group:
         workspace.remove_file_group(g, recursive=recursive, force=force, keep_files=keep_files)
     workspace.save_mets()
 
@@ -568,7 +575,9 @@ def prune_files(ctx, file_grp, mimetype, page_id, file_id):
     (If any ``FILTER`` starts with ``//``, then its remainder will be interpreted as a regular expression.)
     """
-    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
+    assert not ctx.mets_server_url, \
+        f"prune-files cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
+    workspace = ctx.workspace()
     with pushd_popd(workspace.directory):
         for f in workspace.find_files(
             file_id=file_id,
@@ -605,8 +614,7 @@ def clean(ctx, dry_run, directories, path_glob):
 
     If no PATH_GLOB are specified, then all files and directories may match.
     """
-    log = getLogger('ocrd.cli.workspace.clean')
-    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
+    workspace = ctx.workspace()
     allowed_files = [normpath(f.local_filename) for f in workspace.find_files(local_only=True)]
     allowed_files.append(relpath(workspace.mets_target, start=workspace.directory))
     allowed_dirs = set(dirname(path) for path in allowed_files)
@@ -624,7 +632,7 @@ def clean(ctx, dry_run, directories, path_glob):
         if normpath(path) in allowed_files:
             continue
         if dry_run:
-            log.info('unlink(%s)' % path)
+            ctx.log.info('unlink(%s)' % path)
         else:
             unlink(path)
     if not directories:
@@ -634,7 +642,7 @@ def clean(ctx, dry_run, directories, path_glob):
         if normpath(path) in allowed_dirs:
             continue
         if dry_run:
-            log.info('rmdir(%s)' % path)
+            ctx.log.info('rmdir(%s)' % path)
         else:
             rmdir(path)
 
@@ -648,7 +656,7 @@ def list_groups(ctx):
     """
     List fileGrp USE attributes
     """
-    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
+    workspace = ctx.workspace()
     print("\n".join(workspace.mets.file_groups))
 
 # ----------------------------------------------------------------------
@@ -674,20 +682,16 @@ def list_pages(ctx, output_field, output_format, chunk_number, chunk_index, page
     (If any ``FILTER`` starts with ``//``, then its remainder will be interpreted as a regular expression.)
     """
-    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
-    find_kwargs = {}
-    if page_id_range and 'ID' in output_field:
-        find_kwargs['pageId'] = page_id_range
-    page_ids = sorted({x.pageId for x in workspace.mets.find_files(**find_kwargs) if x.pageId})
+    workspace = ctx.workspace()
     ret = []
-
-    if output_field == ['ID']:
-        ret = [[x] for x in page_ids]
-    else:
-        for i, page_div in enumerate(workspace.mets.get_physical_pages(for_pageIds=','.join(page_ids), return_divs=True)):
+    if page_id_range or list(output_field) != ['ID']:
+        for i, page_div in enumerate(workspace.mets.get_physical_pages(for_pageIds=page_id_range, return_divs=True)):
             ret.append([])
             for k in output_field:
                 ret[i].append(page_div.get(k, 'None'))
+    else:
+        for page_id in workspace.mets.physical_pages:
+            ret.append([page_id])
 
     if numeric_range:
         start, end = map(int, numeric_range.split('..'))
@@ -721,7 +725,7 @@ def get_id(ctx):
     """
     Get METS id if any
     """
-    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
+    workspace = ctx.workspace()
     ID = workspace.mets.unique_identifier
     if ID:
         print(ID)
@@ -741,13 +745,13 @@ def set_id(ctx, id):   # pylint: disable=redefined-builtin
 
     Otherwise will create a new {{ ID }}.
     """
-    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
+    workspace = ctx.workspace()
     workspace.mets.unique_identifier = id
     workspace.save_mets()
 
 @workspace_cli.command('update-page')
 @click.option('--set', 'attr_value_pairs', help=f"set mets:div ATTR to VALUE. possible keys: {METS_PAGE_DIV_ATTRIBUTE.names()}", metavar="ATTR VALUE", nargs=2, multiple=True)
-@click.option('--order', help="[DEPRECATED - use --set ATTR VALUE", metavar='ORDER') 
+@click.option('--order', help="[DEPRECATED - use --set ATTR VALUE", metavar='ORDER')
 @click.option('--orderlabel', help="DEPRECATED - use --set ATTR VALUE", metavar='ORDERLABEL')
 @click.option('--contentids', help="DEPRECATED - use --set ATTR VALUE", metavar='ORDERLABEL')
 @click.argument('PAGE_ID')
 @pass_workspace
@@ -756,7 +760,7 @@ def update_page(ctx, attr_value_pairs, order, orderlabel, contentids, page_id):
     """
     Update the @ID, @ORDER, @ORDERLABEL, @LABEL or @CONTENTIDS attributes of the mets:div with @ID=PAGE_ID
     """
-    update_kwargs = {k: v for k, v in attr_value_pairs}
+    update_kwargs = dict(attr_value_pairs)
     if order:
         update_kwargs['ORDER'] = order
     if orderlabel:
@@ -764,7 +768,9 @@ def update_page(ctx, attr_value_pairs, order, orderlabel, contentids, page_id):
     if contentids:
         update_kwargs['CONTENTIDS'] = contentids
     try:
-        workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
+        assert not ctx.mets_server_url, \
+            f"update-page cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
+        workspace = ctx.workspace()
         workspace.mets.update_physical_page_attributes(page_id, **update_kwargs)
         workspace.save_mets()
     except Exception as err:
@@ -802,7 +808,9 @@ def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pa
     mets_path = Path(mets_path)
     if filegrp_mapping:
         filegrp_mapping = loads(filegrp_mapping)
-    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
+    assert not ctx.mets_server_url, \
+        f"merge cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}"
+    workspace = ctx.workspace()
     other_workspace = Workspace(ctx.resolver, directory=str(mets_path.parent), mets_basename=str(mets_path.name))
     workspace.merge(
         other_workspace,
@@ -826,11 +834,12 @@ def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pa
 # ----------------------------------------------------------------------
 
 @workspace_cli.group('backup')
-@click.pass_context
+@pass_workspace
 def workspace_backup_cli(ctx): # pylint: disable=unused-argument
     """
     Backing and restoring workspaces - dev edition
     """
+    assert not ctx.mets_server_url, "Workspace backups currently not interoperable with METS Server"
 
 @workspace_backup_cli.command('add')
 @pass_workspace
@@ -838,7 +847,7 @@ def workspace_backup_add(ctx):
     """
     Create a new backup
     """
-    backup_manager = WorkspaceBackupManager(Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup))
+    backup_manager = ctx.backup_manager()
     backup_manager.add()
 
 @workspace_backup_cli.command('list')
@@ -847,7 +856,7 @@ def workspace_backup_list(ctx):
     """
     List backups
     """
-    backup_manager = WorkspaceBackupManager(Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup))
+    backup_manager = ctx.backup_manager()
     for b in backup_manager.list():
         print(b)
 
@@ -859,7 +868,7 @@ def workspace_backup_restore(ctx, choose_first, bak):
     """
     Restore backup BAK
     """
-    backup_manager = WorkspaceBackupManager(Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup))
+    backup_manager = ctx.backup_manager()
     backup_manager.restore(bak, choose_first)
 
 @workspace_backup_cli.command('undo')
@@ -868,7 +877,7 @@ def workspace_backup_undo(ctx):
     """
     Restore the last backup
     """
-    backup_manager = WorkspaceBackupManager(Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup))
+    backup_manager = ctx.backup_manager()
     backup_manager.undo()
 
@@ -885,15 +894,24 @@ def workspace_serve_cli(ctx): # pylint: disable=unused-argument
 
 @workspace_serve_cli.command('stop')
 @pass_workspace
 def workspace_serve_stop(ctx): # pylint: disable=unused-argument
-    """Stop the METS server"""
-    workspace = Workspace(
-        ctx.resolver,
-        directory=ctx.directory,
-        mets_basename=ctx.mets_basename,
-        mets_server_url=ctx.mets_server_url,
-    )
+    """Stop the METS server (saving changes to disk)"""
+    workspace = ctx.workspace()
     workspace.mets.stop()
 
+@workspace_serve_cli.command('reload')
+@pass_workspace
+def workspace_serve_reload(ctx): # pylint: disable=unused-argument
+    """Reload the METS server from disk"""
+    workspace = ctx.workspace()
+    workspace.mets.reload()
+
+@workspace_serve_cli.command('save')
+@pass_workspace
+def workspace_serve_save(ctx): # pylint: disable=unused-argument
+    """Save the METS changes to disk"""
+    workspace = ctx.workspace()
+    workspace.mets.save()
+
 @workspace_serve_cli.command('start')
 @pass_workspace
 def workspace_serve_start(ctx): # pylint: disable=unused-argument
log.critical(e.message) + sys.exit(1) + if show_resource: + try: + processor.show_resource(show_resource) + sys.exit() + except ResourceNotFoundError as e: + log = getLogger('ocrd.processor.base') + log.critical(e.message) + sys.exit(1) + if list_resources: + processor.list_resources() sys.exit() - if subcommand: + if subcommand or address or queue or database: # Used for checking/starting network agents for the WebAPI architecture check_and_run_network_agent(processorClass, subcommand, address, database, queue) - elif address or queue or database: - raise ValueError(f"Subcommand options --address --queue and --database are only valid for subcommands: {SUBCOMMANDS}") + # from here: single-run processing context initLogging() - - LOG = getLogger('ocrd.cli_wrap_processor') - assert kwargs['input_file_grp'] is not None - assert kwargs['output_file_grp'] is not None - # LOG.info('kwargs=%s' % kwargs) if 'parameter' in kwargs: # Disambiguate parameter file/literal, and resolve file - # (but avoid entering processing context of constructor) - class DisposableSubclass(processorClass): - def show_version(self): - pass - disposable = DisposableSubclass(None, show_version=True) def resolve(name): try: - return disposable.resolve_resource(name) + return processor.resolve_resource(name) except ResourceNotFoundError: return None kwargs['parameter'] = parse_json_string_or_file(*kwargs['parameter'], resolve_preset_file=resolve) else: - kwargs['parameter'] = dict() + kwargs['parameter'] = {} # Merge parameter overrides and parameters if 'parameter_override' in kwargs: - set_json_key_value_overrides(kwargs['parameter'], *kwargs['parameter_override']) - # TODO OCR-D/core#274 + set_json_key_value_overrides(kwargs['parameter'], *kwargs.pop('parameter_override')) # Assert -I / -O - # if not kwargs['input_file_grp']: - # raise ValueError('-I/--input-file-grp is required') - # if not kwargs['output_file_grp']: - # raise ValueError('-O/--output-file-grp is required') + if not kwargs['input_file_grp']: + raise ValueError('-I/--input-file-grp is required') + if not kwargs['output_file_grp']: + raise ValueError('-O/--output-file-grp is required') resolver = Resolver() working_dir, mets, _, mets_server_url = \ resolver.resolve_mets_arguments(working_dir, mets, None, mets_server_url) workspace = resolver.workspace_from_url(mets, working_dir, mets_server_url=mets_server_url) page_id = kwargs.get('page_id') - # XXX not possible while processors do not adhere to # https://github.com/OCR-D/core/issues/505 - # if overwrite - # if 'output_file_grp' not in kwargs or not kwargs['output_file_grp']: - # raise Exception("--overwrite requires --output-file-grp") - # LOG.info("Removing files because of --overwrite") - # for grp in kwargs['output_file_grp'].split(','): - # if page_id: - # for one_page_id in kwargs['page_id'].split(','): - # LOG.debug("Removing files in output file group %s with page ID %s", grp, one_page_id) - # for file in workspace.mets.find_files(pageId=one_page_id, fileGrp=grp): - # workspace.remove_file(file, force=True, keep_file=False, page_recursive=True) - # else: - # LOG.debug("Removing all files in output file group %s ", grp) - # # TODO: can be reduced to `page_same_group=True` as soon as core#505 has landed (in all processors) - # workspace.remove_file_group(grp, recursive=True, force=True, keep_files=False, page_recursive=True, page_same_group=False) - # workspace.save_mets() - # XXX While https://github.com/OCR-D/core/issues/505 is open, set 'overwrite_mode' globally on the workspace + if debug: + 
config.OCRD_MISSING_INPUT = 'ABORT' + config.OCRD_MISSING_OUTPUT = 'ABORT' + config.OCRD_EXISTING_OUTPUT = 'ABORT' if overwrite: - workspace.overwrite_mode = True + config.OCRD_EXISTING_OUTPUT = 'OVERWRITE' report = WorkspaceValidator.check_file_grp(workspace, kwargs['input_file_grp'], '' if overwrite else kwargs['output_file_grp'], page_id) if not report.is_valid: raise Exception("Invalid input/output file grps:\n\t%s" % '\n\t'.join(report.errors)) @@ -139,22 +137,31 @@ def resolve(name): print("Profiling...") pr = cProfile.Profile() pr.enable() - def exit(): + def goexit(): pr.disable() print("Profiling completed") if profile_file: - with open(profile_file, 'wb') as f: - pr.dump_stats(profile_file) + pr.dump_stats(profile_file) s = io.StringIO() pstats.Stats(pr, stream=s).sort_stats("cumulative").print_stats() print(s.getvalue()) - atexit.register(exit) - run_processor(processorClass, mets_url=mets, workspace=workspace, **kwargs) + atexit.register(goexit) + if log_filename: + log_ctx = redirect_stderr_and_stdout_to_file(log_filename) + else: + log_ctx = nullcontext() + with log_ctx: + run_processor(processorClass, mets_url=mets, workspace=workspace, **kwargs) def check_and_run_network_agent(ProcessorClass, subcommand: str, address: str, database: str, queue: str): """ """ + from ocrd_network import ProcessingWorker, ProcessorServer, AgentType + SUBCOMMANDS = [AgentType.PROCESSING_WORKER, AgentType.PROCESSOR_SERVER] + + if not subcommand: + raise ValueError(f"Subcommand options --address --queue and --database are only valid for subcommands: {SUBCOMMANDS}") if subcommand not in SUBCOMMANDS: raise ValueError(f"SUBCOMMAND can only be one of {SUBCOMMANDS}") diff --git a/src/ocrd/decorators/ocrd_cli_options.py b/src/ocrd/decorators/ocrd_cli_options.py index e640a20032..a401264ed2 100644 --- a/src/ocrd/decorators/ocrd_cli_options.py +++ b/src/ocrd/decorators/ocrd_cli_options.py @@ -33,21 +33,23 @@ def cli(mets_url): option('-O', '--output-file-grp', default=None), option('-g', '--page-id'), option('--overwrite', is_flag=True, default=False), + option('--debug', is_flag=True, default=False), option('--profile', is_flag=True, default=False), option('--profile-file', type=Path(dir_okay=False, writable=True)), parameter_option, parameter_override_option, loglevel_option, + option('--log-filename', default=None), option('--address', type=ServerAddressParamType()), option('--queue', type=QueueServerParamType()), option('--database', type=DatabaseParamType()), + option('-R', '--resolve-resource'), option('-C', '--show-resource'), option('-L', '--list-resources', is_flag=True, default=False), option('-J', '--dump-json', is_flag=True, default=False), option('-D', '--dump-module-dir', is_flag=True, default=False), option('-h', '--help', is_flag=True, default=False), option('-V', '--version', is_flag=True, default=False), - option('--log-filename', default=None), # Subcommand, only used for 'worker'/'server'. 
Cannot be handled in # click because processors use the @command decorator and even if they # were using `group`, you cannot combine have a command with diff --git a/src/ocrd/decorators/parameter_option.py b/src/ocrd/decorators/parameter_option.py index 0fbe3e0577..2f8be3d868 100644 --- a/src/ocrd/decorators/parameter_option.py +++ b/src/ocrd/decorators/parameter_option.py @@ -1,16 +1,16 @@ from click import option -#from ocrd_utils import parse_json_string_or_file __all__ = ['parameter_option', 'parameter_override_option'] def _handle_param_option(ctx, param, value): + from ocrd_utils import parse_json_string_or_file return parse_json_string_or_file(*list(value)) parameter_option = option('-p', '--parameter', help="Parameters, either JSON string or path to JSON file", multiple=True, - default=['{}'], + default=[], # now handled in ocrd_cli_wrap_processor to resolve processor preset files # callback=_handle_param_option callback=lambda ctx, param, kv: list(kv)) diff --git a/src/ocrd/lib.bash b/src/ocrd/lib.bash index 1e3ecfc6eb..52bde30258 100644 --- a/src/ocrd/lib.bash +++ b/src/ocrd/lib.bash @@ -27,12 +27,22 @@ ocrd__log () { ## Ensure minimum version # ht https://stackoverflow.com/posts/4025065 ocrd__minversion () { - local minversion="$1" - local version=$(ocrd --version|sed 's/ocrd, version //') - #echo "$minversion < $version?" - local IFS=. - version=($version) - minversion=($minversion) + set -e + local minversion_raw="$1" + local version_raw=$(ocrd --version|sed 's/ocrd, version //') + local version_mmp=$(echo "$version_raw" | grep -Eo '([0-9]+\.?){3}') + local version_prerelease_suffix="${version_raw#$version_mmp}" + if [[ -z $version_prerelease_suffix ]];then + version_prerelease_suffix=0 + fi + local minversion_mmp=$(echo "$minversion_raw" | grep -Eo '([0-9]+\.?){3}') + local minversion_prerelease_suffix="${minversion_raw#$minversion_mmp}" + if [[ -z $minversion_prerelease_suffix ]];then + minversion_prerelease_suffix=0 + fi + local IFS='.' + version=($version_mmp) + minversion=($minversion_mmp) # MAJOR > MAJOR if (( ${version[0]} > ${minversion[0]} ));then return @@ -44,12 +54,17 @@ ocrd__minversion () { # MINOR == MINOR elif (( ${version[1]} == ${minversion[1]} ));then # PATCH > PATCH - if (( ${version[2]} >= ${minversion[2]} ));then + if (( ${version[2]} > ${minversion[2]} ));then return + elif (( ${version[2]} == ${minversion[2]}));then + # Match prerelease suffix like a1, b1 alphabetically + if [ "$version_prerelease_suffix" = "$minversion_prerelease_suffix" -o "$version_prerelease_suffix" \> "$minversion_prerelease_suffix" ]; then + return + fi fi fi fi - ocrd__raise "ocrd/core is too old (${version[*]} < ${minversion[*]}). Please update OCR-D/core" + ocrd__raise "ocrd/core is too old ($version_raw < $minversion_raw). Please update OCR-D/core" } ## ### `ocrd__dumpjson` @@ -108,6 +123,7 @@ ocrd__usage () { ## declare -A ocrd__argv=() ## ``` ocrd__parse_argv () { + set -e # if [[ -n "$ZSH_VERSION" ]];then # print -r -- ${+ocrd__argv} ${(t)ocrd__argv} @@ -120,11 +136,16 @@ ocrd__parse_argv () { ocrd__raise "Must set \$params (declare -A params)" fi + if ! 
declare -p "params_json" >/dev/null 2>/dev/null ;then + ocrd__raise "Must set \$params_json (declare params_json)" + fi + if [[ $# = 0 ]];then ocrd__usage exit 1 fi + ocrd__argv[debug]=false ocrd__argv[overwrite]=false ocrd__argv[profile]=false ocrd__argv[profile_file]= @@ -141,6 +162,7 @@ ocrd__parse_argv () { while [[ "${1:-}" = -* ]];do case "$1" in -l|--log-level) ocrd__argv[log_level]=$2 ; shift ;; + --log-filename) exec 2> "$2" ; shift ;; -h|--help|--usage) ocrd__usage; exit ;; -J|--dump-json) ocrd__dumpjson; exit ;; -D|--dump-module-dir) echo $(dirname "$OCRD_TOOL_JSON"); exit ;; @@ -154,6 +176,7 @@ ocrd__parse_argv () { -w|--working-dir) ocrd__argv[working_dir]=$(realpath "$2") ; shift ;; -m|--mets) ocrd__argv[mets_file]=$(realpath "$2") ; shift ;; -U|--mets-server-url) ocrd__argv[mets_server_url]="$2" ; shift ;; + --debug) ocrd__argv[debug]=true ;; --overwrite) ocrd__argv[overwrite]=true ;; --profile) ocrd__argv[profile]=true ;; --profile-file) ocrd__argv[profile_file]=$(realpath "$2") ; shift ;; @@ -226,17 +249,6 @@ ocrd__parse_argv () { trap showtime DEBUG fi - # check fileGrps - local _valopts=( --workspace "${ocrd__argv[working_dir]}" --mets-basename "$(basename ${ocrd__argv[mets_file]})" ) - if [[ ${ocrd__argv[overwrite]} = true ]]; then - _valopts+=( --overwrite ) - fi - if [[ -n "${ocrd__argv[page_id]:-}" ]]; then - _valopts+=( --page-id "${ocrd__argv[page_id]}" ) - fi - _valopts+=( "${OCRD_TOOL_NAME#ocrd-} -I ${ocrd__argv[input_file_grp]} -O ${ocrd__argv[output_file_grp]} ${__parameters[*]@Q} ${__parameter_overrides[*]@Q}" ) - ocrd validate tasks "${_valopts[@]}" || exit $? - # check parameters local params_parsed retval params_parsed="$(ocrd ocrd-tool "$OCRD_TOOL_JSON" tool $OCRD_TOOL_NAME parse-params "${__parameters[@]}" "${__parameter_overrides[@]}")" || { @@ -245,10 +257,12 @@ ocrd__parse_argv () { $params_parsed" } eval "$params_parsed" + params_json="$(ocrd ocrd-tool "$OCRD_TOOL_JSON" tool $OCRD_TOOL_NAME parse-params --json "${__parameters[@]}" "${__parameter_overrides[@]}")" } ocrd__wrap () { + set -e declare -gx OCRD_TOOL_JSON="$1" declare -gx OCRD_TOOL_NAME="$2" @@ -256,6 +270,7 @@ ocrd__wrap () { shift declare -Agx params params=() + declare -g params_json declare -Agx ocrd__argv ocrd__argv=() @@ -277,20 +292,26 @@ ocrd__wrap () { ocrd__parse_argv "$@" - i=0 - declare -ag ocrd__files=() - while read line; do - eval declare -Ag "ocrd__file$i=( $line )" - eval "ocrd__files[$i]=ocrd__file$i" - let ++i - done < <(ocrd bashlib input-files \ + declare -ag ocrd__files + IFS=$'\n' + ocrd__files=( $(ocrd bashlib input-files \ + --ocrd-tool $OCRD_TOOL_JSON \ + --executable $OCRD_TOOL_NAME \ + $(if [[ ${ocrd__argv[debug]} = true ]]; then echo --debug; fi) \ + $(if [[ ${ocrd__argv[overwrite]} = true ]]; then echo --overwrite; fi) \ -m "${ocrd__argv[mets_file]}" \ + -d "${ocrd__argv[working_dir]}" \ + ${ocrd__argv[mets_server_url]:+-U} ${ocrd__argv[mets_server_url]:-} \ + -p "$params_json" \ -I "${ocrd__argv[input_file_grp]}" \ -O "${ocrd__argv[output_file_grp]}" \ - ${ocrd__argv[page_id]:+-g} ${ocrd__argv[page_id]:-}) + ${ocrd__argv[page_id]:+-g} ${ocrd__argv[page_id]:-}) ) + IFS=$' \t\n' } ## usage: pageId=$(ocrd__input_file 3 pageId) ocrd__input_file() { - eval echo "\${${ocrd__files[$1]}[$2]}" + declare -A input_file + eval input_file=( "${ocrd__files[$1]}" ) + eval echo "${input_file[$2]}" } diff --git a/src/ocrd/mets_server.py b/src/ocrd/mets_server.py index 0d4c0a0785..101727e064 100644 --- a/src/ocrd/mets_server.py +++ b/src/ocrd/mets_server.py @@ -21,7 +21,7 @@ 
import uvicorn from ocrd_models import OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent -from ocrd_utils import getLogger, deprecated_alias +from ocrd_utils import getLogger # @@ -88,6 +88,14 @@ def create(file_groups: List[str]): return OcrdFileGroupListModel(file_groups=file_groups) +class OcrdPageListModel(BaseModel): + physical_pages: List[str] = Field() + + @staticmethod + def create(physical_pages: List[str]): + return OcrdPageListModel(physical_pages=physical_pages) + + class OcrdAgentListModel(BaseModel): agents: List[OcrdAgentModel] = Field() @@ -120,7 +128,7 @@ class ClientSideOcrdMets: def __init__(self, url, workspace_path: Optional[str] = None): self.protocol = "tcp" if url.startswith("http://") else "uds" - self.log = getLogger(f"ocrd.mets_client[{url}]") + self.log = getLogger(f"ocrd.models.ocrd_mets.client.{url}") self.url = url if self.protocol == "tcp" else f'http+unix://{url.replace("/", "%2F")}' self.ws_dir_path = workspace_path if workspace_path else None @@ -210,6 +218,17 @@ def workspace_path(self): ).json()["text"] return self.ws_dir_path + @property + def physical_pages(self) -> List[str]: + if not self.multiplexing_mode: + return self.session.request("GET", f"{self.url}/physical_pages").json()["physical_pages"] + else: + return self.session.request( + "POST", + self.url, + json=MpxReq.physical_pages(self.ws_dir_path) + ).json()["physical_pages"] + @property def file_groups(self): if not self.multiplexing_mode: @@ -236,7 +255,7 @@ def agents(self): agent_dict["_type"] = agent_dict.pop("type") return [ClientSideOcrdAgent(None, **agent_dict) for agent_dict in agent_dicts] - def add_agent(self, *args, **kwargs): + def add_agent(self, **kwargs): if not self.multiplexing_mode: return self.session.request("POST", f"{self.url}/agent", json=OcrdAgentModel.create(**kwargs).dict()) else: @@ -247,11 +266,9 @@ def add_agent(self, *args, **kwargs): ).json() return OcrdAgentModel.create(**kwargs) - @deprecated_alias(ID="file_id") - @deprecated_alias(pageId="page_id") - @deprecated_alias(fileGrp="file_grp") def find_files(self, **kwargs): self.log.debug("find_files(%s)", kwargs) + # translate from native OcrdMets kwargs to OcrdMetsServer REST params if "pageId" in kwargs: kwargs["page_id"] = kwargs.pop("pageId") if "ID" in kwargs: @@ -277,28 +294,31 @@ def find_files(self, **kwargs): def find_all_files(self, *args, **kwargs): return list(self.find_files(*args, **kwargs)) - @deprecated_alias(pageId="page_id") - @deprecated_alias(ID="file_id") def add_file( - self, file_grp, content=None, file_id=None, url=None, local_filename=None, mimetype=None, page_id=None, **kwargs + self, file_grp, content=None, ID=None, url=None, local_filename=None, mimetype=None, pageId=None, **kwargs ): data = OcrdFileModel.create( - file_id=file_id, file_grp=file_grp, page_id=page_id, mimetype=mimetype, url=url, - local_filename=local_filename + file_grp=file_grp, + # translate from native OcrdMets kwargs to OcrdMetsServer REST params + file_id=ID, page_id=pageId, + mimetype=mimetype, url=url, local_filename=local_filename ) + # add force+ignore + kwargs = {**kwargs, **data.dict()} if not self.multiplexing_mode: - r = self.session.request("POST", f"{self.url}/file", data=data.dict()) - if not r: - raise RuntimeError("Add file failed. 
Please check provided parameters")
+            r = self.session.request("POST", f"{self.url}/file", data=kwargs)
+            if not r.ok:
+                raise RuntimeError(f"Failed to add file ({str(data)}): {r.json()}")
         else:
-            r = self.session.request("POST", self.url, json=MpxReq.add_file(self.ws_dir_path, data.dict()))
-            if "error" in r:
-                raise RuntimeError(f"Add file failed: Msg: {r['error']}")
+            r = self.session.request("POST", self.url, json=MpxReq.add_file(self.ws_dir_path, kwargs))
+            if not r.ok:
+                raise RuntimeError(f"Failed to add file ({str(data)}): {r.json()['errors']}")
         return ClientSideOcrdFile(
-            None, ID=file_id, fileGrp=file_grp, url=url, pageId=page_id, mimetype=mimetype,
-            local_filename=local_filename
+            None, fileGrp=file_grp,
+            ID=ID, pageId=pageId,
+            url=url, mimetype=mimetype, local_filename=local_filename
         )

@@ -348,6 +368,11 @@ def workspace_path(ws_dir_path: str) -> Dict:
         return MpxReq.__args_wrapper(
             ws_dir_path, method_type="GET", response_type="text", request_url="workspace_path", request_data={})

+    @staticmethod
+    def physical_pages(ws_dir_path: str) -> Dict:
+        return MpxReq.__args_wrapper(
+            ws_dir_path, method_type="GET", response_type="dict", request_url="physical_pages", request_data={})
+
     @staticmethod
     def file_groups(ws_dir_path: str) -> Dict:
         return MpxReq.__args_wrapper(
@@ -404,7 +429,6 @@ def create_process(mets_server_url: str, ws_dir_path: str, log_file: str) -> int
     @staticmethod
     def kill_process(mets_server_pid: int):
         subprocess_run(args=["kill", "-s", "SIGINT", f"{mets_server_pid}"], shell=False, universal_newlines=True)
-        return

     def shutdown(self):
         if self.is_uds:
@@ -468,6 +492,10 @@ async def unique_identifier():
         async def workspace_path():
             return Response(content=workspace.directory, media_type="text/plain")

+        @app.get(path='/physical_pages', response_model=OcrdPageListModel)
+        async def physical_pages():
+            return {'physical_pages': workspace.mets.physical_pages}
+
         @app.get(path='/file_groups', response_model=OcrdFileGroupListModel)
         async def file_groups():
             return {'file_groups': workspace.mets.file_groups}
@@ -507,7 +535,8 @@ async def add_file(
             page_id: Optional[str] = Form(),
             mimetype: str = Form(),
             url: Optional[str] = Form(None),
-            local_filename: Optional[str] = Form(None)
+            local_filename: Optional[str] = Form(None),
+            force: bool = Form(False),
         ):
             """
             Add a file
@@ -519,7 +548,7 @@ async def add_file(
             )
             # Add to workspace
             kwargs = file_resource.dict()
-            workspace.add_file(**kwargs)
+            workspace.add_file(**kwargs, force=force)
             return file_resource

         # ------------- #
diff --git a/src/ocrd/processor/__init__.py b/src/ocrd/processor/__init__.py
index 21b0c69eb2..7cbcb851de 100644
--- a/src/ocrd/processor/__init__.py
+++ b/src/ocrd/processor/__init__.py
@@ -1,9 +1,15 @@
 from .base import (
     Processor,
-    ResourceNotFoundError
+    ResourceNotFoundError,
+    NonUniqueInputFile,
+    MissingInputFile,
+    generate_processor_help,
+)
+from .ocrd_page_result import (
+    OcrdPageResult,
+    OcrdPageResultImage
 )
 from .helpers import (
     run_cli,
     run_processor,
-    generate_processor_help
 )
diff --git a/src/ocrd/processor/base.py b/src/ocrd/processor/base.py
index 8303413933..d6348b40e1 100644
--- a/src/ocrd/processor/base.py
+++ b/src/ocrd/processor/base.py
@@ -9,34 +9,68 @@
     'run_processor'
 ]

-from os.path import exists
+from functools import cached_property
+from os.path import exists, join
 from shutil import copyfileobj
 import json
 import os
 from os import getcwd
 from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Union, get_args
 import sys
+import logging
+import
logging.handlers +import inspect import tarfile import io -from ocrd.workspace import Workspace +from collections import defaultdict +from frozendict import frozendict +# concurrent.futures is buggy in py38, +# this is where the fixes came from: +from loky import Future, ProcessPoolExecutor +import multiprocessing as mp +from threading import Timer +from _thread import interrupt_main +from click import wrap_text +from deprecated import deprecated +from requests import HTTPError + +from ..workspace import Workspace +from ..mets_server import ClientSideOcrdMets +from ocrd_models.ocrd_file import OcrdFileType +from .ocrd_page_result import OcrdPageResult from ocrd_utils import ( VERSION as OCRD_VERSION, MIMETYPE_PAGE, MIME_TO_EXT, + config, getLogger, - initLogging, list_resource_candidates, pushd_popd, list_all_resources, get_processor_resource_types, resource_filename, + parse_json_file_with_comments, + make_file_id, + deprecation_warning ) from ocrd_validators import ParameterValidator -from ocrd_models.ocrd_page import MetadataItemType, LabelType, LabelsType +from ocrd_models.ocrd_page import ( + PageType, + AlternativeImageType, + MetadataItemType, + LabelType, + LabelsType, + OcrdPage, + to_xml, +) +from ocrd_modelfactory import page_from_file +from ocrd_validators.ocrd_tool_validator import OcrdToolValidator # XXX imports must remain for backwards-compatibility -from .helpers import run_cli, run_processor, generate_processor_help # pylint: disable=unused-import +from .helpers import run_cli, run_processor # pylint: disable=unused-import + class ResourceNotFoundError(FileNotFoundError): """ @@ -46,159 +80,785 @@ class ResourceNotFoundError(FileNotFoundError): def __init__(self, name, executable): self.name = name self.executable = executable - self.message = "Could not find resource '%s' for executable '%s'. " \ - "Try 'ocrd resmgr download %s %s' to download this resource." \ - % (name, executable, executable, name) + self.message = (f"Could not find resource '{name}' for executable '{executable}'. " + f"Try 'ocrd resmgr download {executable} {name}' to download this resource.") + super().__init__(self.message) + +class NonUniqueInputFile(ValueError): + """ + An exception signifying the specified fileGrp / pageId / mimetype + selector yields multiple PAGE files, or no PAGE files but multiple images, + or multiple files of that mimetype. + """ + def __init__(self, fileGrp, pageId, mimetype): + self.fileGrp = fileGrp + self.pageId = pageId + self.mimetype = mimetype + self.message = (f"Could not determine unique input file for fileGrp {fileGrp} " + f"and pageId {pageId} under mimetype {mimetype or 'PAGE+image(s)'}") + super().__init__(self.message) + +class MissingInputFile(ValueError): + """ + An exception signifying the specified fileGrp / pageId / mimetype + selector yields no PAGE files, or no PAGE and no image files, + or no files of that mimetype. + """ + def __init__(self, fileGrp, pageId, mimetype): + self.fileGrp = fileGrp + self.pageId = pageId + self.mimetype = mimetype + self.message = (f"Could not find input file for fileGrp {fileGrp} " + f"and pageId {pageId} under mimetype {mimetype or 'PAGE+image(s)'}") super().__init__(self.message) +class DummyFuture: + """ + Mimics some of `concurrent.futures.Future` but runs immediately. 
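+    (For example, ``DummyFuture(len, 'abc').result()`` simply returns ``3`` -
+    the wrapped callable only runs once ``result()`` is called.)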
+ """ + def __init__(self, fn, *args, **kwargs): + self.fn = fn + self.args = args + self.kwargs = kwargs + def result(self): + return self.fn(*self.args, **self.kwargs) +class DummyExecutor: + """ + Mimics some of `concurrent.futures.ProcessPoolExecutor` but runs + everything immediately in this process. + """ + def __init__(self, initializer=None, initargs=(), **kwargs): + initializer(*initargs) + def shutdown(self, **kwargs): + pass + def submit(self, fn, *args, **kwargs) -> DummyFuture: + return DummyFuture(fn, *args, **kwargs) + +TFuture = Union[DummyFuture, Future] +TExecutor = Union[DummyExecutor, ProcessPoolExecutor] + class Processor(): """ - A processor is a tool that implements the uniform OCR-D command-line interface - for run-time data processing. That is, it executes a single workflow step, - or a combination of workflow steps, on the workspace (represented by local METS). - It reads input files for all or requested physical pages of the input fileGrp(s), - and writes output files for them into the output fileGrp(s). It may take - a number of optional or mandatory parameters. + A processor is a tool that implements the uniform OCR-D + `command-line interface for run-time data processing `_. + + That is, it executes a single workflow step, or a combination of workflow steps, + on the workspace (represented by local METS). It reads input files for all or selected + physical pages of the input fileGrp(s), computes additional annotation, and writes output + files for them into the output fileGrp(s). It may take a number of optional or mandatory + parameters. + """ + + max_instances : int = -1 + """ + maximum number of cached instances (ignored if negative), to be applied on top of + :py:data:`~ocrd_utils.config.OCRD_MAX_PROCESSOR_CACHE` (i.e. whatever is smaller). + + (Override this if you know how many instances fit into memory - GPU / CPU RAM - at once.) + """ + + max_workers : int = -1 + """ + maximum number of processor forks for page-parallel processing (ignored if negative), + to be applied on top of :py:data:`~ocrd_utils.config.OCRD_MAX_PARALLEL_PAGES` (i.e. + whatever is smaller). + + (Override this if you know how many pages fit into processing units - GPU shaders / CPU cores + - at once, or if your class already creates threads prior to forking, e.g. during ``setup``.) + """ + + max_page_seconds : int = -1 + """ + maximum number of seconds may be spent processing a single page (ignored if negative), + to be applied on top of :py:data:`~ocrd_utils.config.OCRD_PROCESSING_PAGE_TIMEOUT` + (i.e. whatever is smaller). + + (Override this if you know how costly this processor may be, irrespective of image size + or complexity of the page.) """ + @property + def metadata_filename(self) -> str: + """ + Relative location of the ``ocrd-tool.json`` file inside the package. + + Used by :py:data:`metadata_location`. + + (Override if ``ocrd-tool.json`` is not in the root of the module, + e.g. ``namespace/ocrd-tool.json`` or ``data/ocrd-tool.json``). + """ + return 'ocrd-tool.json' + + @cached_property + def metadata_location(self) -> Path: + """ + Absolute path of the ``ocrd-tool.json`` file as distributed with the package. + + Used by :py:data:`metadata_rawdict`. + + (Override if ``ocrd-tool.json`` is not distributed with the Python package.) 
+ """ + module = inspect.getmodule(self) + module_tokens = module.__package__.split('.') + # for namespace packages, we cannot just use the first token + for i in range(len(module_tokens)): + prefix = '.'.join(module_tokens[:i + 1]) + if sys.modules[prefix].__spec__.has_location: + return resource_filename(prefix, self.metadata_filename) + raise Exception("cannot find top-level module prefix for %s", module.__package__) + + @cached_property + def metadata_rawdict(self) -> dict: + """ + Raw (unvalidated, unexpanded) ``ocrd-tool.json`` dict contents of the package. + + Used by :py:data:`metadata`. + + (Override if ``ocrd-tool.json`` is not in a file.) + """ + return parse_json_file_with_comments(self.metadata_location) + + @cached_property + def metadata(self) -> dict: + """ + The ``ocrd-tool.json`` dict contents of the package, according to the OCR-D + `spec `_ for processor tools. + + After deserialisation, it also gets validated against the + `schema `_ with all defaults + expanded. + + Used by :py:data:`ocrd_tool` and :py:data:`version`. + + (Override if you want to provide metadata programmatically instead of a + JSON file.) + """ + metadata = self.metadata_rawdict + report = OcrdToolValidator.validate(metadata) + if not report.is_valid: + self.logger.error(f"The ocrd-tool.json of this processor is {'problematic' if not report.errors else 'invalid'}:\n" + f"{report.to_xml()}.\nPlease open an issue at {metadata.get('git_url', 'the website')}.") + return metadata + + @cached_property + def version(self) -> str: + """ + The program version of the package. + Usually the ``version`` part of :py:data:`metadata`. + + (Override if you do not want to use :py:data:`metadata` lookup + mechanism.) + """ + return self.metadata['version'] + + @cached_property + def executable(self) -> str: + """ + The executable name of this processor tool. Taken from the runtime + filename. + + Used by :py:data:`ocrd_tool` for lookup in :py:data:`metadata`. + + (Override if your entry-point name deviates from the ``executable`` + name, or the processor gets instantiated from another runtime.) + """ + return os.path.basename(inspect.stack()[-1].filename) + + @cached_property + def ocrd_tool(self) -> dict: + """ + The ``ocrd-tool.json`` dict contents of this processor tool. + Usually the :py:data:`executable` key of the ``tools`` part + of :py:data:`metadata`. + + (Override if you do not want to use :py:data:`metadata` lookup + mechanism.) 
+ """ + return self.metadata['tools'][self.executable] + + @property + def parameter(self) -> Optional[dict]: + """the runtime parameter dict to be used by this processor""" + if hasattr(self, '_parameter'): + return self._parameter + return None + + @parameter.setter + def parameter(self, parameter : dict) -> None: + if self.parameter is not None: + self.shutdown() + parameterValidator = ParameterValidator(self.ocrd_tool) + report = parameterValidator.validate(parameter) + if not report.is_valid: + raise ValueError(f'Invalid parameters:\n{report.to_xml()}') + # make parameter dict read-only + self._parameter = frozendict(parameter) + # (re-)run setup to load models etc + self.setup() + def __init__( self, - workspace : Workspace, + # FIXME: remove in favor of process_workspace(workspace) + workspace : Optional[Workspace], ocrd_tool=None, parameter=None, input_file_grp=None, output_file_grp=None, page_id=None, - resolve_resource=None, - show_resource=None, - list_resources=False, - show_help=False, - subcommand=None, - show_version=False, - dump_json=False, - dump_module_dir=False, + download_files=config.OCRD_DOWNLOAD_INPUT, version=None ): """ - Instantiate, but do not process. Unless ``list_resources`` or - ``show_resource`` or ``show_help`` or ``show_version`` or - ``dump_json`` or ``dump_module_dir`` is true, setup for processing - (parsing and validating parameters, entering the workspace directory). + Instantiate, but do not setup (neither for processing nor other usage). + If given, do parse and validate :py:data:`.parameter`. Args: workspace (:py:class:`~ocrd.Workspace`): The workspace to process. \ - Can be ``None`` even for processing (esp. on multiple workspaces), \ - but then needs to be set before running. + If not ``None``, then `chdir` to that directory. + Deprecated since version 3.0: Should be ``None`` here, but then needs to be set \ + before processing. Keyword Args: - ocrd_tool (string): JSON of the ocrd-tool description for that processor. \ - Can be ``None`` for processing, but needs to be set before running. parameter (string): JSON of the runtime choices for ocrd-tool ``parameters``. \ Can be ``None`` even for processing, but then needs to be set before running. - input_file_grp (string): comma-separated list of METS ``fileGrp``s used for input. - output_file_grp (string): comma-separated list of METS ``fileGrp``s used for output. + input_file_grp (string): comma-separated list of METS ``fileGrp`` used for input. \ + Deprecated since version 3.0: Should be ``None`` here, but then needs to be set \ + before processing. + output_file_grp (string): comma-separated list of METS ``fileGrp`` used for output. \ + Deprecated since version 3.0: Should be ``None`` here, but then needs to be set \ + before processing. page_id (string): comma-separated list of METS physical ``page`` IDs to process \ - (or empty for all pages). - resolve_resource (string): If not ``None``, then instead of processing, resolve \ - given resource by name and print its full path to stdout. - show_resource (string): If not ``None``, then instead of processing, resolve \ - given resource by name and print its contents to stdout. - list_resources (boolean): If true, then instead of processing, find all installed \ - resource files in the search paths and print their path names. - show_help (boolean): If true, then instead of processing, print a usage description \ - including the standard CLI and all of this processor's ocrd-tool parameters and \ - docstrings. 
- subcommand (string): 'worker' or 'server', only used here for the right --help output - show_version (boolean): If true, then instead of processing, print information on \ - this processor's version and OCR-D version. Exit afterwards. - dump_json (boolean): If true, then instead of processing, print :py:attr:`ocrd_tool` \ - on stdout. - dump_module_dir (boolean): If true, then instead of processing, print :py:attr:`moduledir` \ - on stdout. - """ - self.ocrd_tool = ocrd_tool - if dump_json: - print(json.dumps(ocrd_tool, indent=True)) - return - if dump_module_dir: - print(self.moduledir) - return - if list_resources: - for res in self.list_all_resources(): - print(res) - return - if resolve_resource: - try: - res = self.resolve_resource(resolve_resource) - print(res) - except ResourceNotFoundError as e: - log = getLogger('ocrd.processor.base') - log.critical(e.message) - sys.exit(1) - return - if show_resource: - try: - self.show_resource(show_resource) - except ResourceNotFoundError as e: - log = getLogger('ocrd.processor.base') - log.critical(e.message) - sys.exit(1) - return - if show_help: - self.show_help(subcommand=subcommand) - return - self.version = version - if show_version: - self.show_version() - return - self.workspace = workspace - # FIXME HACK would be better to use pushd_popd(self.workspace.directory) - # but there is no way to do that in process here since it's an - # overridden method. chdir is almost always an anti-pattern. - if self.workspace: + (or empty for all pages). \ + Deprecated since version 3.0: Should be ``None`` here, but then needs to be set \ + before processing. + download_files (boolean): Whether input files will be downloaded prior to processing, \ + defaults to :py:attr:`ocrd_utils.config.OCRD_DOWNLOAD_INPUT` which is ``True`` by default + """ + if ocrd_tool is not None: + deprecation_warning("Passing 'ocrd_tool' as keyword argument to Processor is deprecated - " + "use or override metadata/executable/ocrd-tool properties instead") + self.ocrd_tool = ocrd_tool + self.executable = ocrd_tool['executable'] + if version is not None: + deprecation_warning("Passing 'version' as keyword argument to Processor is deprecated - " + "use or override metadata/version properties instead") + self.version = version + if workspace is not None: + deprecation_warning("Passing a workspace argument other than 'None' to Processor " + "is deprecated - pass as argument to process_workspace instead") + self.workspace = workspace self.old_pwd = getcwd() os.chdir(self.workspace.directory) - self.input_file_grp = input_file_grp - self.output_file_grp = output_file_grp - self.page_id = None if page_id == [] or page_id is None else page_id - if parameter is None: - parameter = {} - parameterValidator = ParameterValidator(ocrd_tool) - report = parameterValidator.validate(parameter) - if not report.is_valid: - raise Exception("Invalid parameters %s" % report.errors) - self.parameter = parameter + if input_file_grp is not None: + deprecation_warning("Passing an input_file_grp kwarg other than 'None' to Processor " + "is deprecated - pass as argument to process_workspace instead") + self.input_file_grp = input_file_grp + if output_file_grp is not None: + deprecation_warning("Passing an output_file_grp kwarg other than 'None' to Processor " + "is deprecated - pass as argument to process_workspace instead") + self.output_file_grp = output_file_grp + if page_id is not None: + deprecation_warning("Passing a page_id kwarg other than 'None' to Processor " + "is deprecated - pass as argument 
to process_workspace instead") + self.page_id = page_id or None + self.download = download_files + #: The logger to be used by processor implementations. + # `ocrd.processor.base` internals should use :py:attr:`self._base_logger` + self.logger = getLogger(f'ocrd.processor.{self.__class__.__name__}') + self._base_logger = getLogger('ocrd.processor.base') + if parameter is not None: + self.parameter = parameter + # workaround for deprecated#72 (@deprecated decorator does not work for subclasses): + setattr(self, 'process', + deprecated(version='3.0', reason='process() should be replaced with process_page_pcgts() or process_page_file() or process_workspace()')(getattr(self, 'process'))) + + def __del__(self): + self._base_logger.debug("shutting down") + self.shutdown() def show_help(self, subcommand=None): + """ + Print a usage description including the standard CLI and all of this processor's ocrd-tool + parameters and docstrings. + """ print(generate_processor_help(self.ocrd_tool, processor_instance=self, subcommand=subcommand)) def show_version(self): + """ + Print information on this processor's version and OCR-D version. + """ print("Version %s, ocrd/core %s" % (self.version, OCRD_VERSION)) def verify(self): """ - Verify that the :py:attr:`input_file_grp` fulfills the processor's requirements. + Verify that :py:attr:`input_file_grp` and :py:attr:`output_file_grp` fulfill the processor's requirements. """ + # verify input and output file groups in parameters + assert self.input_file_grp is not None + assert self.output_file_grp is not None + input_file_grps = self.input_file_grp.split(',') + output_file_grps = self.output_file_grp.split(',') + def assert_file_grp_cardinality(grps : List[str], spec : Union[int, List[int]], msg): + if isinstance(spec, int): + if spec > 0: + assert len(grps) == spec, msg % (len(grps), str(spec)) + else: + assert isinstance(spec, list) + minimum = spec[0] + maximum = spec[1] + if minimum > 0: + assert len(grps) >= minimum, msg % (len(grps), str(spec)) + if maximum > 0: + assert len(grps) <= maximum, msg % (len(grps), str(spec)) + # FIXME: enforce unconditionally as soon as grace period for deprecation is over + if 'input_file_grp_cardinality' in self.ocrd_tool: + assert_file_grp_cardinality(input_file_grps, self.ocrd_tool['input_file_grp_cardinality'], + "Unexpected number of input file groups %d vs %s") + if 'output_file_grp_cardinality' in self.ocrd_tool: + assert_file_grp_cardinality(output_file_grps, self.ocrd_tool['output_file_grp_cardinality'], + "Unexpected number of output file groups %d vs %s") + # verify input and output file groups in METS + for input_file_grp in input_file_grps: + assert input_file_grp in self.workspace.mets.file_groups, \ + f"input fileGrp {input_file_grp} does not exist in workspace {self.workspace}" + for output_file_grp in output_file_grps: + assert output_file_grp not in self.workspace.mets.file_groups \ + or config.OCRD_EXISTING_OUTPUT in ['OVERWRITE', 'SKIP'] \ + or not any(self.workspace.mets.find_files( + pageId=self.page_id, fileGrp=output_file_grp)), \ + f"output fileGrp {output_file_grp} already exists in workspace {self.workspace}" + # keep this for backwards compatibility: return True + def dump_json(self): + """ + Print :py:attr:`ocrd_tool` on stdout. + """ + print(json.dumps(self.ocrd_tool, indent=True)) + + def dump_module_dir(self): + """ + Print :py:attr:`moduledir` on stdout. 
+ """ + print(self.moduledir) + + def list_resources(self): + """ + Find all installed resource files in the search paths and print their path names. + """ + for res in self.list_all_resources(): + print(res) + + def setup(self) -> None: + """ + Prepare the processor for actual data processing, + prior to changing to the workspace directory but + after parsing parameters. + + (Override this to load models into memory etc.) + """ + pass + + def shutdown(self) -> None: + """ + Bring down the processor after data processing, + after to changing back from the workspace directory but + before exiting (or setting up with different parameters). + + (Override this to unload models from memory etc.) + """ + pass + + @deprecated(version='3.0', reason='process() should be replaced with process_page_pcgts() or process_page_file() or process_workspace()') def process(self) -> None: """ - Process the :py:attr:`workspace` - from the given :py:attr:`input_file_grp` - to the given :py:attr:`output_file_grp` - for the given :py:attr:`page_id` - under the given :py:attr:`parameter`. - - (This contains the main functionality and needs to be overridden by subclasses.) + Process all files of the :py:data:`workspace` + from the given :py:data:`input_file_grp` + to the given :py:data:`output_file_grp` + for the given :py:data:`page_id` (or all pages) + under the given :py:data:`parameter`. + + (This contains the main functionality and needs to be + overridden by subclasses.) """ raise NotImplementedError() + def process_workspace(self, workspace: Workspace) -> None: + """ + Process all files of the given ``workspace``, + from the given :py:data:`input_file_grp` + to the given :py:data:`output_file_grp` + for the given :py:data:`page_id` (or all pages) + under the given :py:data:`parameter`. + + Delegates to :py:meth:`.process_workspace_submit_tasks` + and :py:meth:`.process_workspace_handle_tasks`. + + (This will iterate over pages and files, calling + :py:meth:`.process_page_file` and handling exceptions. + It should be overridden by subclasses to handle cases + like post-processing or computation across pages.) 
+ """ + with pushd_popd(workspace.directory): + self.workspace = workspace + self.verify() + try: + # set up multitasking + max_workers = max(0, config.OCRD_MAX_PARALLEL_PAGES) + if self.max_workers > 0 and self.max_workers < config.OCRD_MAX_PARALLEL_PAGES: + self._base_logger.info("limiting number of threads from %d to %d", max_workers, self.max_workers) + max_workers = self.max_workers + if max_workers > 1: + assert isinstance(workspace.mets, ClientSideOcrdMets), \ + "OCRD_MAX_PARALLEL_PAGES>1 requires also using --mets-server-url" + max_seconds = max(0, config.OCRD_PROCESSING_PAGE_TIMEOUT) + if self.max_page_seconds > 0 and self.max_page_seconds < config.OCRD_PROCESSING_PAGE_TIMEOUT: + self._base_logger.info("limiting page timeout from %d to %d sec", max_seconds, self.max_page_seconds) + max_seconds = self.max_page_seconds + + if max_workers > 1: + executor_cls = ProcessPoolExecutor + log_queue = mp.Queue() + # forward messages from log queue (in subprocesses) to all root handlers + log_listener = logging.handlers.QueueListener(log_queue, *logging.root.handlers, respect_handler_level=True) + else: + executor_cls = DummyExecutor + log_queue = None + log_listener = None + executor = executor_cls( + max_workers=max_workers or 1, + # only forking method avoids pickling + context=mp.get_context('fork'), + # share processor instance as global to avoid pickling + initializer=_page_worker_set_ctxt, + initargs=(self, log_queue), + ) + if max_workers > 1: + log_listener.start() + try: + self._base_logger.debug("started executor %s with %d workers", str(executor), max_workers or 1) + tasks = self.process_workspace_submit_tasks(executor, max_seconds) + stats = self.process_workspace_handle_tasks(tasks) + finally: + executor.shutdown(kill_workers=True, wait=False) + if max_workers > 1: + log_listener.stop() + + except NotImplementedError: + # fall back to deprecated method + try: + self.process() + except Exception as err: + # suppress the NotImplementedError context + raise err from None + + def process_workspace_submit_tasks(self, executor : TExecutor, max_seconds : int) -> Dict[TFuture, Tuple[str, List[Optional[OcrdFileType]]]]: + """ + Look up all input files of the given ``workspace`` + from the given :py:data:`input_file_grp` + for the given :py:data:`page_id` (or all pages), + and schedules calling :py:meth:`.process_page_file` + on them for each page via `executor` (enforcing + a per-page time limit of `max_seconds`). + + When running with `OCRD_MAX_PARALLEL_PAGES>1` and + the workspace via METS Server, the executor will fork + this many worker parallel subprocesses each processing + one page at a time. (Interprocess communication is + done via task and result queues.) + + Otherwise, tasks are run sequentially in the + current process. + + Delegates to :py:meth:`.zip_input_files` to get + the input files for each page, and then calls + :py:meth:`.process_workspace_submit_page_task`. + + Returns a dict mapping the per-page tasks + (i.e. futures submitted to the executor) + to their corresponding pageId and input files. 
+ """ + tasks = {} + for input_file_tuple in self.zip_input_files(on_error='abort', require_first=False): + task, page_id, input_files = self.process_workspace_submit_page_task(executor, max_seconds, input_file_tuple) + tasks[task] = (page_id, input_files) + self._base_logger.debug("submitted %d processing tasks", len(tasks)) + return tasks + + def process_workspace_submit_page_task(self, executor : TExecutor, max_seconds : int, input_file_tuple : List[Optional[OcrdFileType]]) -> Tuple[TFuture, str, List[Optional[OcrdFileType]]]: + """ + Ensure all input files for a single page are + downloaded to the workspace, then schedule + :py:meth:`.process_process_file` to be run on + them via `executor` (enforcing a per-page time + limit of `max_seconds`). + + Delegates to :py:meth:`.process_page_file` + (wrapped in :py:func:`_page_worker` to share + the processor instance across forked processes). + + \b + Returns a tuple of: + - the scheduled future object, + - the corresponding pageId, + - the corresponding input files. + """ + input_files : List[Optional[OcrdFileType]] = [None] * len(input_file_tuple) + page_id = next(input_file.pageId + for input_file in input_file_tuple + if input_file) + self._base_logger.info(f"preparing page {page_id}") + for i, input_file in enumerate(input_file_tuple): + if input_file is None: + # file/page not found in this file grp + continue + input_files[i] = input_file + if not self.download: + continue + try: + input_files[i] = self.workspace.download_file(input_file) + except (ValueError, FileNotFoundError, HTTPError) as e: + self._base_logger.error(repr(e)) + self._base_logger.warning(f"failed downloading file {input_file} for page {page_id}") + # process page + #executor.submit(self.process_page_file, *input_files) + return executor.submit(_page_worker, max_seconds, *input_files), page_id, input_files + + def process_workspace_handle_tasks(self, tasks : Dict[TFuture, Tuple[str, List[Optional[OcrdFileType]]]]) -> Tuple[int, int, Dict[str, int], int]: + """ + Look up scheduled per-page futures one by one, + handle errors (exceptions) and gather results. + + \b + Enforces policies configured by the following + environment variables: + - `OCRD_EXISTING_OUTPUT` (abort/skip/overwrite) + - `OCRD_MISSING_OUTPUT` (abort/skip/fallback-copy) + - `OCRD_MAX_MISSING_OUTPUTS` (abort after all). + + \b + Returns a tuple of: + - the number of successfully processed pages + - the number of failed (i.e. skipped or copied) pages + - a dict of the type and corresponding number of exceptions seen + - the number of total requested pages (i.e. success+fail+existing). + + Delegates to :py:meth:`.process_workspace_handle_page_task` + for each page. 
+ """ + # aggregate info for logging: + nr_succeeded = 0 + nr_failed = 0 + nr_errors = defaultdict(int) # count causes + if config.OCRD_MISSING_OUTPUT == 'SKIP': + reason = "skipped" + elif config.OCRD_MISSING_OUTPUT == 'COPY': + reason = "fallback-copied" + for task in tasks: + # wait for results, handle errors + page_id, input_files = tasks[task] + result = self.process_workspace_handle_page_task(page_id, input_files, task) + if isinstance(result, Exception): + nr_errors[result.__class__.__name__] += 1 + nr_failed += 1 + # FIXME: this is just prospective, because len(tasks)==nr_failed+nr_succeeded is not guaranteed + if config.OCRD_MAX_MISSING_OUTPUTS > 0 and nr_failed / len(tasks) > config.OCRD_MAX_MISSING_OUTPUTS: + # already irredeemably many failures, stop short + nr_errors = dict(nr_errors) + raise Exception(f"too many failures with {reason} output ({nr_failed} of {nr_failed+nr_succeeded}, {str(nr_errors)})") + elif result: + nr_succeeded += 1 + # else skipped - already exists + nr_errors = dict(nr_errors) + if nr_failed > 0: + nr_all = nr_succeeded + nr_failed + if config.OCRD_MAX_MISSING_OUTPUTS > 0 and nr_failed / nr_all > config.OCRD_MAX_MISSING_OUTPUTS: + raise Exception(f"too many failures with {reason} output ({nr_failed} of {nr_all}, {str(nr_errors)})") + self._base_logger.warning("%s %d of %d pages due to %s", reason, nr_failed, nr_all, str(nr_errors)) + return nr_succeeded, nr_failed, nr_errors, len(tasks) + + def process_workspace_handle_page_task(self, page_id : str, input_files : List[Optional[OcrdFileType]], task : TFuture) -> Union[bool, Exception]: + """ + \b + Await a single page result and handle errors (exceptions), + enforcing policies configured by the following + environment variables: + - `OCRD_EXISTING_OUTPUT` (abort/skip/overwrite) + - `OCRD_MISSING_OUTPUT` (abort/skip/fallback-copy) + - `OCRD_MAX_MISSING_OUTPUTS` (abort after all). 
+ + \b + Returns + - true in case of success + - false in case the output already exists + - the exception in case of failure + """ + # FIXME: differentiate error cases in various ways: + # - ResourceNotFoundError → use ResourceManager to download (once), then retry + # - transient (I/O or OOM) error → maybe sleep, retry + # - persistent (data) error → skip / dummy / raise + try: + self._base_logger.debug("waiting for output of task %s (page %s)", task, page_id) + # timeout kwarg on future is useless: it only raises TimeoutError here, + # but does not stop the running process/thread, and executor itself + # offers nothing to that effect: + # task.result(timeout=max_seconds or None) + # so we instead applied the timeout within the worker function + task.result() + return True + except NotImplementedError: + # exclude NotImplementedError, so we can try process() below + raise + # handle input failures separately + except FileExistsError as err: + if config.OCRD_EXISTING_OUTPUT == 'ABORT': + raise err + if config.OCRD_EXISTING_OUTPUT == 'SKIP': + return False + if config.OCRD_EXISTING_OUTPUT == 'OVERWRITE': + # too late here, must not happen + raise Exception(f"got {err} despite OCRD_EXISTING_OUTPUT==OVERWRITE") + except KeyboardInterrupt: + raise + # broad coverage of output failures (including TimeoutError) + except Exception as err: + # FIXME: add re-usable/actionable logging + if config.OCRD_MISSING_OUTPUT == 'ABORT': + self._base_logger.error(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}") + raise err + self._base_logger.exception(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}") + if config.OCRD_MISSING_OUTPUT == 'SKIP': + pass + elif config.OCRD_MISSING_OUTPUT == 'COPY': + self._copy_page_file(input_files[0]) + else: + desc = config.describe('OCRD_MISSING_OUTPUT', wrap_text=False, indent_text=False) + raise ValueError(f"unknown configuration value {config.OCRD_MISSING_OUTPUT} - {desc}") + return err + + def _copy_page_file(self, input_file : OcrdFileType) -> None: + """ + Copy the given ``input_file`` of the :py:data:`workspace`, + representing one physical page (passed as one opened + :py:class:`~ocrd_models.OcrdFile` per input fileGrp) + and add it as if it was a processing result. + """ + input_pcgts : OcrdPage + assert isinstance(input_file, get_args(OcrdFileType)) + self._base_logger.debug(f"parsing file {input_file.ID} for page {input_file.pageId}") + try: + input_pcgts = page_from_file(input_file) + except ValueError as err: + # not PAGE and not an image to generate PAGE for + self._base_logger.error(f"non-PAGE input for page {input_file.pageId}: {err}") + return + output_file_id = make_file_id(input_file, self.output_file_grp) + input_pcgts.set_pcGtsId(output_file_id) + self.add_metadata(input_pcgts) + self.workspace.add_file( + file_id=output_file_id, + file_grp=self.output_file_grp, + page_id=input_file.pageId, + local_filename=os.path.join(self.output_file_grp, output_file_id + '.xml'), + mimetype=MIMETYPE_PAGE, + content=to_xml(input_pcgts), + ) + + def process_page_file(self, *input_files : Optional[OcrdFileType]) -> None: + """ + Process the given ``input_files`` of the :py:data:`workspace`, + representing one physical page (passed as one opened + :py:class:`.OcrdFile` per input fileGrp) + under the given :py:data:`.parameter`, and make sure the + results get added accordingly. + + (This uses :py:meth:`.process_page_pcgts`, but should be overridden by subclasses + to handle cases like multiple output fileGrps, non-PAGE input etc.) 
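+
+        (For example, a hypothetical override for non-PAGE output would parse
+        the inputs itself and call ``self.workspace.add_file`` with a
+        ``mimetype`` other than ``MIMETYPE_PAGE``.)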
+ """ + input_pcgts : List[Optional[OcrdPage]] = [None] * len(input_files) + assert isinstance(input_files[0], get_args(OcrdFileType)) + page_id = input_files[0].pageId + self._base_logger.info("processing page %s", page_id) + for i, input_file in enumerate(input_files): + assert isinstance(input_file, get_args(OcrdFileType)) + self._base_logger.debug(f"parsing file {input_file.ID} for page {page_id}") + try: + page_ = page_from_file(input_file) + assert isinstance(page_, OcrdPage) + input_pcgts[i] = page_ + except ValueError as err: + # not PAGE and not an image to generate PAGE for + self._base_logger.error(f"non-PAGE input for page {page_id}: {err}") + output_file_id = make_file_id(input_files[0], self.output_file_grp) + output_file = next(self.workspace.mets.find_files(ID=output_file_id), None) + if output_file and config.OCRD_EXISTING_OUTPUT != 'OVERWRITE': + # short-cut avoiding useless computation: + raise FileExistsError( + f"A file with ID=={output_file_id} already exists {output_file} and neither force nor ignore are set" + ) + result = self.process_page_pcgts(*input_pcgts, page_id=page_id) + for image_result in result.images: + image_file_id = f'{output_file_id}_{image_result.file_id_suffix}' + image_file_path = join(self.output_file_grp, f'{image_file_id}.png') + if isinstance(image_result.alternative_image, PageType): + # special case: not an alternative image, but replacing the original image + # (this is needed by certain processors when the original's coordinate system + # cannot or must not be kept) + image_result.alternative_image.set_imageFilename(image_file_path) + image_result.alternative_image.set_imageWidth(image_result.pil.width) + image_result.alternative_image.set_imageHeight(image_result.pil.height) + elif isinstance(image_result.alternative_image, AlternativeImageType): + image_result.alternative_image.set_filename(image_file_path) + elif image_result.alternative_image is None: + pass # do not reference in PAGE result + else: + raise ValueError(f"process_page_pcgts returned an OcrdPageResultImage of unknown type " + f"{type(image_result.alternative_image)}") + self.workspace.save_image_file( + image_result.pil, + image_file_id, + self.output_file_grp, + page_id=page_id, + file_path=image_file_path, + ) + result.pcgts.set_pcGtsId(output_file_id) + self.add_metadata(result.pcgts) + self.workspace.add_file( + file_id=output_file_id, + file_grp=self.output_file_grp, + page_id=page_id, + local_filename=os.path.join(self.output_file_grp, output_file_id + '.xml'), + mimetype=MIMETYPE_PAGE, + content=to_xml(result.pcgts), + ) + + def process_page_pcgts(self, *input_pcgts : Optional[OcrdPage], page_id : Optional[str] = None) -> OcrdPageResult: + """ + Process the given ``input_pcgts`` of the :py:data:`.workspace`, + representing one physical page (passed as one parsed + :py:class:`.OcrdPage` per input fileGrp) + under the given :py:data:`.parameter`, and return the + resulting :py:class:`.OcrdPageResult`. + + Optionally, add to the ``images`` attribute of the resulting + :py:class:`.OcrdPageResult` instances of :py:class:`.OcrdPageResultImage`, + which have required fields for ``pil`` (:py:class:`PIL.Image` image data), + ``file_id_suffix`` (used for generating IDs of the saved image) and + ``alternative_image`` (reference of the :py:class:`ocrd_models.ocrd_page.AlternativeImageType` + for setting the filename of the saved image). 
- def add_metadata(self, pcgts): + (This contains the main functionality and must be overridden by subclasses, + unless it does not get called by some overridden :py:meth:`.process_page_file`.) + """ + raise NotImplementedError() + + def add_metadata(self, pcgts: OcrdPage) -> None: """ Add PAGE-XML :py:class:`~ocrd_models.ocrd_page.MetadataItemType` ``MetadataItem`` describing - the processing step and runtime parameters to :py:class:`~ocrd_models.ocrd_page.PcGtsType` ``pcgts``. + the processing step and runtime parameters to :py:class:`.OcrdPage` ``pcgts``. """ - pcgts.get_Metadata().add_MetadataItem( + metadata_obj = pcgts.get_Metadata() + assert metadata_obj is not None + metadata_obj.add_MetadataItem( MetadataItemType(type_="processingStep", name=self.ocrd_tool['steps'][0], value=self.ocrd_tool['executable'], @@ -220,17 +880,16 @@ def add_metadata(self, pcgts): def resolve_resource(self, val): """ Resolve a resource name to an absolute file path with the algorithm in - https://ocr-d.de/en/spec/ocrd_tool#file-parameters + `spec <https://ocr-d.de/en/spec/ocrd_tool#file-parameters>`_ Args: val (string): resource value to resolve """ - initLogging() executable = self.ocrd_tool['executable'] - log = getLogger('ocrd.processor.base') if exists(val): - log.debug("Resolved to absolute path %s" % val) + self._base_logger.debug("Resolved to absolute path %s" % val) return val + # FIXME: remove once workspace arg / old_pwd is gone: if hasattr(self, 'old_pwd'): cwd = self.old_pwd else: @@ -239,11 +898,19 @@ def resolve_resource(self, val): cwd=cwd, moduled=self.moduledir) if exists(cand)] if ret: - log.debug("Resolved %s to absolute path %s" % (val, ret[0])) + self._base_logger.debug("Resolved %s to absolute path %s" % (val, ret[0])) return ret[0] raise ResourceNotFoundError(val, executable) def show_resource(self, val): + """ + Resolve a resource name to a file path with the algorithm in + `spec <https://ocr-d.de/en/spec/ocrd_tool#file-parameters>`_, + then print its contents to stdout. + + Args: + val (string): resource value to show + """ res_fname = self.resolve_resource(val) fpath = Path(res_fname) if fpath.is_dir(): @@ -308,8 +975,9 @@ def input_files(self): files for that page) - Otherwise raise an error (complaining that only PAGE-XML warrants having multiple images for a single page) - Algorithm <https://github.com/cisocrgroup/ocrd_cis/issues/57#issuecomment-656336593>_ - + + See `algorithm <https://github.com/cisocrgroup/ocrd_cis/issues/57#issuecomment-656336593>`_ + Returns: A list of :py:class:`ocrd_models.ocrd_file.OcrdFile` objects. """ @@ -350,11 +1018,13 @@ def zip_input_files(self, require_first=True, mimetype=None, on_error='skip'): - if ``last``, then the last matching file for the page will be silently selected (as if the last was the only match) - if ``abort``, then an exception will be raised. + Multiple matches for PAGE-XML will always raise an exception. Keyword Args: require_first (boolean): If true, then skip a page entirely whenever it is not available in the first input `fileGrp`. + on_error (string): How to handle multiple file matches per page. mimetype (string): If not `None`, filter by the specified MIME type (literal or regex prefixed by `//`). Otherwise prefer PAGE or image. @@ -364,36 +1034,30 @@ def zip_input_files(self, require_first=True, mimetype=None, on_error='skip'): if not self.input_file_grp: raise ValueError("Processor is missing input fileGrp") - LOG = getLogger('ocrd.processor.base') ifgs = self.input_file_grp.split(",") # Iterating over all files repeatedly may seem inefficient at first sight, # but the unnecessary OcrdFile instantiations for posterior fileGrp filtering # can actually be much more costly than traversing the ltree. # This might depend on the number of pages vs number of fileGrps.
- pages = dict() + pages = {} for i, ifg in enumerate(ifgs): files_ = sorted(self.workspace.mets.find_all_files( pageId=self.page_id, fileGrp=ifg, mimetype=mimetype), # sort by MIME type so PAGE comes before images key=lambda file_: file_.mimetype) - # Warn if no files found but pageId was specified because that - # might be because of invalid page_id (range) - if self.page_id and not files_: - msg = (f"Could not find any files for --page-id {self.page_id} - " - f"compare '{self.page_id}' with the output of 'orcd workspace list-page'.") - if on_error == 'abort': - raise ValueError(msg) - LOG.warning(msg) for file_ in files_: if not file_.pageId: + # ignore document-global files continue ift = pages.setdefault(file_.pageId, [None]*len(ifgs)) if ift[i]: - LOG.debug("another file %s for page %s in input file group %s", file_.ID, file_.pageId, ifg) + self._base_logger.debug(f"another file {file_.ID} for page {file_.pageId} in input file group {ifg}") # fileGrp has multiple files for this page ID if mimetype: # filter was active, this must not happen + self._base_logger.warning(f"added file {file_.ID} for page {file_.pageId} in input file group {ifg} " + f"conflicts with file {ift[i].ID} of same MIME type {mimetype} - on_error={on_error}") if on_error == 'skip': ift[i] = None elif on_error == 'first': @@ -401,9 +1065,7 @@ def zip_input_files(self, require_first=True, mimetype=None, on_error='skip'): elif on_error == 'last': ift[i] = file_ elif on_error == 'abort': - raise ValueError( - "Multiple '%s' matches for page '%s' in fileGrp '%s'." % ( - mimetype, file_.pageId, ifg)) + raise NonUniqueInputFile(ifg, file_.pageId, mimetype) else: raise Exception("Unknown 'on_error' strategy '%s'" % on_error) elif (ift[i].mimetype == MIMETYPE_PAGE and @@ -411,11 +1073,11 @@ def zip_input_files(self, require_first=True, mimetype=None, on_error='skip'): pass # keep PAGE match elif (ift[i].mimetype == MIMETYPE_PAGE and file_.mimetype == MIMETYPE_PAGE): - raise ValueError( - "Multiple PAGE-XML matches for page '%s' in fileGrp '%s'." % ( - file_.pageId, ifg)) + raise NonUniqueInputFile(ifg, file_.pageId, None) else: # filter was inactive but no PAGE is in control, this must not happen + self._base_logger.warning(f"added file {file_.ID} for page {file_.pageId} in input file group {ifg} " + f"conflicts with file {ift[i].ID} but no PAGE available - on_error={on_error}") if on_error == 'skip': ift[i] = None elif on_error == 'first': @@ -423,21 +1085,217 @@ def zip_input_files(self, require_first=True, mimetype=None, on_error='skip'): elif on_error == 'last': ift[i] = file_ elif on_error == 'abort': - raise ValueError( - "No PAGE-XML for page '%s' in fileGrp '%s' but multiple matches." % ( - file_.pageId, ifg)) + raise NonUniqueInputFile(ifg, file_.pageId, None) else: raise Exception("Unknown 'on_error' strategy '%s'" % on_error) else: - LOG.debug("adding file %s for page %s to input file group %s", file_.ID, file_.pageId, ifg) + self._base_logger.debug(f"adding file {file_.ID} for page {file_.pageId} to input file group {ifg}") ift[i] = file_ - ifts = list() + # Warn if no files found but pageId was specified, because that might be due to invalid page_id (range) + if self.page_id and not any(pages): + self._base_logger.critical(f"Could not find any files for selected pageId {self.page_id}.\n" + f"compare '{self.page_id}' with the output of 'ocrd workspace list-page'.") + ifts = [] for page, ifiles in pages.items(): for i, ifg in enumerate(ifgs): if not ifiles[i]: - # other fallback options?
- LOG.error('found no page %s in file group %s', - page, ifg) + # could be from non-unique with on_error=skip or from true gap + self._base_logger.error(f'Found no file for page {page} in file group {ifg}') + if config.OCRD_MISSING_INPUT == 'abort': + raise MissingInputFile(ifg, page, mimetype) + if not any(ifiles): + # must be from non-unique with on_error=skip + self._base_logger.warning(f'Found no files for {page} - skipping') + continue if ifiles[0] or not require_first: ifts.append(tuple(ifiles)) return ifts + +_page_worker_processor = None +""" +This global binding for the processor is required to avoid +squeezing the processor through a mp.Queue (which is impossible +due to unpicklable attributes like .workspace.mets._tree anyway) +when calling Processor.process_page_file as page worker processes +in Processor.process_workspace. Forking allows inheriting global +objects, and with the METS Server we do not mutate the local +processor instance anyway. +""" +def _page_worker_set_ctxt(processor, log_queue): + """ + Overwrites `ocrd.processor.base._page_worker_processor` instance + for sharing with subprocesses in ProcessPoolExecutor initializer. + """ + global _page_worker_processor + _page_worker_processor = processor + if log_queue: + # replace all log handlers with just one queue handler + logging.root.handlers = [logging.handlers.QueueHandler(log_queue)] + +def _page_worker(timeout, *input_files): + """ + Wraps a `Processor.process_page_file` call as payload (call target) + of the ProcessPoolExecutor workers, but also enforces the given timeout. + """ + page_id = next((file.pageId for file in input_files + if hasattr(file, 'pageId')), "") + if timeout > 0: + timer = Timer(timeout, interrupt_main) + timer.start() + try: + _page_worker_processor.process_page_file(*input_files) + _page_worker_processor.logger.debug("page worker completed for page %s", page_id) + except KeyboardInterrupt: + _page_worker_processor.logger.debug("page worker timed out for page %s", page_id) + raise TimeoutError() + finally: + if timeout > 0: + timer.cancel() + +def generate_processor_help(ocrd_tool, processor_instance=None, subcommand=None): + """Generate a string describing the full CLI of this processor including params. + + Args: + ocrd_tool (dict): this processor's ``tools`` section of the module's ``ocrd-tool.json`` + processor_instance (object, optional): the processor implementation + (for adding any module/class/function docstrings) + subcommand (string): 'worker' or 'server' + """ + doc_help = '' + if processor_instance: + module = inspect.getmodule(processor_instance) + if module and module.__doc__: + doc_help += '\n' + inspect.cleandoc(module.__doc__) + '\n' + if processor_instance.__doc__: + doc_help += '\n' + inspect.cleandoc(processor_instance.__doc__) + '\n' + # Try to find the most concrete docstring among the various methods that an implementation + # could overload, first come first served. + # In doing so, compare with Processor to avoid a glitch in the way py>=3.5 inherits docstrings. + # (They are supposed to only repeat information via inspect.getdoc, rather than inherit __doc__ itself.)
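+ # e.g. a subclass that only overrides process_page_pcgts gets that method's own docstring shown, while docstrings merely inherited from Processor are skipped by the comparison below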
+ for method in ['process_page_pcgts', 'process_page_file', 'process_workspace', 'process']: + instance_method = getattr(processor_instance, method) + superclass_method = getattr(Processor, method) + if instance_method.__doc__ and instance_method.__doc__ != superclass_method.__doc__: + doc_help += '\n' + inspect.cleandoc(instance_method.__doc__) + '\n' + break + if doc_help: + doc_help = '\n\n' + wrap_text(doc_help, width=72, + initial_indent=' > ', + subsequent_indent=' > ', + preserve_paragraphs=True) + subcommands = '''\ + worker Start a processing worker rather than do local processing + server Start a processor server rather than do local processing +''' + + processing_worker_options = '''\ + --queue The RabbitMQ server address in format + "amqp://{user}:{pass}@{host}:{port}/{vhost}" + [amqp://admin:admin@localhost:5672] + --database The MongoDB server address in format + "mongodb://{host}:{port}" + [mongodb://localhost:27018] + --log-filename Filename to redirect STDOUT/STDERR to, + if specified. +''' + + processing_server_options = '''\ + --address The Processor server address in format + "{host}:{port}" + --database The MongoDB server address in format + "mongodb://{host}:{port}" + [mongodb://localhost:27018] +''' + + processing_options = '''\ + -m, --mets URL-PATH URL or file path of METS to process [./mets.xml] + -w, --working-dir PATH Working directory of local workspace [dirname(URL-PATH)] + -I, --input-file-grp USE File group(s) used as input + -O, --output-file-grp USE File group(s) used as output + -g, --page-id ID Physical page ID(s) to process instead of full document [] + --overwrite Remove existing output pages/images + (with "--page-id", remove only those). + Short-hand for OCRD_EXISTING_OUTPUT=OVERWRITE + --debug Abort on any errors with full stack trace. + Short-hand for OCRD_MISSING_OUTPUT=ABORT + --profile Enable profiling + --profile-file PROF-PATH Write cProfile stats to PROF-PATH. Implies "--profile" + -p, --parameter JSON-PATH Parameters, either verbatim JSON string + or JSON file path + -P, --param-override KEY VAL Override a single JSON object key-value pair, + taking precedence over --parameter + -U, --mets-server-url URL URL of a METS Server for parallel incremental access to METS + If URL starts with http:// start an HTTP server there, + otherwise URL is a path to an on-demand-created unix socket + -l, --log-level [OFF|ERROR|WARN|INFO|DEBUG|TRACE] + Override log level globally [INFO] + --log-filename LOG-PATH File to redirect stderr logging to (overriding ocrd_logging.conf). 
+''' + + information_options = '''\ + -C, --show-resource RESNAME Dump the content of processor resource RESNAME + -L, --list-resources List names of processor resources + -J, --dump-json Dump tool description as JSON + -D, --dump-module-dir Show the 'module' resource location path for this processor + -h, --help Show this message + -V, --version Show version +''' + + parameter_help = '' + if 'parameters' not in ocrd_tool or not ocrd_tool['parameters']: + parameter_help = ' NONE\n' + else: + def wrap(s): + return wrap_text(s, initial_indent=' '*3, + subsequent_indent=' '*4, + width=72, preserve_paragraphs=True) + for param_name, param in ocrd_tool['parameters'].items(): + parameter_help += wrap('"%s" [%s%s]' % ( + param_name, + param['type'], + ' - REQUIRED' if 'required' in param and param['required'] else + ' - %s' % json.dumps(param['default']) if 'default' in param else '')) + parameter_help += '\n ' + wrap(param['description']) + if 'enum' in param: + parameter_help += '\n ' + wrap('Possible values: %s' % json.dumps(param['enum'])) + parameter_help += "\n" + + if not subcommand: + return f'''\ +Usage: {ocrd_tool['executable']} [worker|server] [OPTIONS] + + {ocrd_tool['description']}{doc_help} + +Subcommands: +{subcommands} +Options for processing: +{processing_options} +Options for information: +{information_options} +Parameters: +{parameter_help} +''' + elif subcommand == 'worker': + return f'''\ +Usage: {ocrd_tool['executable']} worker [OPTIONS] + + Run {ocrd_tool['executable']} as a processing worker. + + {ocrd_tool['description']}{doc_help} + +Options: +{processing_worker_options} +''' + elif subcommand == 'server': + return f'''\ +Usage: {ocrd_tool['executable']} server [OPTIONS] + + Run {ocrd_tool['executable']} as a processor server. + + {ocrd_tool['description']}{doc_help} + +Options: +{processing_server_options} +''' + else: + pass diff --git a/src/ocrd/processor/builtin/dummy/ocrd-tool.json b/src/ocrd/processor/builtin/dummy/ocrd-tool.json index 30a6d99fd9..ef4a4810fe 100644 --- a/src/ocrd/processor/builtin/dummy/ocrd-tool.json +++ b/src/ocrd/processor/builtin/dummy/ocrd-tool.json @@ -1,12 +1,14 @@ { + "version": "1.0.0", + "git_url": "https://github.com/OCR-D/core", "tools": { "ocrd-dummy": { "executable": "ocrd-dummy", "description": "Bare-bones processor creates PAGE-XML and optionally copies file from input group to output group", "steps": ["preprocessing/optimization"], "categories": ["Image preprocessing"], - "input_file_grp": "DUMMY_INPUT", - "output_file_grp": "DUMMY_OUTPUT", + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, "parameters": { "copy_files": { "type": "boolean", diff --git a/src/ocrd/processor/builtin/dummy_processor.py b/src/ocrd/processor/builtin/dummy_processor.py index 774332a733..72a260968f 100644 --- a/src/ocrd/processor/builtin/dummy_processor.py +++ b/src/ocrd/processor/builtin/dummy_processor.py @@ -1,87 +1,82 @@ # pylint: disable=missing-module-docstring,invalid-name -from os.path import join, basename +from os.path import join +from typing import Optional import click from ocrd import Processor from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor -from ocrd_models.ocrd_page import to_xml +from ocrd.processor.ocrd_page_result import OcrdPageResult +from ocrd_models.ocrd_file import OcrdFileType +from ocrd_models.ocrd_page import OcrdPage, to_xml from ocrd_utils import ( - getLogger, - assert_file_grp_cardinality, make_file_id, MIME_TO_EXT, MIMETYPE_PAGE, parse_json_string_with_comments, - resource_string +
resource_string, + config ) from ocrd_modelfactory import page_from_file -OCRD_TOOL = parse_json_string_with_comments(resource_string(__package__ + '.dummy', 'ocrd-tool.json')) - class DummyProcessor(Processor): """ Bare-bones processor creates PAGE-XML and optionally copies file from input group to output group """ - def process(self) -> None: - LOG = getLogger('ocrd.dummy') - assert_file_grp_cardinality(self.input_file_grp, 1) - assert_file_grp_cardinality(self.output_file_grp, 1) - copy_files = self.parameter['copy_files'] - for input_file in self.input_files: - input_file = self.workspace.download_file(input_file) + def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult: + assert input_pcgts[0] + # nothing to do here + return OcrdPageResult(input_pcgts[0]) + + def process_page_file(self, *input_files: Optional[OcrdFileType]) -> None: + input_file = input_files[0] + assert input_file + assert input_file.local_filename + if self.parameter['copy_files'] and input_file.mimetype != MIMETYPE_PAGE: + # we need to mimic the actual copying in addition to the PAGE boilerplate file_id = make_file_id(input_file, self.output_file_grp) ext = MIME_TO_EXT.get(input_file.mimetype, '') local_filename = join(self.output_file_grp, file_id + ext) - pcgts = page_from_file(self.workspace.download_file(input_file)) - pcgts.set_pcGtsId(file_id) - self.add_metadata(pcgts) - if input_file.mimetype == MIMETYPE_PAGE: - LOG.info("cp %s %s # %s -> %s", input_file.url, local_filename, input_file.ID, file_id) - # Source file is PAGE-XML: Write out in-memory PcGtsType - self.workspace.add_file( + self.logger.info("cp %s %s # %s -> %s", input_file.url, local_filename, input_file.ID, file_id) + with open(input_file.local_filename, 'rb') as f: + output_file = self.workspace.add_file( file_id=file_id, file_grp=self.output_file_grp, page_id=input_file.pageId, mimetype=input_file.mimetype, local_filename=local_filename, - content=to_xml(pcgts).encode('utf-8')) + content=f.read(), + ) + file_id = file_id + '_PAGE' + pcgts = page_from_file(output_file) + assert isinstance(pcgts, OcrdPage) + pcgts = self.process_page_pcgts(pcgts).pcgts + pcgts.set_pcGtsId(file_id) + self.add_metadata(pcgts) + self.logger.info("Add PAGE-XML %s generated for %s", file_id, output_file) + self.workspace.add_file(file_id=file_id, + file_grp=self.output_file_grp, + page_id=input_file.pageId, + local_filename=join(self.output_file_grp, file_id + '.xml'), + mimetype=MIMETYPE_PAGE, + content=to_xml(pcgts), + ) + else: + if self.parameter['copy_files']: + self.logger.info("Not copying %s because it is a PAGE-XML file, which gets identity-transformed", input_file.local_filename) else: - # Source file is not PAGE-XML: Copy byte-by-byte unless copy_files is False - if not copy_files: - LOG.info("Not copying %s because it is not a PAGE-XML file and copy_files was false" % input_file.local_filename) - else: - LOG.info("cp %s %s # %s -> %s", input_file.url, local_filename, input_file.ID, file_id) - with open(input_file.local_filename, 'rb') as f: - content = f.read() - self.workspace.add_file( - ID=file_id, - file_grp=self.output_file_grp, - pageId=input_file.pageId, - mimetype=input_file.mimetype, - local_filename=local_filename, - content=content) - if input_file.mimetype.startswith('image/'): - # write out the PAGE-XML representation for this image - page_file_id = file_id + '_PAGE' - pcgts.set_pcGtsId(page_file_id) - pcgts.get_Page().set_imageFilename(local_filename if copy_files else 
input_file.local_filename) - page_filename = join(self.output_file_grp, file_id + '.xml') - LOG.info("Add PAGE-XML %s generated for %s at %s", page_file_id, file_id, page_filename) - self.workspace.add_file( - file_id=page_file_id, - file_grp=self.output_file_grp, - page_id=input_file.pageId, - mimetype=MIMETYPE_PAGE, - local_filename=page_filename, - content=to_xml(pcgts).encode('utf-8')) + self.logger.info("Not copying %s because it is not a PAGE-XML file and copy_files was false", input_file.local_filename) + # we can rely on base implementation verbatim + super().process_page_file(input_file) + @property + def metadata_filename(self): + return 'processor/builtin/dummy/ocrd-tool.json' - def __init__(self, *args, **kwargs): - kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-dummy'] - kwargs['version'] = '0.0.3' - super(DummyProcessor, self).__init__(*args, **kwargs) + @property + def executable(self): + return 'ocrd-dummy' @click.command() @ocrd_cli_options diff --git a/src/ocrd/processor/helpers.py b/src/ocrd/processor/helpers.py index f5b6010636..757f7ac045 100644 --- a/src/ocrd/processor/helpers.py +++ b/src/ocrd/processor/helpers.py @@ -1,21 +1,19 @@ """ Helper methods for running and documenting processors """ -from os import chdir, getcwd from time import perf_counter, process_time +from os import times from functools import lru_cache import json import inspect from subprocess import run -from typing import List +from typing import List, Optional -from click import wrap_text -from ocrd.workspace import Workspace +from ..workspace import Workspace from ocrd_utils import freeze_args, getLogger, config, setOverrideLogLevel, getLevelName, sparkline __all__ = [ - 'generate_processor_help', 'run_cli', 'run_processor' ] @@ -39,10 +37,7 @@ def run_processor( log_level=None, input_file_grp=None, output_file_grp=None, - show_resource=None, - list_resources=False, parameter=None, - parameter_override=None, working_dir=None, mets_server_url=None, instance_caching=False @@ -83,9 +78,8 @@ def run_processor( log = getLogger('ocrd.processor.helpers.run_processor') log.debug("Running processor %s", processorClass) - old_cwd = getcwd() processor = get_processor( - processor_class=processorClass, + processorClass, parameter=parameter, workspace=None, page_id=page_id, @@ -93,21 +87,20 @@ def run_processor( output_file_grp=output_file_grp, instance_caching=instance_caching ) - processor.workspace = workspace - chdir(processor.workspace.directory) ocrd_tool = processor.ocrd_tool name = '%s v%s' % (ocrd_tool['executable'], processor.version) - otherrole = ocrd_tool['steps'][0] + otherrole = ocrd_tool.get('steps', [''])[0] logProfile = getLogger('ocrd.process.profile') log.debug("Processor instance %s (%s doing %s)", processor, name, otherrole) t0_wall = perf_counter() t0_cpu = process_time() + t0_os = times() if any(x in config.OCRD_PROFILE for x in ['RSS', 'PSS']): backend = 'psutil_pss' if 'PSS' in config.OCRD_PROFILE else 'psutil' - from memory_profiler import memory_usage + from memory_profiler import memory_usage # pylint: disable=import-outside-toplevel try: - mem_usage = memory_usage(proc=processor.process, + mem_usage = memory_usage(proc=(processor.process_workspace, [workspace], {}), # only run process once max_iterations=1, interval=.1, timeout=None, timestamps=True, @@ -118,8 +111,6 @@ def run_processor( except Exception as err: log.exception("Failure in processor '%s'" % ocrd_tool['executable']) raise err - finally: - chdir(old_cwd) mem_usage_values = [mem for mem, _ in mem_usage] mem_output = 
'memory consumption: ' mem_output += sparkline(mem_usage_values) @@ -127,16 +118,20 @@ def run_processor( logProfile.info(mem_output) else: try: - processor.process() + processor.process_workspace(workspace) except Exception as err: log.exception("Failure in processor '%s'" % ocrd_tool['executable']) raise err - finally: - chdir(old_cwd) t1_wall = perf_counter() - t0_wall t1_cpu = process_time() - t0_cpu - logProfile.info("Executing processor '%s' took %fs (wall) %fs (CPU)( [--input-file-grp='%s' --output-file-grp='%s' --parameter='%s' --page-id='%s']" % ( + t1_os = times() + # add CPU time from child processes (page worker etc) + t1_cpu += t1_os.children_user - t0_os.children_user + t1_cpu += t1_os.children_system - t0_os.children_system + logProfile.info( + "Executing processor '%s' took %fs (wall) %fs (CPU) " + "[--input-file-grp='%s' --output-file-grp='%s' --parameter='%s' --page-id='%s']", ocrd_tool['executable'], t1_wall, t1_cpu, @@ -144,7 +139,7 @@ def run_processor( processor.output_file_grp or '', json.dumps(processor.parameter) or '', processor.page_id or '' - )) + ) workspace.mets.add_agent( name=name, _type='OTHER', @@ -167,6 +162,7 @@ def run_cli( workspace=None, page_id=None, overwrite=None, + debug=None, log_level=None, log_filename=None, input_file_grp=None, @@ -209,6 +205,8 @@ def run_cli( args += ['--parameter', parameter] if overwrite: args += ['--overwrite'] + if debug: + args += ['--debug'] if mets_server_url: args += ['--mets-server-url', mets_server_url] log = getLogger('ocrd.processor.helpers.run_cli') @@ -216,151 +214,15 @@ def run_cli( if not log_filename: result = run(args, check=False) else: - with open(log_filename, 'a') as file_desc: + with open(log_filename, 'a', encoding='utf-8') as file_desc: result = run(args, check=False, stdout=file_desc, stderr=file_desc) return result.returncode -def generate_processor_help(ocrd_tool, processor_instance=None, subcommand=None): - """Generate a string describing the full CLI of this processor including params. - - Args: - ocrd_tool (dict): this processor's ``tools`` section of the module's ``ocrd-tool.json`` - processor_instance (object, optional): the processor implementation - (for adding any module/class/function docstrings) - subcommand (string): 'worker' or 'server' - """ - doc_help = '' - if processor_instance: - module = inspect.getmodule(processor_instance) - if module and module.__doc__: - doc_help += '\n' + inspect.cleandoc(module.__doc__) + '\n' - if processor_instance.__doc__: - doc_help += '\n' + inspect.cleandoc(processor_instance.__doc__) + '\n' - if processor_instance.process.__doc__: - doc_help += '\n' + inspect.cleandoc(processor_instance.process.__doc__) + '\n' - if doc_help: - doc_help = '\n\n' + wrap_text(doc_help, width=72, - initial_indent=' > ', - subsequent_indent=' > ', - preserve_paragraphs=True) - subcommands = '''\ - worker Start a processing worker rather than do local processing - server Start a processor server rather than do local processing -''' - - processing_worker_options = '''\ - --queue The RabbitMQ server address in format - "amqp://{user}:{pass}@{host}:{port}/{vhost}" - [amqp://admin:admin@localhost:5672] - --database The MongoDB server address in format - "mongodb://{host}:{port}" - [mongodb://localhost:27018] - --log-filename Filename to redirect STDOUT/STDERR to, - if specified.
-''' - - processing_server_options = '''\ - --address The Processor server address in format - "{host}:{port}" - --database The MongoDB server address in format - "mongodb://{host}:{port}" - [mongodb://localhost:27018] -''' - - processing_options = '''\ - -m, --mets URL-PATH URL or file path of METS to process [./mets.xml] - -w, --working-dir PATH Working directory of local workspace [dirname(URL-PATH)] - -I, --input-file-grp USE File group(s) used as input - -O, --output-file-grp USE File group(s) used as output - -g, --page-id ID Physical page ID(s) to process instead of full document [] - --overwrite Remove existing output pages/images - (with "--page-id", remove only those) - --profile Enable profiling - --profile-file PROF-PATH Write cProfile stats to PROF-PATH. Implies "--profile" - -p, --parameter JSON-PATH Parameters, either verbatim JSON string - or JSON file path - -P, --param-override KEY VAL Override a single JSON object key-value pair, - taking precedence over --parameter - -U, --mets-server-url URL URL of a METS Server for parallel incremental access to METS - If URL starts with http:// start an HTTP server there, - otherwise URL is a path to an on-demand-created unix socket - -l, --log-level [OFF|ERROR|WARN|INFO|DEBUG|TRACE] - Override log level globally [INFO] -''' - - information_options = '''\ - -C, --show-resource RESNAME Dump the content of processor resource RESNAME - -L, --list-resources List names of processor resources - -J, --dump-json Dump tool description as JSON - -D, --dump-module-dir Show the 'module' resource location path for this processor - -h, --help Show this message - -V, --version Show version -''' - - parameter_help = '' - if 'parameters' not in ocrd_tool or not ocrd_tool['parameters']: - parameter_help = ' NONE\n' - else: - def wrap(s): - return wrap_text(s, initial_indent=' '*3, - subsequent_indent=' '*4, - width=72, preserve_paragraphs=True) - for param_name, param in ocrd_tool['parameters'].items(): - parameter_help += wrap('"%s" [%s%s]' % ( - param_name, - param['type'], - ' - REQUIRED' if 'required' in param and param['required'] else - ' - %s' % json.dumps(param['default']) if 'default' in param else '')) - parameter_help += '\n ' + wrap(param['description']) - if 'enum' in param: - parameter_help += '\n ' + wrap('Possible values: %s' % json.dumps(param['enum'])) - parameter_help += "\n" - - if not subcommand: - return f'''\ -Usage: {ocrd_tool['executable']} [worker|server] [OPTIONS] - {ocrd_tool['description']}{doc_help} - -Subcommands: -{subcommands} -Options for processing: -{processing_options} -Options for information: -{information_options} -Parameters: -{parameter_help} -''' - elif subcommand == 'worker': - return f'''\ -Usage: {ocrd_tool['executable']} worker [OPTIONS] - - Run {ocrd_tool['executable']} as a processing worker. - - {ocrd_tool['description']}{doc_help} - -Options: -{processing_worker_options} -''' - elif subcommand == 'server': - return f'''\ -Usage: {ocrd_tool['executable']} server [OPTIONS] - - Run {ocrd_tool['executable']} as a processor sever. - - {ocrd_tool['description']}{doc_help} - -Options: -{processing_server_options} -''' - else: - pass - - -# Taken from https://github.com/OCR-D/core/pull/884 -@freeze_args -@lru_cache(maxsize=config.OCRD_MAX_PROCESSOR_CACHE) +# not decorated here but at runtime (on first use) +#@freeze_args +#@lru_cache(maxsize=config.OCRD_MAX_PROCESSOR_CACHE) def get_cached_processor(parameter: dict, processor_class): """ Call this function to get back an instance of a processor. 
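# Sketch of the runtime decoration applied on first use (names as in the hunk below),
# which lets the cache size depend on the processor class:
#
#     maxsize = min(config.OCRD_MAX_PROCESSOR_CACHE, processor_class.max_instances)
#     get_cached_processor = freeze_args(lru_cache(maxsize=maxsize)(get_cached_processor))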
@@ -373,36 +235,42 @@ def get_cached_processor(parameter: dict, processor_class): Otherwise, an instance of the `:py:class:~ocrd.Processor` is returned. """ if processor_class: - dict_params = dict(parameter) if parameter else None - return processor_class(workspace=None, parameter=dict_params) + processor = processor_class(None, parameter=dict(parameter)) + return processor return None - def get_processor( processor_class, - parameter: dict, - workspace: Workspace = None, - page_id: str = None, - input_file_grp: List[str] = None, - output_file_grp: List[str] = None, + parameter: Optional[dict] = None, + workspace: Optional[Workspace] = None, + page_id: Optional[str] = None, + input_file_grp: Optional[List[str]] = None, + output_file_grp: Optional[List[str]] = None, instance_caching: bool = False, ): if processor_class: + if parameter is None: + parameter = {} if instance_caching: - cached_processor = get_cached_processor( - parameter=parameter, - processor_class=processor_class - ) - cached_processor.workspace = workspace - cached_processor.page_id = page_id - cached_processor.input_file_grp = input_file_grp - cached_processor.output_file_grp = output_file_grp - return cached_processor - return processor_class( - workspace=workspace, - page_id=page_id, - input_file_grp=input_file_grp, - output_file_grp=output_file_grp, - parameter=parameter - ) + global get_cached_processor + if not hasattr(get_cached_processor, '__wrapped__'): + # first call: wrap + if processor_class.max_instances < 0: + maxsize = config.OCRD_MAX_PROCESSOR_CACHE + else: + maxsize = min(config.OCRD_MAX_PROCESSOR_CACHE, processor_class.max_instances) + # wrapping in call cache + # wrapping dict into frozendict (from https://github.com/OCR-D/core/pull/884) + get_cached_processor = freeze_args(lru_cache(maxsize=maxsize)(get_cached_processor)) + processor = get_cached_processor(parameter, processor_class) + else: + # avoid passing workspace already (deprecated chdir behaviour) + processor = processor_class(None, parameter=parameter) + assert processor + # set current processing parameters + processor.workspace = workspace + processor.page_id = page_id + processor.input_file_grp = input_file_grp + processor.output_file_grp = output_file_grp + return processor raise ValueError("Processor class is not known") diff --git a/src/ocrd/processor/ocrd_page_result.py b/src/ocrd/processor/ocrd_page_result.py new file mode 100644 index 0000000000..5f21a72f57 --- /dev/null +++ b/src/ocrd/processor/ocrd_page_result.py @@ -0,0 +1,17 @@ +from dataclasses import dataclass, field +from typing import List, Union, Optional +from ocrd_models.ocrd_page import OcrdPage +from PIL.Image import Image + +from ocrd_models.ocrd_page_generateds import AlternativeImageType, PageType + +@dataclass +class OcrdPageResultImage(): + pil : Image + file_id_suffix : str + alternative_image : Optional[Union[AlternativeImageType, PageType]] + +@dataclass +class OcrdPageResult(): + pcgts : OcrdPage + images : List[OcrdPageResultImage] = field(default_factory=list) diff --git a/src/ocrd/resolver.py b/src/ocrd/resolver.py index 124d006927..7ed58d4d4d 100644 --- a/src/ocrd/resolver.py +++ b/src/ocrd/resolver.py @@ -18,7 +18,6 @@ ) from ocrd.workspace import Workspace from ocrd_models import OcrdMets -from ocrd_models.constants import NAMESPACES as NS from ocrd_models.utils import handle_oai_response class Resolver(): @@ -310,5 +309,3 @@ def resolve_mets_arguments(self, directory, mets_url, mets_basename=DEFAULT_METS raise ValueError("--mets '%s' has a directory part 
inconsistent with --directory '%s'" % (mets_url, directory)) return str(Path(directory).resolve()), str(mets_url), str(mets_basename), mets_server_url - - diff --git a/src/ocrd/resource_manager.py b/src/ocrd/resource_manager.py index 44bbd081bc..3c4c603060 100644 --- a/src/ocrd/resource_manager.py +++ b/src/ocrd/resource_manager.py @@ -1,6 +1,6 @@ from pathlib import Path from os.path import join -from os import environ, listdir, makedirs, getcwd, path, unlink +from os import environ, listdir, getcwd, unlink from shutil import copytree, rmtree, copy from fnmatch import filter as apply_glob from datetime import datetime @@ -13,14 +13,18 @@ from gdown.download import get_url_from_gdrive_confirmation from yaml import safe_load, safe_dump +# pylint: disable=wrong-import-position + # https://github.com/OCR-D/core/issues/867 # https://stackoverflow.com/questions/50900727/skip-converting-entities-while-loading-a-yaml-string-using-pyyaml import yaml.constructor -yaml.constructor.SafeConstructor.yaml_constructors[u'tag:yaml.org,2002:timestamp'] = \ - yaml.constructor.SafeConstructor.yaml_constructors[u'tag:yaml.org,2002:str'] +yaml.constructor.SafeConstructor.yaml_constructors['tag:yaml.org,2002:timestamp'] = \ + yaml.constructor.SafeConstructor.yaml_constructors['tag:yaml.org,2002:str'] + +# pylint: enable=wrong-import-position from ocrd_validators import OcrdResourceListValidator -from ocrd_utils import getLogger, directory_size, get_moduledir, EXT_TO_MIME, nth_url_segment, guess_media_type, config +from ocrd_utils import getLogger, directory_size, get_moduledir, guess_media_type, config from ocrd_utils.os import get_processor_resource_types, list_all_resources, pushd_popd, get_ocrd_tool_json from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT @@ -248,7 +252,7 @@ def _download_impl(url, filename, progress_cb=None, size=None): if "Content-Disposition" not in r.headers: url = get_url_from_gdrive_confirmation(r.text) except RuntimeError as e: - log.warning("Cannot unwrap Google Drive URL: ", e) + log.warning("Cannot unwrap Google Drive URL: %s", e) with open(filename, 'wb') as f: with requests.get(url, stream=True) as r: r.raise_for_status() diff --git a/src/ocrd/workspace.py b/src/ocrd/workspace.py index ff856011be..3cbc58c78c 100644 --- a/src/ocrd/workspace.py +++ b/src/ocrd/workspace.py @@ -1,7 +1,7 @@ import io from os import makedirs, unlink, listdir, path from pathlib import Path -from shutil import move, copyfileobj +from shutil import copyfileobj from re import sub from tempfile import NamedTemporaryFile from contextlib import contextmanager @@ -19,11 +19,13 @@ from ocrd_modelfactory import exif_from_filename, page_from_file from ocrd_utils import ( atomic_write, + config, getLogger, image_from_polygon, coordinates_of_segment, adjust_canvas_to_rotation, adjust_canvas_to_transposition, + scale_coordinates, shift_coordinates, rotate_coordinates, transform_coordinates, @@ -41,7 +43,7 @@ MIME_TO_EXT, MIME_TO_PIL, MIMETYPE_PAGE, - REGEX_PREFIX + REGEX_PREFIX, ) from .workspace_backup import WorkspaceBackupManager @@ -74,7 +76,6 @@ class Workspace(): `OcrdMets` of this workspace. If `None`, then the METS will be read from and written to the filesystem directly. baseurl (string, None) : Base URL to prefix to relative URL. 
- overwrite_mode (boolean, False) : Whether to force add operations on this workspace globally """ def __init__( @@ -90,14 +91,13 @@ def __init__( self.resolver = resolver self.directory = directory self.mets_target = str(Path(directory, mets_basename)) - self.overwrite_mode = False self.is_remote = bool(mets_server_url) if mets is None: if self.is_remote: mets = ClientSideOcrdMets(mets_server_url, self.directory) if mets.workspace_path != self.directory: - raise ValueError(f"METS server {mets_server_url} workspace directory {mets.workspace_path} differs " - f"from local workspace directory {self.directory}. These are not the same workspaces.") + raise ValueError(f"METS server {mets_server_url} workspace directory '{mets.workspace_path}' differs " + f"from local workspace directory '{self.directory}'. These are not the same workspaces.") else: mets = OcrdMets(filename=self.mets_target) self.mets = mets @@ -111,7 +111,7 @@ def __init__( def __repr__(self): return 'Workspace[remote=%s, directory=%s, baseurl=%s, file_groups=%s, files=%s]' % ( - not not self.is_remote, + self.is_remote, self.directory, self.baseurl, self.mets.file_groups, @@ -122,7 +122,10 @@ def reload_mets(self): """ Reload METS from the filesystem. """ - self.mets = OcrdMets(filename=self.mets_target) + if self.is_remote: + self.mets.reload() + else: + self.mets = OcrdMets(filename=self.mets_target) @deprecated_alias(pageId="page_id") @deprecated_alias(ID="file_id") @@ -242,8 +245,6 @@ def remove_file(self, file_id, force=False, keep_file=False, page_recursive=Fals """ log = getLogger('ocrd.workspace.remove_file') log.debug('Deleting mets:file %s', file_id) - if self.overwrite_mode: - force = True if isinstance(file_id, OcrdFile): file_id = file_id.ID try: @@ -295,9 +296,6 @@ def remove_file_group(self, USE, recursive=False, force=False, keep_files=False, page_same_group (boolean): Remove only images in the same file group as the PAGE-XML. Has no effect unless ``page_recursive`` is `True`. """ - if not force and self.overwrite_mode: - force = True - if (not USE.startswith(REGEX_PREFIX)) and (USE not in self.mets.file_groups) and (not force): raise Exception("No such fileGrp: %s" % USE) @@ -418,8 +416,6 @@ def add_file(self, file_grp, content=None, **kwargs) -> Union[OcrdFile, ClientSi raise ValueError("workspace.add_file must be passed a 'page_id' kwarg, even if it is None.") if content is not None and not kwargs.get('local_filename'): raise Exception("'content' was set but no 'local_filename'") - if self.overwrite_mode: - kwargs['force'] = True with pushd_popd(self.directory): if kwargs.get('local_filename'): @@ -432,6 +428,8 @@ def add_file(self, file_grp, content=None, **kwargs) -> Union[OcrdFile, ClientSi kwargs["pageId"] = kwargs.pop("page_id") if "file_id" in kwargs: kwargs["ID"] = kwargs.pop("file_id") + if config.OCRD_EXISTING_OUTPUT == 'OVERWRITE': + kwargs["force"] = True ret = self.mets.add_file(file_grp, **kwargs) @@ -613,7 +611,6 @@ def image_from_page(self, page, page_id, Cropping uses a polygon mask (not just the bounding box rectangle). Areas outside the polygon will be filled according to ``fill``: - \b - if `"background"` (the default), then fill with the median color of the image; - else if `"none"`, then avoid masking polygons where possible @@ -635,6 +632,7 @@ def image_from_page(self, page, page_id, i.e. 
after cropping to the page's border / bounding box (if any) and deskewing with the page's orientation angle (if any) - `"angle"`: the rotation/reflection angle applied to the image so far, + - `"DPI"`: the pixel density of the original image, - `"features"`: the `AlternativeImage` `@comments` for the image, i.e. names of all applied operations that lead up to this result, * an :py:class:`ocrd_models.ocrd_exif.OcrdExif` instance associated with @@ -655,7 +653,7 @@ def image_from_page(self, page, page_id, log = getLogger('ocrd.workspace.image_from_page') page_image_info = self.resolve_image_exif(page.imageFilename) page_image = self._resolve_image_as_pil(page.imageFilename) - page_coords = dict() + page_coords = {} # use identity as initial affine coordinate transform: page_coords['transform'] = np.eye(3) # interim bbox (updated with each change to the transform): @@ -676,6 +674,13 @@ def image_from_page(self, page, page_id, page_coords['angle'] = 0 # nothing applied yet (depends on filters) log.debug("page '%s' has %s orientation=%d skew=%.2f", page_id, "border," if border else "", orientation, skew) + if page_image_info.resolution != 1: + dpi = page_image_info.resolution + if page_image_info.resolutionUnit == 'cm': + dpi = round(dpi * 2.54) + dpi = int(dpi) + log.debug("page '%s' images will use %d DPI from image meta-data", page_id, dpi) + page_coords['DPI'] = dpi # initialize AlternativeImage@comments classes as empty: page_coords['features'] = '' @@ -794,6 +799,11 @@ def image_from_page(self, page, page_id, 'filter="%s" in page "%s"' % ( feature_filter, page_id)) page_image.format = 'PNG' # workaround for tesserocr#194 + # ensure DPI will be set in image meta-data again + if 'DPI' in page_coords: + dpi = page_coords['DPI'] + if 'dpi' not in page_image.info: + page_image.info['dpi'] = (dpi, dpi) return page_image, page_coords, page_image_info def image_from_segment(self, segment, parent_image, parent_coords, @@ -814,6 +824,7 @@ def image_from_segment(self, segment, parent_image, parent_coords, converts from absolute coordinates to those relative to the image, i.e. after applying all operations (starting with the original image) - `"angle"`: the rotation/reflection angle applied to the image so far, + - `"DPI"`: the pixel density of the parent image, - `"features"`: the ``AlternativeImage/@comments`` for the image, i.e. names of all operations that lead up to this result, and Keyword Args: @@ -843,7 +854,6 @@ def image_from_segment(self, segment, parent_image, parent_coords, Cropping uses a polygon mask (not just the bounding box rectangle). Areas outside the polygon will be filled according to `fill`: - \b - if `"background"` (the default), then fill with the median color of the image; - else if `"none"`, then avoid masking polygons where possible @@ -879,6 +889,7 @@ def image_from_segment(self, segment, parent_image, parent_coords, the segment's bounding box, and deskewing with the segment's orientation angle (if any) - `"angle"`: the rotation/reflection angle applied to the image so far, + - `"DPI"`: the pixel density of this image, - `"features"`: the ``AlternativeImage/@comments`` for the image, i.e. names of all applied operations that lead up to this result. 
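# Sketch of the DPI propagation added above (names as in this diff): the resolution
# from the image meta-data is normalized to pixels per inch before being recorded,
# and later re-attached to derived images via image.info['dpi']:
#
#     dpi = page_image_info.resolution          # px per resolutionUnit
#     if page_image_info.resolutionUnit == 'cm':
#         dpi = round(dpi * 2.54)               # 1 inch = 2.54 cm
#     page_coords['DPI'] = int(dpi)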
@@ -941,6 +952,8 @@ def image_from_segment(self, segment, parent_image, parent_coords, orientation = 0 skew = 0 segment_coords['angle'] = parent_coords['angle'] # nothing applied yet (depends on filters) + if 'DPI' in parent_coords: + segment_coords['DPI'] = parent_coords['DPI'] # not rescaled yet # initialize AlternativeImage@comments classes from parent, except # for those operations that can apply on multiple hierarchy levels: @@ -1048,15 +1061,21 @@ def image_from_segment(self, segment, parent_image, parent_coords, 'filter="%s" in segment "%s"' % ( feature_filter, segment.id)) segment_image.format = 'PNG' # workaround for tesserocr#194 + # ensure DPI will be set in image meta-data again + if 'DPI' in segment_coords: + dpi = segment_coords['DPI'] + if 'dpi' not in segment_image.info: + segment_image.info['dpi'] = (dpi, dpi) return segment_image, segment_coords # pylint: disable=redefined-builtin - def save_image_file(self, image, - file_id, - file_grp, - page_id=None, - mimetype='image/png', - force=False): + def save_image_file(self, image : Image.Image, + file_id : str, + file_grp : str, + file_path : Optional[str] = None, + page_id : Optional[str] = None, + mimetype : str = 'image/png', + force : bool = False) -> str: """Store an image in the filesystem and reference it as new file in the METS. Args: @@ -1064,22 +1083,26 @@ def save_image_file(self, image, file_id (string): `@ID` of the METS `file` to use file_grp (string): `@USE` of the METS `fileGrp` to use Keyword Args: + file_path (string): `@href` of the METS `file/FLocat` to use. page_id (string): `@ID` in the METS physical `structMap` to use mimetype (string): MIME type of the image format to serialize as force (boolean): whether to replace any existing `file` with that `@ID` Serialize the image into the filesystem, and add a `file` for it in the METS. - Use a filename extension based on ``mimetype``. + Use ``file_grp`` as directory and ``file_id`` concatenated with extension + based on ``mimetype`` as file name, unless directly passing ``file_path``. Returns: The (absolute) path of the created file. 
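+
+ (E.g. ``file_id='FILE_0001_BIN'`` in ``file_grp='OCR-D-BIN'`` with the default ``mimetype`` would be stored as ``OCR-D-BIN/FILE_0001_BIN.png`` - names purely illustrative.)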
""" log = getLogger('ocrd.workspace.save_image_file') - if self.overwrite_mode: - force = True + saveargs = {} + if 'dpi' in image.info: + saveargs['dpi'] = image.info['dpi'] image_bytes = io.BytesIO() - image.save(image_bytes, format=MIME_TO_PIL[mimetype]) - file_path = str(Path(file_grp, '%s%s' % (file_id, MIME_TO_EXT[mimetype]))) + image.save(image_bytes, format=MIME_TO_PIL[mimetype], **saveargs) + if file_path is None: + file_path = str(Path(file_grp, '%s%s' % (file_id, MIME_TO_EXT[mimetype]))) out = self.add_file( file_grp, file_id=file_id, @@ -1150,9 +1173,9 @@ def _reflect(log, name, orientation, segment_image, segment_coords, segment_xywh # Transpose in affine coordinate transform: # (consistent with image transposition or AlternativeImage below) transposition = { - 90: Image.ROTATE_90, - 180: Image.ROTATE_180, - 270: Image.ROTATE_270 + 90: Image.Transpose.ROTATE_90, + 180: Image.Transpose.ROTATE_180, + 270: Image.Transpose.ROTATE_270 }.get(orientation) # no default segment_coords['transform'] = transpose_coordinates( segment_coords['transform'], transposition, @@ -1220,5 +1243,5 @@ def _scale(log, name, factor, segment_image, segment_coords, segment_xywh, **kwa segment_image = segment_image.resize((int(segment_image.width * factor), int(segment_image.height * factor)), # slowest, but highest quality: - Image.BICUBIC) + Image.Resampling.BICUBIC) return segment_image, segment_coords, segment_xywh diff --git a/src/ocrd/workspace_backup.py b/src/ocrd/workspace_backup.py index 6cc3f1530d..87ee884bd1 100644 --- a/src/ocrd/workspace_backup.py +++ b/src/ocrd/workspace_backup.py @@ -1,6 +1,6 @@ from datetime import datetime from os import makedirs -from os.path import join, basename, getsize, abspath +from os.path import join, basename, getsize from glob import glob from shutil import copy import hashlib diff --git a/src/ocrd_modelfactory/__init__.py b/src/ocrd_modelfactory/__init__.py index 7afc5b1765..828949fe96 100644 --- a/src/ocrd_modelfactory/__init__.py +++ b/src/ocrd_modelfactory/__init__.py @@ -14,9 +14,10 @@ from ocrd_utils import VERSION, MIMETYPE_PAGE, guess_media_type from ocrd_models import OcrdExif, OcrdFile, ClientSideOcrdFile from ocrd_models.ocrd_page import ( - PcGtsType, PageType, MetadataType, + OcrdPage, PcGtsType, PageType, MetadataType, parse, parseEtree ) +from ocrd_utils.deprecate import deprecation_warning __all__ = [ 'exif_from_filename', @@ -39,7 +40,7 @@ def exif_from_filename(image_filename): ocrd_exif = OcrdExif(pil_img) return ocrd_exif -def page_from_image(input_file, with_tree=False): +def page_from_image(input_file : Union[OcrdFile, ClientSideOcrdFile], **kwargs) -> OcrdPage: """ Create :py:class:`~ocrd_models.ocrd_page.OcrdPage` from an :py:class:`~ocrd_models.ocrd_file.OcrdFile` @@ -48,10 +49,9 @@ def page_from_image(input_file, with_tree=False): Arguments: input_file (:py:class:`~ocrd_models.ocrd_file.OcrdFile`): file to open \ and produce a PAGE DOM for - Keyword arguments: - with_tree (boolean): whether to return XML node tree, element-node mapping \ - and reverse mapping, too (cf. 
:py:func:`ocrd_models.ocrd_page.parseEtree`) """ + if 'with_tree' in kwargs: + deprecation_warning('kwarg "with_tree" is obsolete now, we always return OcrdPage including etree') if not input_file.local_filename: raise ValueError("input_file must have 'local_filename' property") if not Path(input_file.local_filename).exists(): raise FileNotFoundError("File not found: '%s' (%s)" % (input_file.local_filename, input_file)) @@ -72,14 +72,12 @@ def page_from_image(input_file, with_tree=False): ), pcGtsId=input_file.ID ) - if not with_tree: - return pcgts - mapping = dict() - etree = pcgts.to_etree(mapping_=mapping) + mapping = {} + etree : ET._Element = pcgts.to_etree(mapping_=mapping) revmap = dict(((node, element) for element, node in mapping.items())) - return pcgts, etree, mapping, revmap + return OcrdPage(pcgts, etree, mapping, revmap) -def page_from_file(input_file, with_tree=False) -> Union[PcGtsType, Tuple[PcGtsType, ET.Element, dict, dict]]: +def page_from_file(input_file, **kwargs) -> OcrdPage: """ Create :py:class:`~ocrd_models.ocrd_page.OcrdPage` from an :py:class:`~ocrd_models.ocrd_file.OcrdFile` or a file path @@ -88,10 +86,9 @@ def page_from_file(input_file, with_tree=False) -> Union[PcGtsT Arguments: input_file (:py:class:`~ocrd_models.ocrd_file.OcrdFile` or `str`): file to open \ and produce a PAGE DOM for - Keyword arguments: - with_tree (boolean): whether to return XML node tree, element-node mapping \ - and reverse mapping, too (cf. :py:func:`ocrd_models.ocrd_page.parseEtree`) """ + if 'with_tree' in kwargs: + deprecation_warning('kwarg "with_tree" is obsolete now, we always return OcrdPage including etree') if not isinstance(input_file, (OcrdFile, ClientSideOcrdFile)): mimetype = guess_media_type(input_file, application_xml=MIMETYPE_PAGE) input_file = OcrdFile(ET.Element("dummy"), @@ -102,7 +99,7 @@ def page_from_file(input_file, with_tree=False) -> Union[PcGtsT if not Path(input_file.local_filename).exists(): raise FileNotFoundError("File not found: '%s' (%s)" % (input_file.local_filename, input_file)) if input_file.mimetype.startswith('image'): - return page_from_image(input_file, with_tree=with_tree) + return page_from_image(input_file) if input_file.mimetype == MIMETYPE_PAGE: - return (parseEtree if with_tree else parse)(input_file.local_filename, silence=True) + return OcrdPage(*parseEtree(input_file.local_filename, silence=True)) raise ValueError("Unsupported mimetype '%s'" % input_file.mimetype) diff --git a/src/ocrd_models/__init__.py b/src/ocrd_models/__init__.py index a89ee1dec8..ff4e31798b 100644 --- a/src/ocrd_models/__init__.py +++ b/src/ocrd_models/__init__.py @@ -3,7 +3,8 @@ """ from .ocrd_agent import OcrdAgent, ClientSideOcrdAgent from .ocrd_exif import OcrdExif -from .ocrd_file import OcrdFile, ClientSideOcrdFile +from .ocrd_file import OcrdFile, ClientSideOcrdFile, OcrdFileType from .ocrd_mets import OcrdMets +from .ocrd_page import OcrdPage, OcrdPageType from .ocrd_xml_base import OcrdXmlDocument from .report import ValidationReport diff --git a/src/ocrd_models/constants.py b/src/ocrd_models/constants.py index db6e51e3a2..a67bfecc13 100644 --- a/src/ocrd_models/constants.py +++ b/src/ocrd_models/constants.py @@ -44,7 +44,6 @@ 'ocrd': 'https://ocr-d.de', } -# pylint: disable=bad-whitespace TAG_METS_AGENT = '{%s}agent' % NAMESPACES['mets'] TAG_METS_DIV = '{%s}div' % NAMESPACES['mets'] TAG_METS_FILE = '{%s}file' % NAMESPACES['mets'] diff --git a/src/ocrd_models/ocrd_exif.py b/src/ocrd_models/ocrd_exif.py index 406e60a85a..ab050bae59 100644 --- a/src/ocrd_models/ocrd_exif.py +++ b/src/ocrd_models/ocrd_exif.py @@
-21,6 +21,7 @@ class OcrdExif(): * ``RGB`` for 24-bit truecolor, * ``I`` for 32-bit signed integer grayscale, * ``F`` for floating-point grayscale + (see PIL concept **mode**) resolution (int): pixel density xResolution (int): pixel density @@ -101,7 +102,7 @@ def to_xml(self): Serialize all properties as XML string. """ ret = '' - for k in self.__dict__: - ret += '<%s>%s' % (k, self.__dict__[k], k) + for k, v in self.__dict__.items(): + ret += f'<{k}>{v}' ret += '' return ret diff --git a/src/ocrd_models/ocrd_file.py b/src/ocrd_models/ocrd_file.py index 2315a08ff3..91eac8d8e3 100644 --- a/src/ocrd_models/ocrd_file.py +++ b/src/ocrd_models/ocrd_file.py @@ -230,12 +230,12 @@ class ClientSideOcrdFile: def __init__( self, - el, + el, # pylint: disable=unused-argument mimetype: str = '', pageId: str = '', loctype: str ='OTHER', local_filename: Optional[str] = None, - mets : Any = None, + mets : Any = None, # pylint: disable=unused-argument url: str = '', ID: str = '', fileGrp: str = '' @@ -266,3 +266,5 @@ def __str__(self): for k in ['fileGrp', 'ID', 'mimetype', 'url', 'local_filename'] ]) return '' % (props) + +OcrdFileType = Union[OcrdFile, ClientSideOcrdFile] diff --git a/src/ocrd_models/ocrd_mets.py b/src/ocrd_models/ocrd_mets.py index d6da3e1cda..de068567e2 100644 --- a/src/ocrd_models/ocrd_mets.py +++ b/src/ocrd_models/ocrd_mets.py @@ -75,7 +75,7 @@ def empty_mets(now : Optional[str] = None, cache_flag : bool = False): def __init__(self, **kwargs) -> None: """ """ - super(OcrdMets, self).__init__(**kwargs) + super().__init__(**kwargs) # XXX If the environment variable OCRD_METS_CACHING is set to "true", # then enable caching, if "false", disable caching, overriding the @@ -194,11 +194,11 @@ def unique_identifier(self, purl : str) -> None: @property def agents(self) -> List[OcrdAgent]: """ - List all :py:class:`ocrd_models.ocrd_agent.OcrdAgent`s + List all :py:class:`ocrd_models.ocrd_agent.OcrdAgent` entries. """ return [OcrdAgent(el_agent) for el_agent in self._tree.getroot().findall('mets:metsHdr/mets:agent', NS)] - def add_agent(self, *args, **kwargs) -> OcrdAgent: + def add_agent(self, **kwargs) -> OcrdAgent: """ Add an :py:class:`ocrd_models.ocrd_agent.OcrdAgent` to the list of agents in the ``metsHdr``. """ @@ -213,12 +213,12 @@ def add_agent(self, *args, **kwargs) -> OcrdAgent: el_agent_last.addnext(el_agent) except StopIteration: el_metsHdr.insert(0, el_agent) - return OcrdAgent(el_agent, *args, **kwargs) + return OcrdAgent(el_agent, **kwargs) @property def file_groups(self) -> List[str]: """ - List the `@USE` of all `mets:fileGrp` entries. + List the ``@USE`` of all ``mets:fileGrp`` entries. """ # WARNING: Actually we cannot return strings in place of elements! 
@@ -488,11 +488,12 @@ def add_file(self, fileGrp : str, mimetype : Optional[str] = None, url : Optiona f"A file with ID=={ID} already exists {mets_file} but unrelated - cannot mitigate") # To get rid of Python's FutureWarning - checking if v is not None - kwargs = {k: v for k, v in locals().items() if - k in ['url', 'ID', 'mimetype', 'pageId', 'local_filename'] and v is not None} + kwargs = {k: v for k, v in locals().items() + if k in ['url', 'ID', 'mimetype', 'pageId', 'local_filename'] and v is not None} # This separation is needed to reuse the same el_mets_file element in the caching if block el_mets_file = ET.SubElement(el_fileGrp, TAG_METS_FILE) # The caching of the physical page is done in the OcrdFile constructor + # (which calls us back with set_physical_page_for_file) mets_file = OcrdFile(el_mets_file, mets=self, **kwargs) if self._cache_flag: @@ -542,9 +543,9 @@ def remove_one_file(self, ID : Union[str, OcrdFile], fileGrp : str = None) -> Oc # Delete the physical page ref fptrs = [] if self._cache_flag: - for page in self._fptr_cache.keys(): - if ID in self._fptr_cache[page]: - fptrs.append(self._fptr_cache[page][ID]) + for pageId, fptrdict in self._fptr_cache.items(): + if ID in fptrdict: + fptrs.append(fptrdict[ID]) else: fptrs = self._tree.getroot().findall('.//mets:fptr[@FILEID="%s"]' % ID, namespaces=NS) @@ -598,7 +599,16 @@ def get_physical_pages(self, for_fileIds : Optional[List[str]] = None, for_pageI If return_divs is set, returns div memory objects instead of strings of ids """ if for_fileIds is None and for_pageIds is None: + if return_divs: + if self._cache_flag: + return list(self._page_cache[METS_PAGE_DIV_ATTRIBUTE.ID].values()) + + return [x for x in self._tree.getroot().xpath( + 'mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div[@TYPE="page"]', + namespaces=NS)] + return self.physical_pages + # log = getLogger('ocrd.models.ocrd_mets.get_physical_pages') if for_pageIds is not None: ret = [] @@ -700,8 +710,8 @@ def get_physical_pages(self, for_fileIds : Optional[List[str]] = None, for_pageI assert for_fileIds # at this point we know for_fileIds is set, assert to convince pyright ret = [None] * len(for_fileIds) if self._cache_flag: - for pageId in self._fptr_cache.keys(): - for fptr in self._fptr_cache[pageId].keys(): + for pageId, fptrdict in self._fptr_cache.items(): + for fptr in fptrdict: if fptr in for_fileIds: index = for_fileIds.index(fptr) if return_divs: @@ -737,10 +747,10 @@ def set_physical_page_for_file(self, pageId : str, ocrd_file : OcrdFile, # delete any existing page mapping for this file.ID fptrs = [] if self._cache_flag: - for page_id in self._fptr_cache.keys(): - if ocrd_file.ID in self._fptr_cache[page_id].keys(): - if self._fptr_cache[page_id][ocrd_file.ID] is not None: - fptrs.append(self._fptr_cache[page_id][ocrd_file.ID]) + for page, fptrdict in self._fptr_cache.items(): + if ocrd_file.ID in fptrdict: + if fptrdict[ocrd_file.ID] is not None: + fptrs.append(fptrdict[ocrd_file.ID]) else: fptrs = self._tree.getroot().findall( 'mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div[@TYPE="page"]/mets:fptr[@FILEID="%s"]' % @@ -791,7 +801,7 @@ def set_physical_page_for_file(self, pageId : str, ocrd_file : OcrdFile, self._fptr_cache[pageId].update({ocrd_file.ID: el_fptr}) def update_physical_page_attributes(self, page_id : str, **kwargs) -> None: - invalid_keys = list(k for k in kwargs.keys() if k not in METS_PAGE_DIV_ATTRIBUTE.names()) + invalid_keys = list(k for k in kwargs if k not in 
METS_PAGE_DIV_ATTRIBUTE.names()) if invalid_keys: raise ValueError(f"Invalid attribute {invalid_keys}. Allowed values: {METS_PAGE_DIV_ATTRIBUTE.names()}") @@ -812,8 +822,8 @@ def get_physical_page_for_file(self, ocrd_file : OcrdFile) -> Optional[str]: corresponding to the ``mets:file`` :py:attr:`ocrd_file`. """ if self._cache_flag: - for pageId in self._fptr_cache.keys(): - if ocrd_file.ID in self._fptr_cache[pageId].keys(): + for pageId, fptrdict in self._fptr_cache.items(): + if ocrd_file.ID in fptrdict: return pageId else: ret = self._tree.getroot().find( @@ -828,7 +838,7 @@ def remove_physical_page(self, ID : str) -> None: """ mets_div = None if self._cache_flag: - if ID in self._page_cache[METS_PAGE_DIV_ATTRIBUTE.ID].keys(): + if ID in self._page_cache[METS_PAGE_DIV_ATTRIBUTE.ID]: mets_div = [self._page_cache[METS_PAGE_DIV_ATTRIBUTE.ID][ID]] else: mets_div = self._tree.getroot().xpath( @@ -857,9 +867,9 @@ def remove_physical_page_fptr(self, fileId : str) -> List[str]: # If that's the case then we do not need to iterate 2 loops, just one. mets_fptrs = [] if self._cache_flag: - for page_id in self._fptr_cache.keys(): - if fileId in self._fptr_cache[page_id].keys(): - mets_fptrs.append(self._fptr_cache[page_id][fileId]) + for pageId, fptrdict in self._fptr_cache.items(): + if fileId in fptrdict: + mets_fptrs.append(fptrdict[fileId]) else: mets_fptrs = self._tree.getroot().xpath( 'mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div[@TYPE="page"]/mets:fptr[@FILEID="%s"]' % fileId, @@ -894,7 +904,7 @@ def merge(self, other_mets, force : bool = False, Add all files from other_mets. Accepts the same kwargs as :py:func:`find_files` Keyword Args: - force (boolean): Whether to :py:meth:`add_file`s with force (overwriting existing ``mets:file``s) + force (boolean): Whether to do :py:meth:`add_file` with ``force`` (overwriting existing ``mets:file`` entries) fileGrp_mapping (dict): Map :py:attr:`other_mets` fileGrp to fileGrp in this METS fileId_mapping (dict): Map :py:attr:`other_mets` file ID to file ID in this METS pageId_mapping (dict): Map :py:attr:`other_mets` page ID to page ID in this METS @@ -919,4 +929,3 @@ def merge(self, other_mets, force : bool = False, # FIXME: merge structMap logical and structLink as well if after_add_cb: after_add_cb(f_dest) - diff --git a/src/ocrd_models/ocrd_page.py b/src/ocrd_models/ocrd_page.py index b0cc2b3311..3f0cc690fa 100644 --- a/src/ocrd_models/ocrd_page.py +++ b/src/ocrd_models/ocrd_page.py @@ -2,12 +2,15 @@ API to PAGE-XML, generated with generateDS from XML schema. """ from io import StringIO +from typing import Dict, Union +from lxml import etree as ET __all__ = [ 'parse', 'parseEtree', 'parseString', 'OcrdPage', + 'OcrdPageType', "AdvertRegionType", "AlternativeImageType", @@ -174,10 +177,31 @@ """ ) -# add alias for DOM root -OcrdPage = PcGtsType - -def to_xml(el, skip_declaration=False): +class OcrdPage(): + """ + Proxy object for :py:class:`ocrd_models.PcGtsType` (i.e. PRImA PAGE-XML + for page content, rendered as object model by generateDS) that also offers access + to the underlying etree, element-node mapping and reverse mapping, too (cf. 
+ :py:func:`ocrd_models.ocrd_page.parseEtree`) + """ + def __init__( + self, + pcgts : PcGtsType, + etree : ET._Element, + mapping : Dict[str, ET._Element], + revmap : Dict[ET._Element, str], + ): + self._pcgts = pcgts + self.etree = etree + self.mapping = mapping + self.revmap = revmap + + def __getattr__(self, name): + return getattr(self._pcgts, name) + +OcrdPageType = Union[OcrdPage, PcGtsType] + +def to_xml(el, skip_declaration=False) -> str: """ Serialize ``pc:PcGts`` document as string. """ diff --git a/src/ocrd_models/ocrd_page_generateds.py b/src/ocrd_models/ocrd_page_generateds.py index 6fef4c8635..f2b7c0551e 100644 --- a/src/ocrd_models/ocrd_page_generateds.py +++ b/src/ocrd_models/ocrd_page_generateds.py @@ -2,30 +2,28 @@ # -*- coding: utf-8 -*- # -# Generated Wed Nov 3 12:30:32 2021 by generateDS.py version 2.35.20. -# Python 3.6.9 (default, Jan 26 2021, 15:33:00) [GCC 8.4.0] +# Generated Sat Sep 7 14:17:39 2024 by generateDS.py version 2.35.20. +# Python 3.8.17+ (heads/3.8-dirty:1663f8ba84, Aug 15 2023, 18:13:01) [GCC 8.3.0] # # Command line options: # ('-f', '') # ('--root-element', 'PcGts') -# ('-o', 'ocrd_models/ocrd_models/ocrd_page_generateds.py') +# ('-o', 'src/ocrd_models/ocrd_page_generateds.py') # ('--silence', '') # ('--export', 'write etree') # ('--disable-generatedssuper-lookup', '') -# ('--user-methods', 'ocrd_models/ocrd_page_user_methods.py') +# ('--user-methods', 'src/ocrd_page_user_methods.py') # # Command line arguments: -# ocrd_validators/ocrd_validators/page.xsd +# src/ocrd_validators/page.xsd # # Command line: -# /home/kba/monorepo/ocrd_all/venv/bin/generateDS -f --root-element="PcGts" -o "ocrd_models/ocrd_models/ocrd_page_generateds.py" --silence --export="write etree" --disable-generatedssuper-lookup --user-methods="ocrd_models/ocrd_page_user_methods.py" ocrd_validators/ocrd_validators/page.xsd +# /data/ocr-d/ocrd_all/venv38/bin/generateDS -f --root-element="PcGts" -o "src/ocrd_models/ocrd_page_generateds.py" --silence --export="write etree" --disable-generatedssuper-lookup --user-methods="src/ocrd_page_user_methods.py" src/ocrd_validators/page.xsd # # Current working directory (os.getcwd()): # core # -# type: ignore - from itertools import zip_longest import os import sys @@ -223,7 +221,7 @@ def gds_validate_integer_list( try: int(value) except (TypeError, ValueError): - raise_parse_error(node, 'Requires sequence of integer values') + raise_parse_error(node, 'Requires sequence of integer valuess') return values def gds_format_float(self, input_data, input_name=''): return ('%.15f' % input_data).rstrip('0') @@ -1230,9 +1228,10 @@ def __hash__(self): return hash(self.id) @property def id(self): + from ocrd_utils import make_xml_id if hasattr(self, 'pcGtsId'): return self.pcGtsId or '' - return self.imageFilename + return make_xml_id(self.imageFilename) def get_AllAlternativeImagePaths(self, page=True, region=True, line=True, word=True, glyph=True): """ Get all the ``pc:AlternativeImage/@filename`` paths referenced in the PAGE-XML document. 
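# ---------------------------------------------------------------------------
# Editor's note (not part of the patch): a quick sketch of the new fallback
# for the generated id property. make_xml_id (cf. the ocrd_utils/str.py hunk
# further down) maps ':' and '/' to '_', prefixes a leading non-letter with
# 'id_', and strips remaining characters not valid in an xml:id. The example
# filenames here are made up.
from ocrd_utils import make_xml_id

assert make_xml_id('OCR-D-IMG/INPUT_0017.tif') == 'OCR-D-IMG_INPUT_0017.tif'
assert make_xml_id('17/page.png') == 'id_17_page.png'
# ---------------------------------------------------------------------------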
@@ -3116,9 +3115,10 @@ def __hash__(self):
         return hash(self.id)
     @property
     def id(self):
+        from ocrd_utils import make_xml_id
         if hasattr(self, 'pcGtsId'):
             return self.pcGtsId or ''
-        return self.imageFilename
+        return make_xml_id(self.imageFilename)
     # pylint: disable=line-too-long,invalid-name,protected-access,missing-module-docstring
     def _region_class(self, x): # pylint: disable=unused-argument
         return x.__class__.__name__.replace('RegionType', '')
@@ -3314,6 +3314,39 @@ def get_AllTextLines(self, region_order='document', respect_textline_order=True)
             ret += lines if lo in ['top-to-bottom', 'left-to-right'] else list(reversed(lines))
         return ret
+    def get_ReadingOrderGroups(self) -> dict:
+        """
+        Aggregate recursive ReadingOrder into a dictionary, mapping each regionRef
+        (i.e. segment `@id`) to its referring group object (i.e. one of
+
+        \b
+        - :py:class:`.RegionRefType`
+        - :py:class:`.RegionRefIndexedType`
+        - :py:class:`.OrderedGroupType`
+        - :py:class:`.OrderedGroupIndexedType`
+        - :py:class:`.UnorderedGroupType`
+        - :py:class:`.UnorderedGroupIndexedType`
+        """
+        def get_groupdict(group):
+            regionrefs = list()
+            if isinstance(group, (OrderedGroupType, OrderedGroupIndexedType)):
+                regionrefs = (group.get_RegionRefIndexed() +
+                              group.get_OrderedGroupIndexed() +
+                              group.get_UnorderedGroupIndexed())
+            if isinstance(group, (UnorderedGroupType, UnorderedGroupIndexedType)):
+                regionrefs = (group.get_RegionRef() +
+                              group.get_OrderedGroup() +
+                              group.get_UnorderedGroup())
+            refdict = {}
+            for elem in regionrefs:
+                refdict[elem.get_regionRef()] = elem
+                if not isinstance(elem, (RegionRefType, RegionRefIndexedType)):
+                    refdict = {**refdict, **get_groupdict(elem)}
+            return refdict
+        ro = self.get_ReadingOrder()
+        if ro is None:
+            return {}
+        return get_groupdict(ro.get_OrderedGroup() or ro.get_UnorderedGroup())
     def set_orientation(self, orientation):
         """
         Set deskewing angle to given `orientation` number.
diff --git a/src/ocrd_models/ocrd_xml_base.py b/src/ocrd_models/ocrd_xml_base.py
index 8579a5b407..ea4798c5b9 100644
--- a/src/ocrd_models/ocrd_xml_base.py
+++ b/src/ocrd_models/ocrd_xml_base.py
@@ -8,8 +8,8 @@
 from .utils import xmllint_format
 
-for curie in NAMESPACES:
-    ET.register_namespace(curie, NAMESPACES[curie])
+for curie, url in NAMESPACES.items():
+    ET.register_namespace(curie, url)
 
 class OcrdXmlDocument():
     """
diff --git a/src/ocrd_page_user_methods.py b/src/ocrd_page_user_methods.py
index 8a2332e6e5..fe22dd89ab 100644
--- a/src/ocrd_page_user_methods.py
+++ b/src/ocrd_page_user_methods.py
@@ -116,6 +116,7 @@ def _add_method(class_re, method_name, file_name=None):
     _add_method(r'^(PageType)$', 'set_Border'),
     _add_method(r'^(CoordsType)$', 'set_points'),
     _add_method(r'^(PageType)$', 'get_AllTextLines'),
+    _add_method(r'^(PageType)$', 'get_ReadingOrderGroups'),
     # for some reason, pagecontent.xsd does not declare @orientation at the abstract/base RegionType:
     _add_method(r'^(PageType|AdvertRegionType|MusicRegionType|MapRegionType|ChemRegionType|MathsRegionType|SeparatorRegionType|ChartRegionType|TableRegionType|GraphicRegionType|LineDrawingRegionType|ImageRegionType|TextRegionType)$', 'set_orientation'),
 )
diff --git a/src/ocrd_page_user_methods/get_ReadingOrderGroups.py b/src/ocrd_page_user_methods/get_ReadingOrderGroups.py
new file mode 100644
index 0000000000..e7d6c02b77
--- /dev/null
+++ b/src/ocrd_page_user_methods/get_ReadingOrderGroups.py
@@ -0,0 +1,33 @@
+def get_ReadingOrderGroups(self) -> dict:
+    """
+    Aggregate recursive ReadingOrder into a dictionary, mapping each regionRef
+    (i.e. segment `@id`) to its referring group object (i.e. one of
+
+    \b
+    - :py:class:`.RegionRefType`
+    - :py:class:`.RegionRefIndexedType`
+    - :py:class:`.OrderedGroupType`
+    - :py:class:`.OrderedGroupIndexedType`
+    - :py:class:`.UnorderedGroupType`
+    - :py:class:`.UnorderedGroupIndexedType`
+    """
+    def get_groupdict(group):
+        regionrefs = list()
+        if isinstance(group, (OrderedGroupType, OrderedGroupIndexedType)):
+            regionrefs = (group.get_RegionRefIndexed() +
+                          group.get_OrderedGroupIndexed() +
+                          group.get_UnorderedGroupIndexed())
+        if isinstance(group, (UnorderedGroupType, UnorderedGroupIndexedType)):
+            regionrefs = (group.get_RegionRef() +
+                          group.get_OrderedGroup() +
+                          group.get_UnorderedGroup())
+        refdict = {}
+        for elem in regionrefs:
+            refdict[elem.get_regionRef()] = elem
+            if not isinstance(elem, (RegionRefType, RegionRefIndexedType)):
+                refdict = {**refdict, **get_groupdict(elem)}
+        return refdict
+    ro = self.get_ReadingOrder()
+    if ro is None:
+        return {}
+    return get_groupdict(ro.get_OrderedGroup() or ro.get_UnorderedGroup())
diff --git a/src/ocrd_utils/__init__.py b/src/ocrd_utils/__init__.py
index b5bbcae121..c853a34bd3 100644
--- a/src/ocrd_utils/__init__.py
+++ b/src/ocrd_utils/__init__.py
@@ -13,6 +13,7 @@
 :py:meth:`ocrd.workspace.Workspace.image_from_segment`.)
 
 * :py:func:`rotate_coordinates`,
+  :py:func:`scale_coordinates`,
   :py:func:`shift_coordinates`,
   :py:func:`transpose_coordinates`,
   :py:func:`transform_coordinates`
@@ -74,6 +75,7 @@
 :py:func:`concat_padded`,
 :py:func:`nth_url_segment`,
 :py:func:`remove_non_path_from_url`,
+:py:func:`parse_json_file_with_comments`,
 :py:func:`parse_json_string_with_comments`,
 :py:func:`parse_json_string_or_file`,
 :py:func:`set_json_key_value_overrides`,
@@ -148,6 +150,7 @@
     polygon_mask,
     rotate_coordinates,
     rotate_image,
+    scale_coordinates,
     shift_coordinates,
     transform_coordinates,
     transpose_coordinates,
@@ -202,6 +205,7 @@
     make_xml_id,
     nth_url_segment,
     partition_list,
+    parse_json_file_with_comments,
     parse_json_string_or_file,
     parse_json_string_with_comments,
     sparkline,
diff --git a/src/ocrd_utils/config.py b/src/ocrd_utils/config.py
index 063af930c8..36399870e2 100644
--- a/src/ocrd_utils/config.py
+++ b/src/ocrd_utils/config.py
@@ -13,6 +13,12 @@
 from textwrap import fill, indent
 
+def _validator_boolean(val):
+    return isinstance(val, bool) or str.lower(val) in ('true', 'false', '0', '1')
+
+def _parser_boolean(val):
+    return bool(val) if isinstance(val, (int, bool)) else str.lower(val) in ('true', '1')
+
 class OcrdEnvVariable():
 
     def __init__(self, name, description, parser=str, validator=lambda val: True, default=[False, None]):
@@ -60,7 +66,11 @@ def __init__(self):
         self._variables = {}
 
     def add(self, name, *args, **kwargs):
-        self._variables[name] = OcrdEnvVariable(name, *args, **kwargs)
+        var = OcrdEnvVariable(name, *args, **kwargs)
+        # make visible in ocrd_utils.config docstring (apidoc)
+        txt = var.describe(wrap_text=False, indent_text=True)
+        globals()['__doc__'] += "\n\n - " + txt + "\n\n"
+        self._variables[name] = var
         return self._variables[name]
 
     def has_default(self, name):
@@ -68,14 +78,26 @@ def has_default(self, name):
             raise ValueError(f"Unregistered env variable {name}")
         return self._variables[name].has_default
 
+    def reset_defaults(self):
+        for name in self._variables:
+            try:
+                # we cannot use hasattr, because that delegates to getattr,
+                # which we override and provide defaults for (which of course
+                # cannot be removed)
+                if self.__getattribute__(name):
+                    delattr(self, name)
+            except AttributeError:
+                pass
+
     def describe(self,
name, *args, **kwargs): if not name in self._variables: raise ValueError(f"Unregistered env variable {name}") return self._variables[name].describe(*args, **kwargs) def __getattr__(self, name): + # will be called if name is not accessible (has not been added directly yet) if not name in self._variables: - raise ValueError(f"Unregistered env variable {name}") + raise AttributeError(f"Unregistered env variable {name}") var_obj = self._variables[name] try: raw_value = self.raw_value(name) @@ -102,21 +124,33 @@ def raw_value(self, name): config.add('OCRD_METS_CACHING', description='If set to `true`, access to the METS file is cached, speeding in-memory search and modification.', - validator=lambda val: val in ('true', 'false', '0', '1'), - parser=lambda val: val in ('true', '1')) + validator=_validator_boolean, + parser=_parser_boolean) config.add('OCRD_MAX_PROCESSOR_CACHE', description="Maximum number of processor instances (for each set of parameters) to be kept in memory (including loaded models) for processing workers or processor servers.", parser=int, default=(True, 128)) +config.add('OCRD_MAX_PARALLEL_PAGES', + description="Maximum number of processor threads for page-parallel processing (within each Processor's selected page range, independent of the number of Processing Workers or Processor Servers). If set >1, then a METS Server must be used for METS synchronisation.", + parser=int, + default=(True, 1)) + +config.add('OCRD_PROCESSING_PAGE_TIMEOUT', + description="Timeout in seconds for processing a single page. If set >0, when exceeded, the same as OCRD_MISSING_OUTPUT applies.", + parser=int, + default=(True, 0)) + config.add("OCRD_PROFILE", description="""\ Whether to enable gathering runtime statistics on the `ocrd.profile` logger (comma-separated): + - `CPU`: yields CPU and wall-time, - `RSS`: also yields peak memory (resident set size) - `PSS`: also yields peak memory (proportional set size) + """, validator=lambda val : all(t in ('', 'CPU', 'RSS', 'PSS') for t in val.split(',')), default=(True, '')) @@ -125,7 +159,7 @@ def raw_value(self, name): description="If set, then the CPU profile is written to this file for later peruse with a analysis tools like snakeviz") config.add("OCRD_DOWNLOAD_RETRIES", - description="Number of times to retry failed attempts for downloads of workspace files.", + description="Number of times to retry failed attempts for downloads of resources or workspace files.", validator=int, parser=int) @@ -141,6 +175,55 @@ def _ocrd_download_timeout_parser(val): description="Timeout in seconds for connecting or reading (comma-separated) when downloading.", parser=_ocrd_download_timeout_parser) +config.add("OCRD_DOWNLOAD_INPUT", + description="Whether to download files not present locally during processing", + default=(True, True), + validator=_validator_boolean, + parser=_parser_boolean) + +config.add("OCRD_MISSING_INPUT", + description="""\ +How to deal with missing input files (for some fileGrp/pageId) during processing: + + - `SKIP`: ignore and proceed with next page's input + - `ABORT`: throw :py:class:`.MissingInputFile` + +""", + default=(True, 'SKIP'), + validator=lambda val: val in ['SKIP', 'ABORT'], + parser=str) + +config.add("OCRD_MISSING_OUTPUT", + description="""\ +How to deal with missing output files (for some fileGrp/pageId) during processing: + + - `SKIP`: ignore and proceed processing next page + - `COPY`: fall back to copying input PAGE to output fileGrp for page + - `ABORT`: re-throw whatever caused processing to fail + +""", + default=(True, 
'SKIP'), + validator=lambda val: val in ['SKIP', 'COPY', 'ABORT'], + parser=str) + +config.add("OCRD_MAX_MISSING_OUTPUTS", + description="Maximal rate of skipped/fallback pages among all processed pages before aborting (decimal fraction, ignored if negative).", + default=(True, 0.1), + parser=float) + +config.add("OCRD_EXISTING_OUTPUT", + description="""\ +How to deal with already existing output files (for some fileGrp/pageId) during processing: + + - `SKIP`: ignore and proceed processing next page + - `OVERWRITE`: force writing result to output fileGrp for page + - `ABORT`: re-throw :py:class:`FileExistsError` + +""", + default=(True, 'SKIP'), + validator=lambda val: val in ['SKIP', 'OVERWRITE', 'ABORT'], + parser=str) + config.add("OCRD_NETWORK_SERVER_ADDR_PROCESSING", description="Default address of Processing Server to connect to (for `ocrd network client processing`).", default=(True, '')) @@ -200,5 +283,5 @@ def _ocrd_download_timeout_parser(val): config.add("OCRD_LOGGING_DEBUG", description="Print information about the logging setup to STDERR", default=(True, False), - validator=lambda val: isinstance(val, bool) or str.lower(val) in ('true', 'false', '0', '1'), - parser=lambda val: val if isinstance(val, (int, bool)) else str.lower(val) in ('true', '1')) + validator=_validator_boolean, + parser=_parser_boolean) diff --git a/src/ocrd_utils/image.py b/src/ocrd_utils/image.py index 3bc14e6612..6f2524608c 100644 --- a/src/ocrd_utils/image.py +++ b/src/ocrd_utils/image.py @@ -65,10 +65,10 @@ def adjust_canvas_to_transposition(size, method): Return a numpy array of the enlarged width and height. """ - if method in [Image.ROTATE_90, - Image.ROTATE_270, - Image.TRANSPOSE, - Image.TRANSVERSE]: + if method in [Image.Transpose.ROTATE_90, + Image.Transpose.ROTATE_270, + Image.Transpose.TRANSPOSE, + Image.Transpose.TRANSVERSE]: size = size[::-1] return size @@ -348,26 +348,26 @@ def transpose_coordinates(transform, method, orig=np.array([0, 0])): calculate the affine coordinate transform corresponding to the composition of both transformations, which is respectively: - - ``PIL.Image.FLIP_LEFT_RIGHT``: + - ``PIL.Image.Transpose.FLIP_LEFT_RIGHT``: entails translation to the center, followed by pure reflection about the y-axis, and subsequent translation back - - ``PIL.Image.FLIP_TOP_BOTTOM``: + - ``PIL.Image.Transpose.FLIP_TOP_BOTTOM``: entails translation to the center, followed by pure reflection about the x-axis, and subsequent translation back - - ``PIL.Image.ROTATE_180``: + - ``PIL.Image.Transpose.ROTATE_180``: entails translation to the center, followed by pure reflection about the origin, and subsequent translation back - - ``PIL.Image.ROTATE_90``: + - ``PIL.Image.Transpose.ROTATE_90``: entails translation to the center, followed by pure rotation by 90° counter-clockwise, and subsequent translation back - - ``PIL.Image.ROTATE_270``: + - ``PIL.Image.Transpose.ROTATE_270``: entails translation to the center, followed by pure rotation by 270° counter-clockwise, and subsequent translation back - - ``PIL.Image.TRANSPOSE``: + - ``PIL.Image.Transpose.TRANSPOSE``: entails translation to the center, followed by pure rotation by 90° counter-clockwise and pure reflection about the x-axis, and subsequent translation back - - ``PIL.Image.TRANSVERSE``: + - ``PIL.Image.Transpose.TRANSVERSE``: entails translation to the center, followed by pure rotation by 90° counter-clockwise and pure reflection about the y-axis, and subsequent translation back @@ -388,13 +388,13 @@ def transpose_coordinates(transform, 
method, orig=np.array([0, 0])): [0, 0, 1]]) transform = shift_coordinates(transform, -orig) operations = { - Image.FLIP_LEFT_RIGHT: [refly], - Image.FLIP_TOP_BOTTOM: [reflx], - Image.ROTATE_180: [reflx, refly], - Image.ROTATE_90: [rot90], - Image.ROTATE_270: [rot90, reflx, refly], - Image.TRANSPOSE: [rot90, reflx], - Image.TRANSVERSE: [rot90, refly] + Image.Transpose.FLIP_LEFT_RIGHT: [refly], + Image.Transpose.FLIP_TOP_BOTTOM: [reflx], + Image.Transpose.ROTATE_180: [reflx, refly], + Image.Transpose.ROTATE_90: [rot90], + Image.Transpose.ROTATE_270: [rot90, reflx, refly], + Image.Transpose.TRANSPOSE: [rot90, reflx], + Image.Transpose.TRANSVERSE: [rot90, refly] }.get(method) # no default for operation in operations: transform = np.dot(operation, transform) @@ -411,29 +411,29 @@ def transpose_image(image, method): Given a PIL.Image ``image`` and a transposition mode ``method``, apply the respective operation: - - ``PIL.Image.FLIP_LEFT_RIGHT``: + - ``PIL.Image.Transpose.FLIP_LEFT_RIGHT``: all pixels get mirrored at half the width of the image - - ``PIL.Image.FLIP_TOP_BOTTOM``: + - ``PIL.Image.Transpose.FLIP_TOP_BOTTOM``: all pixels get mirrored at half the height of the image - - ``PIL.Image.ROTATE_180``: + - ``PIL.Image.Transpose.ROTATE_180``: all pixels get mirrored at both, the width and half the height of the image, i.e. the image gets rotated by 180° counter-clockwise - - ``PIL.Image.ROTATE_90``: + - ``PIL.Image.Transpose.ROTATE_90``: rows become columns (but counted from the right) and columns become rows, i.e. the image gets rotated by 90° counter-clockwise; width becomes height and vice versa - - ``PIL.Image.ROTATE_270``: + - ``PIL.Image.Transpose.ROTATE_270``: rows become columns and columns become rows (but counted from the bottom), i.e. the image gets rotated by 270° counter-clockwise; width becomes height and vice versa - - ``PIL.Image.TRANSPOSE``: + - ``PIL.Image.Transpose.TRANSPOSE``: rows become columns and vice versa, i.e. all pixels get mirrored at the main diagonal; width becomes height and vice versa - - ``PIL.Image.TRANSVERSE``: + - ``PIL.Image.Transpose.TRANSVERSE``: rows become columns (but counted from the right) and columns become rows (but counted from the bottom), i.e. all pixels get mirrored at the opposite diagonal; diff --git a/src/ocrd_utils/logging.py b/src/ocrd_utils/logging.py index bb771fc0ce..52b01883f1 100644 --- a/src/ocrd_utils/logging.py +++ b/src/ocrd_utils/logging.py @@ -5,9 +5,9 @@ Logging can be overridden either programmatically in code using the library or by creating one or more of -- /etc/ocrd_logging.py -- $HOME/ocrd_logging.py -- $PWD/ocrd_logging.py +- ``/etc/ocrd_logging.py`` +- ``$HOME/ocrd_logging.py`` +- ``$PWD/ocrd_logging.py`` These files will be executed in the context of ocrd/ocrd_logging.py, with `logging` global set. @@ -16,20 +16,18 @@ - Try to be less intrusive with OCR-D specific logging conventions to make it easier and less surprising to define logging behavior when using OCR-D/core as a library - - Change setOverrideLogLevel to only override the log level of the ``ocrd`` + - Change :py:meth:`setOverrideLogLevel` to only override the log level of the ``ocrd`` logger and its descendants - - initLogging will set exactly one handler, for the root logger or for the + - :py:meth:`initLogging` will set exactly one handler, for the root logger or for the ``ocrd`` logger. 
- Child loggers should propagate to the ancestor logging (default - behavior of the logging library - no more PropagationShyLogger) - - disableLogging only removes any handlers from the ``ocrd`` logger + behavior of the logging library - no more ``PropagationShyLogger``) + - :py:meth:`disableLogging` only removes any handlers from the ``ocrd`` logger """ # pylint: disable=no-member from __future__ import absolute_import -from traceback import format_stack - import logging import logging.config from pathlib import Path @@ -48,13 +46,8 @@ 'setOverrideLogLevel', ] -# These are the loggers we add handlers to -ROOT_OCRD_LOGGERS = [ - 'ocrd', - 'ocrd_network' -] - LOGGING_DEFAULTS = { + '': logging.WARNING, 'ocrd': logging.INFO, 'ocrd_network': logging.INFO, # 'ocrd.resolver': logging.INFO, @@ -81,10 +74,10 @@ def tf_disable_interactive_logs(): try: - from os import environ + from os import environ # pylint: disable=import-outside-toplevel # This env variable must be set before importing from Keras environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - from tensorflow.keras.utils import disable_interactive_logging + from tensorflow.keras.utils import disable_interactive_logging # pylint: disable=import-outside-toplevel # Enabled interactive logging throws an exception # due to a call of sys.stdout.flush() disable_interactive_logging() @@ -115,18 +108,15 @@ def setOverrideLogLevel(lvl, silent=not config.OCRD_LOGGING_DEBUG): lvl (string): Log level name. silent (boolean): Whether to log the override call """ - if not _initialized_flag: - initLogging(silent=silent) - ocrd_logger = logging.getLogger('ocrd') - - if lvl is None: - if not silent: - print('[LOGGING] Reset log level override', file=sys.stderr) - ocrd_logger.setLevel(logging.NOTSET) - else: - if not silent: - print(f'[LOGGING] Overriding ocrd log level to {lvl}', file=sys.stderr) - ocrd_logger.setLevel(lvl) + if lvl is not None: + lvl = getLevelName(lvl) + if not _initialized_flag: + initLogging(silent=silent) + # affect all configured loggers + for logger_name in logging.root.manager.loggerDict: + if not silent: + print(f'[LOGGING] Overriding {logger_name} log level to {lvl}', file=sys.stderr) + logging.getLogger(logger_name).setLevel(lvl) def get_logging_config_files(): """ @@ -143,37 +133,28 @@ def get_logging_config_files(): def initLogging(builtin_only=False, force_reinit=False, silent=not config.OCRD_LOGGING_DEBUG): """ - Reset ``ocrd`` logger, read logging configuration if exists, otherwise use basicConfig + Reset ``ocrd`` logger, read logging configuration if exists, otherwise use :py:meth:`logging.basicConfig` - initLogging is to be called by OCR-D/core once, i.e. + This is to be called by OCR-D/core only once, i.e. - for the ``ocrd`` CLI - for the processor wrapper methods Other processes that use OCR-D/core as a library can, but do not have to, use this functionality. Keyword Args: - - builtin_only (bool, False): Whether to search for logging configuration - on-disk (``False``) or only use the - hard-coded config (``True``). For testing - - force_reinit (bool, False): Whether to ignore the module-level - ``_initialized_flag``. For testing only. - - silent (bool, True): Whether to log logging behavior by printing to stderr + - builtin_only (bool): Whether to search for logging configuration + on-disk (``False``) or only use the hard-coded config (``True``). + For testing + - force_reinit (bool): Whether to ignore the module-level ``_initialized_flag``. 
+ For testing only + - silent (bool): Whether to log logging behavior by printing to stderr """ global _initialized_flag - if _initialized_flag and not force_reinit: - return - # disableLogging() - - # https://docs.python.org/3/library/logging.html#logging.disable - # If logging.disable(logging.NOTSET) is called, it effectively removes this - # overriding level, so that logging output again depends on the effective - # levels of individual loggers. - logging.disable(logging.NOTSET) - - # remove all handlers for the ocrd root loggers - for logger_name in ROOT_OCRD_LOGGERS: - for handler in logging.getLogger(logger_name).handlers[:]: - logging.getLogger(logger_name).removeHandler(handler) + if _initialized_flag: + if force_reinit: + disableLogging(silent=silent) + else: + return config_file = None if not builtin_only: @@ -192,8 +173,8 @@ def initLogging(builtin_only=False, force_reinit=False, silent=not config.OCRD_L ocrd_handler = logging.StreamHandler(stream=sys.stderr) ocrd_handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT, datefmt=LOG_TIMEFMT)) ocrd_handler.setLevel(logging.DEBUG) - for logger_name in ROOT_OCRD_LOGGERS: - logging.getLogger(logger_name).addHandler(ocrd_handler) + root_logger = logging.getLogger('') + root_logger.addHandler(ocrd_handler) for logger_name, logger_level in LOGGING_DEFAULTS.items(): logging.getLogger(logger_name).setLevel(logger_level) _initialized_flag = True @@ -209,22 +190,16 @@ def disableLogging(silent=not config.OCRD_LOGGING_DEBUG): if _initialized_flag and not silent: print("[LOGGING] Disabling logging", file=sys.stderr) _initialized_flag = False - # logging.basicConfig(level=logging.CRITICAL) - # logging.disable(logging.ERROR) - # remove all handlers for the ocrd logger - for logger_name in ROOT_OCRD_LOGGERS: - for handler in logging.getLogger(logger_name).handlers[:]: - logging.getLogger(logger_name).removeHandler(handler) - for logger_name in LOGGING_DEFAULTS: - logging.getLogger(logger_name).setLevel(logging.NOTSET) - -# Initializing stream handlers at module level -# would cause message output in all runtime contexts, -# including those which are already run for std output -# (--dump-json, --version, ocrd-tool, bashlib etc). 
-# So this needs to be an opt-in from the CLIs/decorators: -#initLogging() -# Also, we even have to block log output for libraries -# (like matplotlib/tensorflow) which set up logging -# themselves already: -disableLogging() + # remove all handlers we might have added (via initLogging on builtin or file config) + for logger_name in logging.root.manager.loggerDict: + if not silent: + print(f'[LOGGING] Resetting {logger_name} log level and handlers') + logger = logging.getLogger(logger_name) + logger.setLevel(logging.NOTSET) + for handler in logger.handlers[:]: + logger.removeHandler(handler) + for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) + # Python default log level is WARNING + logging.root.setLevel(logging.WARNING) + diff --git a/src/ocrd_utils/ocrd_logging.conf b/src/ocrd_utils/ocrd_logging.conf index 5cf161398e..41e6d5af7a 100644 --- a/src/ocrd_utils/ocrd_logging.conf +++ b/src/ocrd_utils/ocrd_logging.conf @@ -34,7 +34,7 @@ keys=defaultFormatter,detailedFormatter # default logger "root" using consoleHandler # [logger_root] -level=INFO +level=WARNING handlers=consoleHandler,fileHandler @@ -56,22 +56,22 @@ handlers=consoleHandler,fileHandler # ocrd loggers [logger_ocrd] level=INFO -handlers=consoleHandler,fileHandler +handlers= qualname=ocrd -propagate=0 [logger_ocrd_network] level=INFO -handlers=consoleHandler,processingServerHandler +#handlers=consoleHandler,processingServerHandler +handlers=processingServerHandler qualname=ocrd_network -propagate=0 +#propagate=0 # # logger tensorflow # [logger_ocrd_tensorflow] level=ERROR -handlers=consoleHandler +handlers= qualname=tensorflow # @@ -79,7 +79,7 @@ qualname=tensorflow # [logger_ocrd_shapely_geos] level=ERROR -handlers=consoleHandler +handlers= qualname=shapely.geos @@ -88,7 +88,7 @@ qualname=shapely.geos # [logger_ocrd_PIL] level=INFO -handlers=consoleHandler +handlers= qualname=PIL # @@ -96,34 +96,32 @@ qualname=PIL # [logger_paramiko] level=INFO -handlers=consoleHandler +handlers= qualname=paramiko -propagate=0 [logger_paramiko_transport] level=INFO -handlers=consoleHandler +handlers= qualname=paramiko.transport -propagate=0 # # uvicorn loggers # [logger_uvicorn] level=INFO -handlers=consoleHandler +handlers= qualname=uvicorn [logger_uvicorn_access] level=WARN -handlers=consoleHandler +handlers= qualname=uvicorn.access [logger_uvicorn_error] level=INFO -handlers=consoleHandler +handlers= qualname=uvicorn.error [logger_multipart] level=INFO -handlers=consoleHandler +handlers= qualname=multipart diff --git a/src/ocrd_utils/os.py b/src/ocrd_utils/os.py index 18463de0c0..70721acbe3 100644 --- a/src/ocrd_utils/os.py +++ b/src/ocrd_utils/os.py @@ -71,9 +71,8 @@ def unzip_file_to_dir(path_to_zip, output_directory): """ Extract a ZIP archive to a directory """ - z = ZipFile(path_to_zip, 'r') - z.extractall(output_directory) - z.close() + with ZipFile(path_to_zip, 'r') as z: + z.extractall(output_directory) @lru_cache() def get_ocrd_tool_json(executable): @@ -87,7 +86,7 @@ def get_ocrd_tool_json(executable): ocrd_tool = ocrd_all_tool[executable] except (JSONDecodeError, OSError, KeyError): try: - ocrd_tool = loads(run([executable, '--dump-json'], stdout=PIPE).stdout) + ocrd_tool = loads(run([executable, '--dump-json'], stdout=PIPE, check=False).stdout) except (JSONDecodeError, OSError) as e: getLogger('ocrd.utils.get_ocrd_tool_json').error(f'{executable} --dump-json produced invalid JSON: {e}') if 'resource_locations' not in ocrd_tool: @@ -102,7 +101,7 @@ def get_moduledir(executable): moduledir = 
ocrd_all_moduledir[executable] except (JSONDecodeError, OSError, KeyError): try: - moduledir = run([executable, '--dump-module-dir'], encoding='utf-8', stdout=PIPE).stdout.rstrip('\n') + moduledir = run([executable, '--dump-module-dir'], encoding='utf-8', stdout=PIPE, check=False).stdout.rstrip('\n') except (JSONDecodeError, OSError) as e: getLogger('ocrd.utils.get_moduledir').error(f'{executable} --dump-module-dir failed: {e}') return moduledir diff --git a/src/ocrd_utils/str.py b/src/ocrd_utils/str.py index dea3715bf4..13d03cc5b8 100644 --- a/src/ocrd_utils/str.py +++ b/src/ocrd_utils/str.py @@ -4,9 +4,10 @@ import re import json -from typing import List, Union +from typing import List from .constants import REGEX_FILE_ID, SPARKLINE_CHARS -from .deprecate import deprecation_warning +#from .deprecate import deprecation_warning +from deprecated import deprecated from warnings import warn from numpy import array_split @@ -20,6 +21,7 @@ 'make_file_id', 'make_xml_id', 'nth_url_segment', + 'parse_json_file_with_comments', 'parse_json_string_or_file', 'parse_json_string_with_comments', 'remove_non_path_from_url', @@ -27,6 +29,7 @@ ] +@deprecated(version='3.0', reason='specify input and output file_grp_cardinality in ocrd-tool.json instead') def assert_file_grp_cardinality(grps, n, msg=None): """ Assert that a string of comma-separated fileGrps contains exactly ``n`` entries. @@ -105,10 +108,11 @@ def make_xml_id(idstr: str) -> str: ret = idstr if not REGEX_FILE_ID.fullmatch(ret): ret = ret.replace(':', '_') + ret = ret.replace('/', '_') ret = re.sub(r'^([^a-zA-Z_])', r'id_\1', ret) ret = re.sub(r'[^\w.-]', r'', ret) return ret - + def nth_url_segment(url, n=-1): """ Return the last /-delimited segment of a URL-like string @@ -160,6 +164,13 @@ def is_string(val): return isinstance(val, str) +def parse_json_file_with_comments(val): + """ + Parse a file of JSON interspersed with #-prefixed full-line comments + """ + with open(val, 'r', encoding='utf-8') as inputf: + return parse_json_string_with_comments(inputf.read()) + def parse_json_string_with_comments(val): """ Parse a string of JSON interspersed with #-prefixed full-line comments @@ -263,4 +274,3 @@ def sparkline(values : List[int]) -> str: # normalize to 0..1 and convert to index in SPARKLINE_CHARS mapped = [int(x / max_value * max_mapping) for x in values] return ''.join(SPARKLINE_CHARS[x] for x in mapped) - diff --git a/src/ocrd_validators/json_validator.py b/src/ocrd_validators/json_validator.py index c920fc7c2d..f21a23afee 100644 --- a/src/ocrd_validators/json_validator.py +++ b/src/ocrd_validators/json_validator.py @@ -3,10 +3,13 @@ """ import json -from jsonschema import Draft6Validator, validators # pylint: disable=import-error +from jsonschema import Draft201909Validator, ValidationError, validators # pylint: disable=import-error from ocrd_models import ValidationReport +class JsonSchemaDeprecationWarning(ValidationError): + pass + # http://python-jsonschema.readthedocs.io/en/latest/faq/ def extend_with_default(validator_class): """ @@ -14,21 +17,22 @@ def extend_with_default(validator_class): """ validate_properties = validator_class.VALIDATORS["properties"] - def set_defaults(validator, properties, instance, schema): + def set_defaults_and_handle_deprecate(validator, properties, instance, schema): """ Set defaults in subschemas """ for prop, subschema in properties.items(): if "default" in subschema: instance.setdefault(prop, subschema["default"]) + if subschema.get('deprecated', False) and instance.get(prop): + yield 
JsonSchemaDeprecationWarning(f"Property {prop} has been deprecated, ocrd-tool.json should be updated.")
 
-        for error in validate_properties(validator, properties, instance, schema):
-            yield error
+        yield from validate_properties(validator, properties, instance, schema)
 
-    return validators.extend(validator_class, {"properties": set_defaults})
+    return validators.extend(validator_class, {"properties": set_defaults_and_handle_deprecate})
 
-DefaultValidatingDraft6Validator = extend_with_default(Draft6Validator)
+DefaultValidatingDraft20199Validator = extend_with_default(Draft201909Validator)
 
 #
 # -------------------------------------------------
@@ -52,13 +56,13 @@ def validate(obj, schema):
             obj = json.loads(obj)
         return JsonValidator(schema)._validate(obj)  # pylint: disable=protected-access
 
-    def __init__(self, schema, validator_class=Draft6Validator):
+    def __init__(self, schema, validator_class=Draft201909Validator):
         """
         Construct a JsonValidator.
 
         Args:
             schema (dict):
-            validator_class (Draft6Validator|DefaultValidatingDraft6Validator):
+            validator_class (Draft201909Validator|DefaultValidatingDraft20199Validator):
         """
         self.validator = validator_class(schema)
@@ -74,6 +78,7 @@ def _validate(self, obj):
         report = ValidationReport()
         if not self.validator.is_valid(obj):
             for v in self.validator.iter_errors(obj):
+                meth = f'add_{"warning" if isinstance(v, JsonSchemaDeprecationWarning) else "error"}'
                 # print(">>>>>>>>> v='%s', obj='%s'" % (v, obj))
-                report.add_error("[%s] %s" % ('.'.join(str(vv) for vv in v.path), v.message))
+                getattr(report, meth)("[%s] %s" % ('.'.join(str(vv) for vv in v.path), v.message))
         return report
diff --git a/src/ocrd_validators/ocrd_tool.schema.yml b/src/ocrd_validators/ocrd_tool.schema.yml
index 766fd892cc..bdf834b6a6 100644
--- a/src/ocrd_validators/ocrd_tool.schema.yml
+++ b/src/ocrd_validators/ocrd_tool.schema.yml
@@ -11,7 +11,7 @@ properties:
     type: string
     pattern: '^[0-9]+\.[0-9]+\.[0-9]+$'
   git_url:
-    description: Github/Gitlab URL
+    description: GitHub/GitLab URL
     type: string
     format: url
   dockerhub:
@@ -29,28 +29,54 @@ properties:
       - steps
       - executable
       - categories
-      - input_file_grp
-      # Not required because not all processors produce output files
-      # - output_file_grp
+      - input_file_grp_cardinality
+      - output_file_grp_cardinality
     properties:
       executable:
         description: The name of the CLI executable in $PATH
         type: string
       input_file_grp:
-        description: Input fileGrp@USE this tool expects by default
+        deprecated: true
+        description: (DEPRECATED) Input fileGrp@USE this tool expects by default
         type: array
         items:
           type: string
           # pattern: '^OCR-D-[A-Z0-9-]+$'
       output_file_grp:
-        description: Output fileGrp@USE this tool produces by default
+        deprecated: true
+        description: (DEPRECATED) Output fileGrp@USE this tool produces by default
         type: array
        items:
           type: string
           # pattern: '^OCR-D-[A-Z0-9-]+$'
+      input_file_grp_cardinality:
+        description: Number of (comma-separated) input fileGrp@USE this tool expects (either an exact value or a minimum,maximum list with -1 for unlimited)
+        oneOf:
+          - type: number
+            multipleOf: 1
+          - type: array
+            items:
+              type: number
+              multipleOf: 1
+            minItems: 2
+            maxItems: 2
+        default: 1
+      output_file_grp_cardinality:
+        description: Number of (comma-separated) output fileGrp@USE this tool produces (either an exact value or a minimum,maximum list with -1 for unlimited)
+        oneOf:
+          - type: number
+            multipleOf: 1
+          - type: array
+            items:
+              type: number
+              multipleOf: 1
+            minItems: 2
+            maxItems: 2
+        default: 1
       parameters:
        description: Object describing the
parameters of a tool. Keys are parameter names, values sub-schemas. type: object + default: {} patternProperties: ".*": type: object @@ -82,6 +108,12 @@ properties: maximum: type: number description: Maximum value for number parameters, including the maximum + minProperties: + type: number + description: Minimum number of properties of an object + maxProperties: + type: number + description: Maximum number of properties of an object exclusiveMinimum: type: number description: Minimum value for number parameters, excluding the minimum @@ -95,8 +127,11 @@ properties: type: object description: Describe the properties of an object value additionalProperties: - type: boolean - description: Whether an object value may contain properties not explicitly defined + oneOf: + - type: boolean + description: Whether an object value may contain properties not explicitly defined + - type: object + description: Schema any additional properties need to adhere to required: type: boolean description: Whether this parameter is required @@ -121,9 +156,9 @@ properties: description: "If parameter is reference to file: Whether the file should be cached, e.g. because it is large and won't change." default: false description: - description: Concise description what the tool does + description: Concise description of what the tool does categories: - description: Tools belong to this categories, representing modules within the OCR-D project structure + description: Tools belong to these categories, representing modules within the OCR-D project structure type: array items: type: string @@ -198,7 +233,7 @@ properties: default: 'as-is' path_in_archive: type: string - description: if type is archive, the resource is at this location in the archive + description: If type is archive, the resource is at this location in the archive default: '.' version_range: type: string @@ -206,4 +241,4 @@ properties: default: '>= 0.0.1' size: type: number - description: Size of the resource in bytes + description: "Size of the resource in bytes to be retrieved (for archives: size of the archive)" diff --git a/src/ocrd_validators/ocrd_tool_validator.py b/src/ocrd_validators/ocrd_tool_validator.py index b408bd86e9..00a402c12d 100644 --- a/src/ocrd_validators/ocrd_tool_validator.py +++ b/src/ocrd_validators/ocrd_tool_validator.py @@ -4,7 +4,7 @@ See `specs `_. """ from .constants import OCRD_TOOL_SCHEMA -from .json_validator import JsonValidator +from .json_validator import DefaultValidatingDraft20199Validator, JsonValidator # # ------------------------------------------------- @@ -20,4 +20,7 @@ def validate(obj, schema=OCRD_TOOL_SCHEMA): """ Validate against ``ocrd-tool.json`` schema. 
""" - return JsonValidator.validate(obj, schema) + return OcrdToolValidator(schema)._validate(obj) # pylint: disable=protected-access + + def __init__(self, schema): + super().__init__(schema, validator_class=DefaultValidatingDraft20199Validator) diff --git a/src/ocrd_validators/page_validator.py b/src/ocrd_validators/page_validator.py index 41ce0b9f94..0459f17811 100644 --- a/src/ocrd_validators/page_validator.py +++ b/src/ocrd_validators/page_validator.py @@ -6,7 +6,7 @@ from shapely.validation import explain_validity from ocrd_utils import getLogger, polygon_from_points, deprecated_alias -from ocrd_models.ocrd_page import parse +from ocrd_models.ocrd_page import OcrdPage, parse from ocrd_modelfactory import page_from_file from ocrd_models.ocrd_page import ( @@ -34,50 +34,50 @@ _HIERARCHY = [ # page can contain different types of regions - (PageType, 'get_AdvertRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_ChartRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_ChemRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_CustomRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_GraphicRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_ImageRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_LineDrawingRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_MapRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_MathsRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_MusicRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_NoiseRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_SeparatorRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_TableRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_TextRegion', None), # pylint: disable=bad-whitespace - (PageType, 'get_UnknownRegion', None), # pylint: disable=bad-whitespace + (PageType, 'get_AdvertRegion', None), + (PageType, 'get_ChartRegion', None), + (PageType, 'get_ChemRegion', None), + (PageType, 'get_CustomRegion', None), + (PageType, 'get_GraphicRegion', None), + (PageType, 'get_ImageRegion', None), + (PageType, 'get_LineDrawingRegion', None), + (PageType, 'get_MapRegion', None), + (PageType, 'get_MathsRegion', None), + (PageType, 'get_MusicRegion', None), + (PageType, 'get_NoiseRegion', None), + (PageType, 'get_SeparatorRegion', None), + (PageType, 'get_TableRegion', None), + (PageType, 'get_TextRegion', None), + (PageType, 'get_UnknownRegion', None), # all regions can be recursive - (RegionType, 'get_AdvertRegion', None), # pylint: disable=bad-whitespace - (RegionType, 'get_ChartRegion', None), # pylint: disable=bad-whitespace - (RegionType, 'get_ChemRegion', None), # pylint: disable=bad-whitespace - (RegionType, 'get_CustomRegion', None), # pylint: disable=bad-whitespace - (RegionType, 'get_GraphicRegion', None), # pylint: disable=bad-whitespace - (RegionType, 'get_ImageRegion', None), # pylint: disable=bad-whitespace - (RegionType, 'get_LineDrawingRegion', None), # pylint: disable=bad-whitespace - #(RegionType, 'get_MapRegion', None), # pylint: disable=bad-whitespace - (RegionType, 'get_MathsRegion', None), # pylint: disable=bad-whitespace - (RegionType, 'get_MusicRegion', None), # pylint: disable=bad-whitespace - (RegionType, 'get_NoiseRegion', None), # pylint: disable=bad-whitespace - (RegionType, 'get_SeparatorRegion', None), # pylint: disable=bad-whitespace - (RegionType, 'get_TableRegion', None), # pylint: disable=bad-whitespace - 
(RegionType, 'get_TextRegion', None), # pylint: disable=bad-whitespace - (RegionType, 'get_UnknownRegion', None), # pylint: disable=bad-whitespace + (RegionType, 'get_AdvertRegion', None), + (RegionType, 'get_ChartRegion', None), + (RegionType, 'get_ChemRegion', None), + (RegionType, 'get_CustomRegion', None), + (RegionType, 'get_GraphicRegion', None), + (RegionType, 'get_ImageRegion', None), + (RegionType, 'get_LineDrawingRegion', None), + #(RegionType, 'get_MapRegion', None), + (RegionType, 'get_MathsRegion', None), + (RegionType, 'get_MusicRegion', None), + (RegionType, 'get_NoiseRegion', None), + (RegionType, 'get_SeparatorRegion', None), + (RegionType, 'get_TableRegion', None), + (RegionType, 'get_TextRegion', None), + (RegionType, 'get_UnknownRegion', None), # only TextRegion can contain TextLine - (TextRegionType, 'get_TextLine', '\n'), # pylint: disable=bad-whitespace - (TextLineType, 'get_Word', ' '), # pylint: disable=bad-whitespace - (WordType, 'get_Glyph', ''), # pylint: disable=bad-whitespace - (GlyphType, None, None), # pylint: disable=bad-whitespace + (TextRegionType, 'get_TextLine', '\n'), + (TextLineType, 'get_Word', ' '), + (WordType, 'get_Glyph', ''), + (GlyphType, None, None), ] _ORDER = [ (None, TextLineOrderSimpleType.BOTTOMTOTOP, ReadingDirectionSimpleType.RIGHTTOLEFT), - (PageType, 'get_textLineOrder', 'get_readingDirection'), # pylint: disable=bad-whitespace - (TextRegionType, 'get_textLineOrder', 'get_readingDirection'), # pylint: disable=bad-whitespace - (TextLineType, None, 'get_readingDirection'), # pylint: disable=bad-whitespace - (WordType, None, 'get_readingDirection'), # pylint: disable=bad-whitespace + (PageType, 'get_textLineOrder', 'get_readingDirection'), + (TextRegionType, 'get_textLineOrder', 'get_readingDirection'), + (TextLineType, None, 'get_readingDirection'), + (WordType, None, 'get_readingDirection'), ] # The following parameters control how tolerant we are with respect to @@ -115,9 +115,9 @@ def __init__(self, tag, ID, file_id, actual, expected): self.file_id = file_id self.actual = actual self.expected = expected - super(ConsistencyError, self).__init__( - "INCONSISTENCY in %s ID '%s' of file '%s': text results '%s' != concatenated '%s'" % ( - tag, ID, file_id, actual, expected)) + super().__init__( + f"INCONSISTENCY in {tag} ID '{ID}' of file '{file_id}': " + f"text results '{actual}' != concatenated '{expected}'") class CoordinateConsistencyError(Exception): """ @@ -141,9 +141,9 @@ def __init__(self, tag, ID, file_id, outer, inner): self.file_id = file_id self.outer = outer self.inner = inner - super(CoordinateConsistencyError, self).__init__( - "INCONSISTENCY in %s ID '%s' of '%s': coords '%s' not within parent coords '%s'" % ( - tag, ID, file_id, inner, outer)) + super().__init__( + f"INCONSISTENCY in {tag} ID '{ID}' of '{file_id}': " + f"coords '{inner}' not within parent coords '{outer}'") class CoordinateValidityError(Exception): """ @@ -166,9 +166,8 @@ def __init__(self, tag, ID, file_id, points, reason='unknown'): self.ID = ID self.file_id = file_id self.points = points - super(CoordinateValidityError, self).__init__( - "INVALIDITY in %s ID '%s' of '%s': coords '%s' - %s" % ( - tag, ID, file_id, points, reason)) + super().__init__( + f"INVALIDITY in {tag} ID '{ID}' of '{file_id}': coords '{points}' - {reason}") def compare_without_whitespace(a, b): """ @@ -177,13 +176,14 @@ def compare_without_whitespace(a, b): return re.sub('\\s+', '', a) == re.sub('\\s+', '', b) def page_get_reading_order(ro, rogroup): - """Add all elements from the 
given reading order group to the given dictionary. - + """ + Add all elements from the given reading order group to the given dictionary. + Given a dict ``ro`` from layout element IDs to ReadingOrder element objects, and an object ``rogroup`` with additional ReadingOrder element objects, add all references to the dict, traversing the group recursively. """ - regionrefs = list() + regionrefs = [] if isinstance(rogroup, (OrderedGroupType, OrderedGroupIndexedType)): regionrefs = (rogroup.get_RegionRefIndexed() + rogroup.get_OrderedGroupIndexed() + @@ -236,17 +236,17 @@ def validate_consistency(node, page_textequiv_consistency, page_textequiv_strate and whether the coordinates of an element are fully within its parent element coordinates. """ log = getLogger('ocrd.page_validator.validate_consistency') - if isinstance(node, PcGtsType): + if isinstance(node, (PcGtsType, OcrdPage)): # top-level (start recursion) node_id = node.get_pcGtsId() node = node.get_Page() # has no .id if not readingOrder: - readingOrder = dict() + readingOrder = {} ro = node.get_ReadingOrder() if ro: page_get_reading_order(readingOrder, ro.get_OrderedGroup() or ro.get_UnorderedGroup()) if not joinRelations: - joinRelations = list() + joinRelations = [] relations = node.get_Relations() # get RelationsType if relations: relations = relations.get_Relation() # get list of RelationType @@ -358,7 +358,7 @@ def concatenate(nodes, concatenate_with, page_textequiv_strategy, joins=None): if not nodes: return '' if not joins: - joins = list() + joins = [] result = get_text(nodes[0], page_textequiv_strategy) for node, next_node in zip(nodes, nodes[1:]): if (node.id, next_node.id) not in joins: @@ -470,11 +470,11 @@ def validate(filename=None, ocrd_page=None, ocrd_file=None, page = parse(filename, silence=True) file_id = filename else: - raise Exception("At least one of ocrd_page, ocrd_file or filename must be set") + raise ValueError("At least one of ocrd_page, ocrd_file or filename must be set") if page_textequiv_strategy not in ('first'): - raise Exception("page_textequiv_strategy %s not implemented" % page_textequiv_strategy) + raise ValueError("page_textequiv_strategy %s not implemented" % page_textequiv_strategy) if page_textequiv_consistency not in ('strict', 'lax', 'fix', 'off'): - raise Exception("page_textequiv_consistency level %s not implemented" % page_textequiv_consistency) + raise ValueError("page_textequiv_consistency level %s not implemented" % page_textequiv_consistency) report = ValidationReport() log.info("Validating input file '%s'", file_id) validate_consistency(page, page_textequiv_consistency, page_textequiv_strategy, check_baseline, check_coords, report, file_id) diff --git a/src/ocrd_validators/parameter_validator.py b/src/ocrd_validators/parameter_validator.py index 20dd6ff2b7..ca2a7ed8ed 100644 --- a/src/ocrd_validators/parameter_validator.py +++ b/src/ocrd_validators/parameter_validator.py @@ -1,7 +1,7 @@ """ Validate parameters against ocrd-tool.json. 
""" -from .json_validator import JsonValidator, DefaultValidatingDraft6Validator +from .json_validator import DefaultValidatingDraft20199Validator, JsonValidator # # ------------------------------------------------- @@ -20,7 +20,7 @@ def validate(self, *args, **kwargs): # pylint: disable=arguments-differ obj (dict): schema (dict): """ - return super(ParameterValidator, self)._validate(*args, **kwargs) + return super()._validate(*args, **kwargs) def __init__(self, ocrd_tool): """ @@ -40,9 +40,9 @@ def __init__(self, ocrd_tool): if p[n]['required']: required.append(n) del(p[n]['required']) - super(ParameterValidator, self).__init__({ + super().__init__({ "type": "object", "required": required, "additionalProperties": False, "properties": p - }, DefaultValidatingDraft6Validator) + }, DefaultValidatingDraft20199Validator) diff --git a/src/ocrd_validators/resource_list_validator.py b/src/ocrd_validators/resource_list_validator.py index 72a11c34de..47f3c81a96 100644 --- a/src/ocrd_validators/resource_list_validator.py +++ b/src/ocrd_validators/resource_list_validator.py @@ -4,7 +4,7 @@ See `specs `_. """ from .constants import RESOURCE_LIST_SCHEMA -from .json_validator import JsonValidator, DefaultValidatingDraft6Validator +from .json_validator import DefaultValidatingDraft20199Validator, JsonValidator # # ------------------------------------------------- @@ -16,9 +16,10 @@ class OcrdResourceListValidator(JsonValidator): """ @staticmethod - def validate(obj, schema=RESOURCE_LIST_SCHEMA): + def validate(obj, schema=None): """ Validate against ``resource_list.schema.yml`` schema. """ - return JsonValidator(schema, validator_class=DefaultValidatingDraft6Validator)._validate(obj) - + if schema is None: + schema = RESOURCE_LIST_SCHEMA + return JsonValidator(schema, validator_class=DefaultValidatingDraft20199Validator)._validate(obj) # pylint: disable=protected-access diff --git a/src/ocrd_validators/workspace_validator.py b/src/ocrd_validators/workspace_validator.py index d5be460997..28d45495ea 100644 --- a/src/ocrd_validators/workspace_validator.py +++ b/src/ocrd_validators/workspace_validator.py @@ -103,7 +103,7 @@ def __init__(self, resolver, mets_url, src_dir=None, skip=None, download=False, 'page_xsd'] if check not in self.skip] - self.find_kwargs = dict(include_fileGrp=include_fileGrp, exclude_fileGrp=exclude_fileGrp) + self.find_kwargs = {"include_fileGrp": include_fileGrp, "exclude_fileGrp": exclude_fileGrp} self.src_dir = src_dir self.workspace = None self.mets = None @@ -139,7 +139,7 @@ def _validate(self): self._resolve_workspace() except Exception as e: # pylint: disable=broad-except self.log.warning("Failed to instantiate workspace: %s", e) - self.report.add_error("Failed to instantiate workspace: %s" % e) + self.report.add_error(f"Failed to instantiate workspace: {e}") return self.report with pushd_popd(self.workspace.directory): try: @@ -158,7 +158,7 @@ def _validate(self): if self.page_checks: self._validate_page() except Exception: # pylint: disable=broad-except - self.report.add_error("Validation aborted with exception: %s" % format_exc()) + self.report.add_error(f"Validation aborted with exception: {format_exc()}") return self.report def _resolve_workspace(self): @@ -193,9 +193,9 @@ def _validate_imagefilename(self): page = page_from_file(f).get_Page() imageFilename = page.imageFilename if not self.mets.find_files(url=imageFilename, **self.find_kwargs): - self.report.add_error("PAGE-XML %s : imageFilename '%s' not found in METS" % (f.local_filename, imageFilename)) + 
self.report.add_error(f"PAGE '{f.ID}': imageFilename '{imageFilename}' not found in METS") if is_local_filename(imageFilename) and not Path(imageFilename).exists(): - self.report.add_warning("PAGE-XML %s : imageFilename '%s' points to non-existent local file" % (f.local_filename, imageFilename)) + self.report.add_warning(f"PAGE '{f.ID}': imageFilename '{imageFilename}' points to non-existent local file") def _validate_dimension(self): """ @@ -210,9 +210,9 @@ def _validate_dimension(self): page = page_from_file(f).get_Page() _, _, exif = self.workspace.image_from_page(page, f.pageId) if page.imageHeight != exif.height: - self.report.add_error("PAGE '%s': @imageHeight != image's actual height (%s != %s)" % (f.ID, page.imageHeight, exif.height)) + self.report.add_error(f"PAGE '{f.ID}': @imageHeight != image's actual height ({page.imageHeight} != {exif.height})") if page.imageWidth != exif.width: - self.report.add_error("PAGE '%s': @imageWidth != image's actual width (%s != %s)" % (f.ID, page.imageWidth, exif.width)) + self.report.add_error(f"PAGE '{f.ID}': @imageWidth != image's actual width ({page.imageWidth} != {exif.width})") def _validate_multipage(self): """ @@ -229,9 +229,9 @@ def _validate_multipage(self): try: exif = self.workspace.resolve_image_exif(f.local_filename) if exif.n_frames > 1: - self.report.add_error("Image %s: More than 1 frame: %s" % (f.ID, exif.n_frames)) + self.report.add_error(f"Image '{f.ID}': More than 1 frame: {exif.n_frames}") except FileNotFoundError: - self.report.add_error("Image %s: Could not retrieve %s (local_filename=%s, url=%s)" % (f.ID, f.local_filename, f.url)) + self.report.add_error(f"Image '{f.ID}': Could not retrieve (local_filename='{f.local_filename}', url='{f.url}')") return def _validate_pixel_density(self): @@ -250,7 +250,7 @@ def _validate_pixel_density(self): for k in ['xResolution', 'yResolution']: v = exif.__dict__.get(k) if v is None or v <= 72: - self.report.add_notice("Image %s: %s (%s pixels per %s) is suspiciously low" % (f.ID, k, v, exif.resolutionUnit)) + self.report.add_notice(f"Image '{f.ID}': {k} ({v} pixels per {exif.resolutionUnit}) is suspiciously low") def _validate_mets_file_group_names(self): """ @@ -261,7 +261,7 @@ def _validate_mets_file_group_names(self): self.log.debug('_validate_mets_file_group_names') for fileGrp in self.mets.file_groups: if not fileGrp.startswith(FILE_GROUP_PREFIX): - self.report.add_notice("fileGrp USE does not begin with '%s': %s" % (FILE_GROUP_PREFIX, fileGrp)) + self.report.add_notice(f"fileGrp USE '{fileGrp}' does not begin with '{FILE_GROUP_PREFIX}'") else: # OCR-D-FOO-BAR -> ('FOO', 'BAR') # \____/\_/ \_/ @@ -273,9 +273,9 @@ def _validate_mets_file_group_names(self): if '-' in category: category, name = category.split('-', 1) if category not in FILE_GROUP_CATEGORIES: - self.report.add_notice("Unspecified USE category '%s' in fileGrp '%s'" % (category, fileGrp)) + self.report.add_notice(f"Unspecified USE category '{category}' in fileGrp '{fileGrp}'") if name is not None and not re.match(r'^[A-Z0-9-]{3,}$', name): - self.report.add_notice("Invalid USE name '%s' in fileGrp '%s'" % (name, fileGrp)) + self.report.add_notice(f"Invalid USE name '{name}' in fileGrp '{fileGrp}'") def _validate_mets_files(self): """ @@ -288,16 +288,16 @@ def _validate_mets_files(self): self.report.add_error("No files") for f in self.mets.find_files(**self.find_kwargs): if f._el.get('GROUPID'): # pylint: disable=protected-access - self.report.add_notice("File '%s' has GROUPID attribute - document might need an update" % 
f.ID) + self.report.add_notice(f"File '{f.ID}' has GROUPID attribute - document might need an update") if not (f.url or f.local_filename): - self.report.add_error("File '%s' has neither mets:Flocat[@LOCTYPE='URL']/@xlink:href nor mets:FLocat[@LOCTYPE='OTHER'][@OTHERLOCTYPE='FILE']/xlink:href" % f.ID) + self.report.add_error(f"File '{f.ID}' has neither mets:FLocat[@LOCTYPE='URL']/@xlink:href nor mets:FLocat[@LOCTYPE='OTHER'][@OTHERLOCTYPE='FILE']/@xlink:href") continue if f.url and 'url' not in self.skip: if re.match(r'^file:/[^/]', f.url): - self.report.add_error("File '%s' has an invalid (Java-specific) file URL '%s'" % (f.ID, f.url)) + self.report.add_error(f"File '{f.ID}' has an invalid (Java-specific) file URL '{f.url}'") scheme = f.url[0:f.url.index(':')] if scheme not in ('http', 'https', 'file'): - self.report.add_warning("File '%s' has non-HTTP, non-file URL '%s'" % (f.ID, f.url)) + self.report.add_warning(f"File '{f.ID}' has non-HTTP, non-file URL '{f.url}'") def _validate_page(self): """ @@ -323,15 +323,15 @@ def _validate_page(self): if 'dimension' in self.page_checks: _, _, exif = self.workspace.image_from_page(page, f.pageId) if page.imageHeight != exif.height: - self.report.add_error("PAGE '%s': @imageHeight != image's actual height (%s != %s)" % (f.ID, page.imageHeight, exif.height)) + self.report.add_error(f"PAGE '{f.ID}': @imageHeight != image's actual height ({page.imageHeight} != {exif.height})") if page.imageWidth != exif.width: - self.report.add_error("PAGE '%s': @imageWidth != image's actual width (%s != %s)" % (f.ID, page.imageWidth, exif.width)) + self.report.add_error(f"PAGE '{f.ID}': @imageWidth != image's actual width ({page.imageWidth} != {exif.width})") if 'imagefilename' in self.page_checks: imageFilename = page.imageFilename if not self.mets.find_files(url=imageFilename): - self.report.add_error("PAGE-XML %s : imageFilename '%s' not found in METS" % (f.url, imageFilename)) + self.report.add_error(f"PAGE '{f.ID}': imageFilename '{imageFilename}' not found in METS") if is_local_filename(imageFilename) and not Path(imageFilename).exists(): - self.report.add_warning("PAGE-XML %s : imageFilename '%s' points to non-existent local file" % (f.url, imageFilename)) + self.report.add_warning(f"PAGE '{f.ID}': imageFilename '{imageFilename}' points to non-existent local file") if 'mets_fileid_page_pcgtsid' in self.page_checks and pcgts.pcGtsId != f.ID: self.report.add_warning('pc:PcGts/@pcGtsId differs from mets:file/@ID: "%s" !== "%s"' % (pcgts.pcGtsId or '', f.ID or '')) diff --git a/src/ocrd_validators/xsd_validator.py b/src/ocrd_validators/xsd_validator.py index 81b9457564..92e4502124 100644 --- a/src/ocrd_validators/xsd_validator.py +++ b/src/ocrd_validators/xsd_validator.py @@ -45,7 +45,7 @@ def __init__(self, schema_url): schema_url (str): URI of XML schema to validate against. 
""" if schema_url not in XSD_PATHS: - raise Exception('XML schema not bundled with OCR-D: %s' % schema_url) + raise ValueError('XML schema not bundled with OCR-D: %s' % schema_url) with open(XSD_PATHS[schema_url], 'r') as f: xmlschema_doc = ET.parse(f) self._xmlschema = ET.XMLSchema(xmlschema_doc) diff --git a/tests/base.py b/tests/base.py index 53f393e08d..9eb1f20db8 100644 --- a/tests/base.py +++ b/tests/base.py @@ -26,8 +26,6 @@ class TestCase(VanillaTestCase): def setUp(self): chdir(dirname(realpath(__file__)) + '/..') - disableLogging() - initLogging(builtin_only=True) class CapturingTestCase(TestCase): """ diff --git a/tests/cli/test_bashlib.py b/tests/cli/test_bashlib.py index ab52b6b1ba..ba7c283e40 100644 --- a/tests/cli/test_bashlib.py +++ b/tests/cli/test_bashlib.py @@ -1,4 +1,6 @@ from contextlib import contextmanager +import re +from typing import Tuple, Union from tests.base import CapturingTestCase as TestCase, main, assets, copy_of_directory import os, sys @@ -20,6 +22,13 @@ from ocrd_utils import pushd_popd +def parse_version(v : str) -> Union[Tuple[int, int, int], Tuple[int, int, int, str]]: + tokens = re.split('((?:a|b|rc)[0-9]+)', v, 1) + version_wo_suffix = tokens[0] + prerelease_suffix = tokens[1] if len(tokens) > 1 else '' + (major, minor, patch) = map(int, version_wo_suffix.split('.')) + return (major, minor, patch, prerelease_suffix) + class TestBashlibCli(TestCase): def invoke_bash(self, script, *args, executable=None): @@ -50,7 +59,7 @@ def invoke_bash(self, script, *args, executable=None): return -1, "", str(e) finally: os.remove(scriptfile.name) - + def setUp(self): self.maxDiff = None super().setUp() @@ -89,7 +98,7 @@ def test_constants_fail(self): def test_input_files(self): with copy_of_directory(assets.path_to('kant_aufklaerung_1784/data')) as wsdir: with pushd_popd(wsdir): - _, out, err = self.invoke_cli(bashlib_cli, ['input-files', '-I', 'OCR-D-IMG']) + _, out, err = self.invoke_cli(bashlib_cli, ['input-files', '-I', 'OCR-D-IMG', '-O', 'OUTPUT']) assert ("[url]='' [local_filename]='OCR-D-IMG/INPUT_0017.tif' [ID]='INPUT_0017' [mimetype]='image/tiff' " "[pageId]='PHYS_0017' [outputFileId]='OUTPUT_PHYS_0017'") in out @@ -101,15 +110,22 @@ def test_bashlib_defs(self): assert 'function' in out def test_bashlib_minversion(self): - exit_code, out, err = self.invoke_bash( - "source $(ocrd bashlib filename) && ocrd__minversion 2.29.0") + exit_code, out, err = self.invoke_bash("source $(ocrd bashlib filename) && ocrd__minversion 2.29.0") assert exit_code == 0 - (major, minor, patch) = map(int, str(VERSION).split('.')) + major, minor, patch, prerelease_suffix = parse_version(VERSION) + + # test normal version with impossible minimum minor version version = "%d.%d.%d" % (major, minor + 1, patch) - exit_code, out, err = self.invoke_bash( - "source $(ocrd bashlib filename) && ocrd__minversion " + version) + exit_code, out, err = self.invoke_bash("source $(ocrd bashlib filename) && ocrd__minversion " + version) + assert exit_code > 0 + assert f"ERROR: ocrd/core is too old ({VERSION} < {version})" in err + + # test non-matching prerelease (the 99th alpha pre-release here) + version = "%d.%d.%da99" % (major, minor, patch) + assert VERSION != version # assuming we will never have 99 alpha prereleases ^^ + exit_code, out, err = self.invoke_bash("source $(ocrd bashlib filename) && ocrd__minversion " + version) assert exit_code > 0 - assert "ERROR: ocrd/core is too old" in err + assert f"ERROR: ocrd/core is too old ({VERSION} < {version})" in err def 
test_bashlib_cp_processor(self): # script = (Path(__file__).parent.parent / 'data/bashlib_cp_processor.sh').read_text() diff --git a/tests/cli/test_log.py b/tests/cli/test_log.py index c63d78c318..3d81e8266b 100644 --- a/tests/cli/test_log.py +++ b/tests/cli/test_log.py @@ -6,8 +6,8 @@ from tests.base import CapturingTestCase as TestCase, main, assets, copy_of_directory from ocrd.decorators import ocrd_loglevel -from ocrd_utils import setOverrideLogLevel, logging, disableLogging -import logging as python_logging +from ocrd_utils import disableLogging, initLogging +import logging @click.group() @ocrd_loglevel @@ -18,14 +18,19 @@ def mock_ocrd_cli(log_level): class TestLogCli(TestCase): def _get_log_output(self, *args): - disableLogging() code, out, err = self.invoke_cli(mock_ocrd_cli, args) print({'code': code, 'out': out, 'err': err}) return err + def setUp(self): + super().setUp() + initLogging() + def tearDown(self): if 'OCRD_TOOL_NAME' in ENV: del(ENV['OCRD_TOOL_NAME']) + super().tearDown() + disableLogging() def test_loglevel(self): assert 'DEBUG ocrd.log_cli - foo' not in self._get_log_output('log', 'debug', 'foo') diff --git a/tests/cli/test_validate.py b/tests/cli/test_validate.py index 36ee3e5995..12e87f4dc9 100644 --- a/tests/cli/test_validate.py +++ b/tests/cli/test_validate.py @@ -21,8 +21,8 @@ "ocrd-xyz": { "executable": "ocrd-xyz", "description": "bars all the foos", - "input_file_grp": ["OCR-D-FOO"], - "output_file_grp": ["OCR-D-BAR"], + "input_file_grp_cardinality": [1, 2], + "output_file_grp_cardinality": 1, "categories": ["Layout analysis"], "steps": ["layout/analysis"], "parameters": { @@ -57,24 +57,24 @@ def test_validate_ocrd_tool(self): json_path.write_text(OCRD_TOOL) # normal call - code, _, _ = self.invoke_cli(validate_cli, ['tool-json', str(json_path)]) - self.assertEqual(code, 0) + code, out, err = self.invoke_cli(validate_cli, ['tool-json', str(json_path)]) + self.assertEqual(code, 0, out + err) # relative path with pushd_popd(tempdir): - code, _, _ = self.invoke_cli(validate_cli, ['tool-json', 'ocrd-tool.json']) - self.assertEqual(code, 0) + code, out, err = self.invoke_cli(validate_cli, ['tool-json', 'ocrd-tool.json']) + self.assertEqual(code, 0, out + err) # default path with pushd_popd(tempdir): - code, _, _ = self.invoke_cli(validate_cli, ['tool-json']) - self.assertEqual(code, 0) + code, out, err = self.invoke_cli(validate_cli, ['tool-json']) + self.assertEqual(code, 0, out + err) def test_validate_parameter(self): with TemporaryDirectory() as tempdir: json_path = Path(tempdir, 'ocrd-tool.json') json_path.write_text(OCRD_TOOL) with pushd_popd(tempdir): - code, _, _ = self.invoke_cli(validate_cli, ['parameters', 'ocrd-tool.json', 'ocrd-xyz', dumps({"baz": "foo"})]) - self.assertEqual(code, 0) + code, out, err = self.invoke_cli(validate_cli, ['parameters', 'ocrd-tool.json', 'ocrd-xyz', dumps({"baz": "foo"})]) + self.assertEqual(code, 0, out + err) def test_validate_page(self): page_path = assets.path_to('glyph-consistency/data/OCR-D-GT-PAGE/FAULTY_GLYPHS.xml') @@ -84,19 +84,18 @@ def test_validate_page(self): def test_validate_tasks(self): # simple - code, _, _ = self.invoke_cli(validate_cli, ['tasks', + code, out, err = self.invoke_cli(validate_cli, ['tasks', "sample-processor-required-param -I FOO -O OUT1 -p '{\"param1\": true}'", "sample-processor-required-param -I FOO -O OUT2 -p '{\"param1\": true}'", ]) - self.assertEqual(code, 0) + self.assertEqual(code, 0, out + err) # with workspace code, out, err = self.invoke_cli(validate_cli, ['tasks', 
'--workspace', assets.path_to('kant_aufklaerung_1784/data'), "sample-processor-required-param -I OCR-D-IMG,OCR-D-GT-PAGE -O OUT1 -p '{\"param1\": true}'", "sample-processor-required-param -I OCR-D-IMG,OCR-D-GT-PAGE -O OUT2 -p '{\"param1\": true}'", ]) - print('code=%s out=%s err=%s' % (code, out, err)) - self.assertEqual(code, 0) + self.assertEqual(code, 0, out + err) if __name__ == '__main__': diff --git a/tests/data/__init__.py b/tests/data/__init__.py index 93a2ea49a9..56779a6119 100644 --- a/tests/data/__init__.py +++ b/tests/data/__init__.py @@ -1,12 +1,19 @@ +from functools import cached_property import json import os -from ocrd import Processor -from ocrd_utils import make_file_id +from time import sleep +from pytest import warns +from ocrd import Processor, OcrdPageResult +from ocrd_utils import make_file_id, config DUMMY_TOOL = { 'executable': 'ocrd-test', 'description': 'dolor sit', 'steps': ['recognition/post-correction'], + # as we bypass Processor.metadata with OcrdToolValidator + # we get no default expansion, so add default cardinalities here + 'input_file_grp_cardinality': 1, + 'output_file_grp_cardinality': 1, 'parameters': { 'baz': { 'type': 'string', @@ -17,34 +24,148 @@ } class DummyProcessor(Processor): + @property + def ocrd_tool(self): + return DUMMY_TOOL + + @property + def version(self): + return '0.0.1' + + @property + def executable(self): + return 'ocrd-test' def __init__(self, *args, **kwargs): - kwargs['ocrd_tool'] = DUMMY_TOOL - kwargs['version'] = '0.0.1' - super(DummyProcessor, self).__init__(*args, **kwargs) + kwargs['download_files'] = False + super().__init__(*args, **kwargs) def process(self): print(json.dumps(self.parameter)) + # override to prevent iterating over empty files + def process_workspace(self, workspace): + with warns(DeprecationWarning, match='should be replaced with process_page'): + self.process() + class DummyProcessorWithRequiredParameters(Processor): - def process(self): pass - def __init__(self, *args, **kwargs): - kwargs['version'] = '0.0.1' - kwargs['ocrd_tool'] = { + @property + def ocrd_tool(self): + return { 'executable': 'ocrd-test', 'steps': ['recognition/post-correction'], 'parameters': { 'i-am-required': {'required': True} } } - super(DummyProcessorWithRequiredParameters, self).__init__(*args, **kwargs) + @property + def version(self): + return '0.0.1' + + @property + def executable(self): + return 'ocrd-test' + + def __init__(self, *args, **kwargs): + kwargs['download_files'] = False + super().__init__(*args, **kwargs) + + def process(self): pass class DummyProcessorWithOutput(Processor): + @cached_property + def ocrd_tool(self): + return DUMMY_TOOL + + @cached_property + def version(self): + return '0.0.1' + + @cached_property + def executable(self): + return 'ocrd-test' def __init__(self, *args, **kwargs): + kwargs['download_files'] = False + super().__init__(*args, **kwargs) + + def process(self): + # print([str(x) for x in self.input_files] + for input_file in self.input_files: + file_id = make_file_id(input_file, self.output_file_grp) + # print(input_file.ID, file_id) + self.workspace.add_file( + file_id=file_id, + file_grp=self.output_file_grp, + page_id=input_file.pageId, + mimetype=input_file.mimetype, + local_filename=os.path.join(self.output_file_grp, file_id), + content='CONTENT', + ) + +class DummyProcessorWithOutputSleep(Processor): + @property + def ocrd_tool(self): + # make deep copy + dummy_tool = json.loads(json.dumps(DUMMY_TOOL)) + dummy_tool['parameters']['sleep'] = {'type': 'number'} + return dummy_tool 
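
The `json.loads(json.dumps(...))` round-trip above deep-copies the shared DUMMY_TOOL dict, so the extra `sleep` parameter does not leak into the other dummy processors that reuse it. For a JSON-serializable dict like this, `copy.deepcopy` would be an equivalent alternative (a minimal sketch, not part of the patch):

    import copy

    dummy_tool = copy.deepcopy(DUMMY_TOOL)  # independent copy; DUMMY_TOOL stays untouched
    dummy_tool['parameters']['sleep'] = {'type': 'number'}
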
+ + @property + def version(self): + return '0.0.1' + + @property + def executable(self): + return 'ocrd-test' + + def __init__(self, *args, **kwargs): + kwargs['download_files'] = False + super().__init__(*args, **kwargs) + + def process_page_pcgts(self, pcgts, page_id=None): + sleep(self.parameter['sleep']) + return OcrdPageResult(pcgts) + +class DummyProcessorWithOutputFailures(Processor): + @cached_property + def ocrd_tool(self): + return DUMMY_TOOL + + @cached_property + def version(self): + return '0.0.1' + + @cached_property + def executable(self): + return 'ocrd-test' + + def __init__(self, *args, **kwargs): + kwargs['download_files'] = False + super().__init__(*args, **kwargs) + + # no error handling with old process(), so override new API + def process_page_file(self, input_file): + n = self.workspace.mets.physical_pages.index(input_file.pageId) + 1 + if n % 2: + raise Exception(f"intermittent failure on page {input_file.pageId}") + output_file_id = make_file_id(input_file, self.output_file_grp) + self.workspace.add_file(file_id=output_file_id, + file_grp=self.output_file_grp, + page_id=input_file.pageId, + local_filename=os.path.join(self.output_file_grp, output_file_id), + mimetype=input_file.mimetype, + content='CONTENT', + ) + +class DummyProcessorWithOutputLegacy(Processor): + def __init__(self, *args, **kwargs): + kwargs['download_files'] = False kwargs['ocrd_tool'] = DUMMY_TOOL kwargs['version'] = '0.0.1' super().__init__(*args, **kwargs) + if hasattr(self, 'output_file_grp'): + self.setup() def process(self): # print([str(x) for x in self.input_files] @@ -52,14 +173,21 @@ def process(self): file_id = make_file_id(input_file, self.output_file_grp) # print(input_file.ID, file_id) self.workspace.add_file( - ID=file_id, + file_id=file_id, file_grp=self.output_file_grp, - pageId=input_file.pageId, + page_id=input_file.pageId, mimetype=input_file.mimetype, local_filename=os.path.join(self.output_file_grp, file_id), - content='CONTENT') + content='CONTENT', + ) class IncompleteProcessor(Processor): - pass + @property + def executable(self): + return 'ocrd-foo' + + @property + def metadata_rawdict(self): + return {'tools': {self.executable: {}}} diff --git a/tests/data/ocrd-cp.ocrd-tool.json b/tests/data/ocrd-cp.ocrd-tool.json index 728c144c50..948695c06d 100755 --- a/tests/data/ocrd-cp.ocrd-tool.json +++ b/tests/data/ocrd-cp.ocrd-tool.json @@ -1,15 +1,18 @@ { - "version": "1.0", + "version": "1.0.0", "tools": { "ocrd-cp": { "executable": "ocrd-cp", "description": "dummy processor copying", "steps": ["preprocessing/optimization"], "categories": ["Image preprocessing"], + # we allow 1 or 2 input file grps + # the output cardinality gets expanded from default + "input_file_grp_cardinality": [1,2], "parameters": { "message": { "type": "string", - "default": "", + "default": "hello by default", "description": "message to print on stdout" } } diff --git a/tests/model/test_ocrd_mets.py b/tests/model/test_ocrd_mets.py index 739db7625a..89742a507e 100644 --- a/tests/model/test_ocrd_mets.py +++ b/tests/model/test_ocrd_mets.py @@ -248,7 +248,7 @@ def test_file_pageid(sbb_sample_01): def test_agent(sbb_sample_01): beforelen = len(sbb_sample_01.agents) - sbb_sample_01.add_agent('foo bar v0.0.1', 'OTHER', 'OTHER', 'YETOTHERSTILL') + sbb_sample_01.add_agent(name='foo bar v0.0.1', _type='OTHER', othertype='OTHER', role='YETOTHERSTILL') assert len(sbb_sample_01.agents) == beforelen + 1 def test_metshdr(): diff --git a/tests/model/test_ocrd_page.py b/tests/model/test_ocrd_page.py index 
7dc130809f..97335775d6 100644 --- a/tests/model/test_ocrd_page.py +++ b/tests/model/test_ocrd_page.py @@ -460,7 +460,7 @@ def test_id(): # TODO: is this *really* desired? # I would expect for a single Page-Element the ID is like from the top-level-Pgts-Container, not like a fileName - assert pcgts.get_Page().id == 'OCR-D-IMG/INPUT_0017.tif' + assert pcgts.get_Page().id == 'OCR-D-IMG_INPUT_0017.tif' if __name__ == '__main__': diff --git a/tests/network/test_integration_4_processing_worker.py b/tests/network/test_integration_4_processing_worker.py index e211bd2381..ae322b0978 100644 --- a/tests/network/test_integration_4_processing_worker.py +++ b/tests/network/test_integration_4_processing_worker.py @@ -1,6 +1,6 @@ from pathlib import Path from pika import BasicProperties -from src.ocrd.processor.builtin.dummy_processor import DummyProcessor, OCRD_TOOL +from src.ocrd.processor.builtin.dummy_processor import DummyProcessor from src.ocrd_network.constants import JobState from src.ocrd_network.database import sync_db_create_workspace, sync_db_create_processing_job from src.ocrd_network.logging_utils import get_processing_job_logging_file_path @@ -25,12 +25,13 @@ def test_processing_worker_process_message(): # wrong reads from the deployed dummy worker (part of the processing server integration test) processor_name = "ocrd-dummy-test" result_queue_name = f"{processor_name}-result" + ocrd_tool = DummyProcessor(None).metadata processing_worker = ProcessingWorker( rabbitmq_addr=test_config.RABBITMQ_URL, mongodb_addr=test_config.DB_URL, processor_name=processor_name, - ocrd_tool=OCRD_TOOL, + ocrd_tool=ocrd_tool, processor_class=DummyProcessor ) processing_worker.connect_publisher(enable_acks=True) diff --git a/tests/processor/test_ocrd_dummy.py b/tests/processor/test_ocrd_dummy.py index 41b585c6b9..b85379e47d 100644 --- a/tests/processor/test_ocrd_dummy.py +++ b/tests/processor/test_ocrd_dummy.py @@ -33,7 +33,7 @@ def test_copies_ok(self): output_files = workspace.mets.find_all_files(fileGrp='OUTPUT') output_files.sort(key=lambda x: x.url) assert output_files[0].local_filename == 'OUTPUT/OUTPUT_PHYS_0001.tif' - assert output_files[1].local_filename == 'OUTPUT/OUTPUT_PHYS_0001.xml' + assert output_files[1].local_filename == 'OUTPUT/OUTPUT_PHYS_0001_PAGE.xml' self.assertEqual(page_from_file(output_files[1]).pcGtsId, output_files[1].ID) assert page_from_file(output_files[1]).get_Page().imageFilename == str(output_files[0].local_filename) self.assertEqual(len(output_files), 6) diff --git a/tests/processor/test_processor.py b/tests/processor/test_processor.py index 784f68fc3d..06c129c3ca 100644 --- a/tests/processor/test_processor.py +++ b/tests/processor/test_processor.py @@ -1,3 +1,4 @@ +from functools import cached_property import json from contextlib import ExitStack @@ -5,33 +6,50 @@ from pathlib import Path from os import environ from tests.base import CapturingTestCase as TestCase, assets, main, copy_of_directory # pylint: disable=import-error, no-name-in-module -from tests.data import DummyProcessor, DummyProcessorWithRequiredParameters, DummyProcessorWithOutput, IncompleteProcessor +from tests.data import ( + DummyProcessor, + DummyProcessorWithRequiredParameters, + DummyProcessorWithOutput, + DummyProcessorWithOutputLegacy, + DummyProcessorWithOutputSleep, + DummyProcessorWithOutputFailures, + IncompleteProcessor +) +from tests.test_mets_server import fixture_start_mets_server -from ocrd_utils import MIMETYPE_PAGE, pushd_popd, initLogging, disableLogging +from ocrd_utils import MIMETYPE_PAGE, 
pushd_popd, initLogging, disableLogging, config from ocrd.resolver import Resolver -from ocrd.processor.base import Processor, run_processor, run_cli +from ocrd.processor import Processor, run_processor, run_cli, NonUniqueInputFile +from ocrd.processor.helpers import get_processor from unittest import mock import pytest class TestProcessor(TestCase): + def run(self, result=None): + with copy_of_directory(assets.path_to('SBB0000F29300010000/data')) as workdir: + with pushd_popd(workdir): + self.resolver = Resolver() + self.workspace = self.resolver.workspace_from_url('mets.xml') + super().run(result=result) + def setUp(self): super().setUp() - # make sure we get an isolated temporary copy of the testdata each time - # as long as we are not using pytest but unittest, we need to manage contexts - # (enterContext is only supported starting with py311) - with ExitStack() as stack: - self.resolver = Resolver() - self.workdir = stack.enter_context(copy_of_directory(assets.path_to('SBB0000F29300010000/data'))) - stack.enter_context(pushd_popd(self.workdir)) - self.workspace = self.resolver.workspace_from_url('mets.xml') - self.addCleanup(stack.pop_all().close) + initLogging() + + def tearDown(self): + super().tearDown() + config.reset_defaults() + disableLogging() def test_incomplete_processor(self): proc = IncompleteProcessor(None) + proc.input_file_grp = 'OCR-D-IMG' + proc.output_file_grp = 'DUMMY' + proc.page_id = None with self.assertRaises(NotImplementedError): - proc.process() + proc.process_workspace(self.workspace) def test_no_resolver(self): with self.assertRaisesRegex(Exception, 'pass a resolver to create a workspace'): @@ -54,34 +72,101 @@ def test_with_mets_url_input_files(self): input_file_grp='OCR-D-SEG-PAGE', resolver=self.resolver, workspace=self.workspace) + processor.workspace = self.workspace assert len(processor.input_files) == 2 assert [f.mimetype for f in processor.input_files] == [MIMETYPE_PAGE, MIMETYPE_PAGE] def test_parameter(self): with TemporaryDirectory(): - jsonpath = Path('params.json').name + jsonpath = 'params.json' with open(jsonpath, 'w') as f: f.write('{"baz": "quux"}') with open(jsonpath, 'r') as f: + parameter = json.load(f) processor = run_processor( DummyProcessor, - parameter=json.load(f), + parameter=parameter, input_file_grp="OCR-D-IMG", resolver=self.resolver, workspace=self.workspace ) - self.assertEqual(len(processor.input_files), 3) + self.assertEqual(processor.parameter['baz'], 'quux') + processor = get_processor( + DummyProcessor, + parameter=parameter) + with self.assertRaises(TypeError): + processor.parameter['baz'] = 'xuuq' + processor.parameter = { **parameter, 'baz': 'xuuq' } + self.assertEqual(processor.parameter['baz'], 'xuuq') + + def test_instance_caching(self): + class DyingDummyProcessor(DummyProcessor): + max_instances = 10 + def shutdown(self): + # fixme: will only print _after_ pytest exits, so too late for assertions + #print(self.parameter['baz']) + pass + self.capture_out_err() + # customize (as processor implementors would) + firstp = None + for i in range(DyingDummyProcessor.max_instances + 2): + p = get_processor( + DyingDummyProcessor, + parameter={'baz': str(i)}, + instance_caching=True + ) + if i == 0: + firstp = p + lastp = p + p = get_processor(DyingDummyProcessor, + parameter={'baz': '0'}, + instance_caching=True + ) + # should not be cached anymore + self.assertNotEqual(firstp, p) + p = get_processor(DyingDummyProcessor, + parameter={'baz': str(i)}, + instance_caching=True + ) + # should still be cached + 
self.assertEqual(lastp, p) + from ocrd.processor.helpers import get_cached_processor + get_cached_processor.__wrapped__.cache_clear() + p = get_processor(DyingDummyProcessor, + parameter={'baz': str(i)}, + instance_caching=True + ) + # should not be cached anymore + self.assertNotEqual(lastp, p) + # fixme: will only print _after_ pytest exits, so too late for assertions + #out, err = self.capture_out_err() + #assert '0' in out.split('\n') def test_verify(self): - proc = DummyProcessor(self.workspace) + proc = DummyProcessor(None) + with self.assertRaises(AttributeError): + proc.verify() + proc.workspace = self.workspace + proc.input_file_grp = "OCR-D-IMG" + proc.output_file_grp = "DUMMY" self.assertEqual(proc.verify(), True) def test_json(self): - DummyProcessor(self.workspace, dump_json=True) + DummyProcessor(None).dump_json() def test_params_missing_required(self): - with self.assertRaisesRegex(Exception, 'is a required property'): - DummyProcessorWithRequiredParameters(workspace=self.workspace) + proc = DummyProcessorWithRequiredParameters(None) + assert proc.parameter is None + with self.assertRaisesRegex(ValueError, 'is a required property'): + proc.parameter = {} + with self.assertRaisesRegex(ValueError, 'is a required property'): + get_processor(DummyProcessorWithRequiredParameters) + with self.assertRaisesRegex(ValueError, 'is a required property'): + get_processor(DummyProcessorWithRequiredParameters, parameter={}) + with self.assertRaisesRegex(ValueError, 'is a required property'): + run_processor(DummyProcessorWithRequiredParameters, + workspace=self.workspace, input_file_grp="OCR-D-IMG") + proc.parameter = {'i-am-required': 'foo'} def test_params_preset_resolve(self): with pushd_popd(tempdir=True) as tempdir: @@ -107,12 +192,19 @@ def test_params_preset_resolve(self): overwrite=True) def test_params(self): - proc = Processor(workspace=self.workspace) + class ParamTestProcessor(Processor): + @cached_property + def ocrd_tool(self): + return {} + proc = ParamTestProcessor(None) + self.assertEqual(proc.parameter, None) + # get_processor will set to non-none and validate + proc = get_processor(ParamTestProcessor) self.assertEqual(proc.parameter, {}) def test_run_agent(self): no_agents_before = len(self.workspace.mets.agents) - run_processor(DummyProcessor, workspace=self.workspace) + run_processor(DummyProcessor, workspace=self.workspace, input_file_grp="OCR-D-IMG") self.assertEqual(len(self.workspace.mets.agents), no_agents_before + 1, 'one more agent') # print(self.workspace.mets.agents[no_agents_before]) @@ -125,27 +217,86 @@ def test_run_input(self): def test_run_output0(self): with pushd_popd(tempdir=True) as tempdir: ws = self.resolver.workspace_from_nothing(directory=tempdir) - ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, ID='foobar1', pageId='phys_0001') - ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, ID='foobar2', pageId='phys_0002') + ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, file_id='foobar1', page_id='phys_0001') + ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, file_id='foobar2', page_id='phys_0002') run_processor(DummyProcessorWithOutput, workspace=ws, input_file_grp="GRP1", output_file_grp="OCR-D-OUT") assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == 2 + def test_run_output_legacy(self): + ws = self.workspace + run_processor(DummyProcessorWithOutputLegacy, + workspace=ws, + input_file_grp="OCR-D-IMG", + output_file_grp="OCR-D-OUT") + assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == len(ws.mets.find_all_files(fileGrp="OCR-D-IMG")) + + def 
test_run_output_missing(self): + ws = self.workspace + # do not raise for number of failures: + config.OCRD_MAX_MISSING_OUTPUTS = -1 + config.OCRD_MISSING_OUTPUT = 'SKIP' + run_processor(DummyProcessorWithOutputFailures, workspace=ws, + input_file_grp="OCR-D-IMG", + output_file_grp="OCR-D-OUT") + # only half succeed + assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == len(ws.mets.find_all_files(fileGrp="OCR-D-IMG")) // 2 + config.OCRD_MISSING_OUTPUT = 'ABORT' + config.OCRD_EXISTING_OUTPUT = 'OVERWRITE' + with pytest.raises(Exception) as exc: + run_processor(DummyProcessorWithOutputFailures, workspace=ws, + input_file_grp="OCR-D-IMG", + output_file_grp="OCR-D-OUT") + assert "intermittent" in str(exc.value) + config.OCRD_MISSING_OUTPUT = 'COPY' + config.OCRD_EXISTING_OUTPUT = 'SKIP' + run_processor(DummyProcessorWithOutputFailures, workspace=ws, + input_file_grp="OCR-D-IMG", + output_file_grp="OCR-D-OUT") + assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == len(ws.mets.find_all_files(fileGrp="OCR-D-IMG")) + # do raise for number of failures: + config.OCRD_MAX_MISSING_OUTPUTS = 0.4 + config.OCRD_MISSING_OUTPUT = 'SKIP' + with pytest.raises(Exception) as exc: + run_processor(DummyProcessorWithOutputFailures, workspace=ws, + input_file_grp="OCR-D-IMG", + output_file_grp="OCR-D-OUT") + assert "too many failures" in str(exc.value) + + def test_run_output_timeout(self): + ws = self.workspace + # do not raise for number of failures: + config.OCRD_MAX_MISSING_OUTPUTS = -1 + config.OCRD_MISSING_OUTPUT = 'ABORT' + config.OCRD_PROCESSING_PAGE_TIMEOUT = 3 + run_processor(DummyProcessorWithOutputSleep, workspace=ws, + input_file_grp="OCR-D-IMG", + output_file_grp="OCR-D-OUT", + parameter={"sleep": 1}) + assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == len(ws.mets.find_all_files(fileGrp="OCR-D-IMG")) + config.OCRD_EXISTING_OUTPUT = 'OVERWRITE' + config.OCRD_PROCESSING_PAGE_TIMEOUT = 1 + with pytest.raises(TimeoutError) as exc: + run_processor(DummyProcessorWithOutputSleep, workspace=ws, + input_file_grp="OCR-D-IMG", + output_file_grp="OCR-D-OUT", + parameter={"sleep": 3}) + def test_run_output_overwrite(self): with pushd_popd(tempdir=True) as tempdir: ws = self.resolver.workspace_from_nothing(directory=tempdir) - ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, ID='foobar1', pageId='phys_0001') - ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, ID='foobar2', pageId='phys_0002') - ws.overwrite_mode = True - ws.add_file('OCR-D-OUT', mimetype=MIMETYPE_PAGE, ID='OCR-D-OUT_phys_0001', pageId='phys_0001') - ws.overwrite_mode = False + ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, file_id='foobar1', page_id='phys_0001') + ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, file_id='foobar2', page_id='phys_0002') + config.OCRD_EXISTING_OUTPUT = 'OVERWRITE' + ws.add_file('OCR-D-OUT', mimetype=MIMETYPE_PAGE, file_id='OCR-D-OUT_phys_0001', page_id='phys_0001') + config.OCRD_EXISTING_OUTPUT = 'ABORT' with pytest.raises(Exception) as exc: run_processor(DummyProcessorWithOutput, workspace=ws, input_file_grp="GRP1", output_file_grp="OCR-D-OUT") - assert str(exc.value) == "File with ID='OCR-D-OUT_phys_0001' already exists" - ws.overwrite_mode = True + assert "already exists" in str(exc.value) + config.OCRD_EXISTING_OUTPUT = 'OVERWRITE' run_processor(DummyProcessorWithOutput, workspace=ws, input_file_grp="GRP1", output_file_grp="OCR-D-OUT") @@ -153,7 +304,9 @@ def test_run_output_overwrite(self): def test_run_cli(self): with TemporaryDirectory() as tempdir: - run_processor(DummyProcessor, workspace=self.workspace) + 
run_processor(DummyProcessor, workspace=self.workspace, + input_file_grp='OCR-D-IMG', + output_file_grp='OUTPUT') run_cli( 'echo', mets_url=assets.url_of('SBB0000F29300010000/data/mets.xml'), @@ -173,7 +326,10 @@ def test_run_cli(self): ) def test_zip_input_files(self): - class ZipTestProcessor(Processor): pass + class ZipTestProcessor(Processor): + @property + def ocrd_tool(self): + return {} with pushd_popd(tempdir=True) as tempdir: ws = self.resolver.workspace_from_nothing(directory=tempdir) ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, file_id='foobar1', page_id='phys_0001') @@ -182,7 +338,10 @@ class ZipTestProcessor(Processor): pass ws.add_file('GRP2', mimetype=MIMETYPE_PAGE, file_id='foobar4', page_id='phys_0002') for page_id in [None, 'phys_0001,phys_0002']: with self.subTest(page_id=page_id): - proc = ZipTestProcessor(workspace=ws, input_file_grp='GRP1,GRP2', page_id=page_id) + proc = ZipTestProcessor(None) + proc.workspace = ws + proc.input_file_grp = 'GRP1,GRP2' + proc.page_id = page_id tuples = [(one.ID, two.ID) for one, two in proc.zip_input_files()] assert ('foobar1', 'foobar2') in tuples assert ('foobar3', 'foobar4') in tuples @@ -193,7 +352,10 @@ class ZipTestProcessor(Processor): pass assert ('foobar3', 'foobar4') in tuples def test_zip_input_files_multi_mixed(self): - class ZipTestProcessor(Processor): pass + class ZipTestProcessor(Processor): + @property + def ocrd_tool(self): + return {} with pushd_popd(tempdir=True) as tempdir: ws = self.resolver.workspace_from_nothing(directory=tempdir) ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, file_id='foobar1', page_id='phys_0001') @@ -204,7 +366,10 @@ class ZipTestProcessor(Processor): pass ws.add_file('GRP2', mimetype='image/tiff', file_id='foobar4', page_id='phys_0002') for page_id in [None, 'phys_0001,phys_0002']: with self.subTest(page_id=page_id): - proc = ZipTestProcessor(workspace=ws, input_file_grp='GRP1,GRP2', page_id=page_id) + proc = ZipTestProcessor(None) + proc.workspace = ws + proc.input_file_grp = 'GRP1,GRP2' + proc.page_id = page_id print("unfiltered") tuples = [(one.ID, two.ID) for one, two in proc.zip_input_files()] assert ('foobar1', 'foobar2') in tuples @@ -215,23 +380,32 @@ class ZipTestProcessor(Processor): pass ws.add_file('GRP2', mimetype='image/tiff', file_id='foobar4dup', page_id='phys_0002') for page_id in [None, 'phys_0001,phys_0002']: with self.subTest(page_id=page_id): - proc = ZipTestProcessor(workspace=ws, input_file_grp='GRP1,GRP2', page_id=page_id) + proc = ZipTestProcessor(None) + proc.workspace = ws + proc.input_file_grp = 'GRP1,GRP2' + proc.page_id = page_id tuples = [(one.ID, two.ID) for one, two in proc.zip_input_files(on_error='first')] assert ('foobar1', 'foobar2') in tuples assert ('foobar3', 'foobar4') in tuples tuples = [(one.ID, two) for one, two in proc.zip_input_files(on_error='skip')] assert ('foobar3', None) in tuples - with self.assertRaisesRegex(Exception, "No PAGE-XML for page .* in fileGrp .* but multiple matches."): + with self.assertRaisesRegex(NonUniqueInputFile, "Could not determine unique input file"): tuples = proc.zip_input_files(on_error='abort') ws.add_file('GRP2', mimetype=MIMETYPE_PAGE, file_id='foobar2dup', page_id='phys_0001') for page_id in [None, 'phys_0001,phys_0002']: with self.subTest(page_id=page_id): - proc = ZipTestProcessor(workspace=ws, input_file_grp='GRP1,GRP2', page_id=page_id) - with self.assertRaisesRegex(Exception, "Multiple PAGE-XML matches for page"): + proc = ZipTestProcessor(None) + proc.workspace = ws + proc.input_file_grp = 'GRP1,GRP2' + 
proc.page_id = page_id + with self.assertRaisesRegex(NonUniqueInputFile, "Could not determine unique input file"): tuples = proc.zip_input_files() def test_zip_input_files_require_first(self): - class ZipTestProcessor(Processor): pass + class ZipTestProcessor(Processor): + @property + def ocrd_tool(self): + return {} self.capture_out_err() with pushd_popd(tempdir=True) as tempdir: ws = self.resolver.workspace_from_nothing(directory=tempdir) @@ -239,10 +413,67 @@ class ZipTestProcessor(Processor): pass ws.add_file('GRP2', mimetype=MIMETYPE_PAGE, file_id='foobar2', page_id='phys_0001') for page_id in [None, 'phys_0001']: with self.subTest(page_id=page_id): - proc = ZipTestProcessor(workspace=ws, input_file_grp='GRP1,GRP2', page_id=page_id) + proc = ZipTestProcessor(None) + proc.workspace = ws + proc.input_file_grp = 'GRP1,GRP2' + proc.page_id = page_id assert [(one, two.ID) for one, two in proc.zip_input_files(require_first=False)] == [(None, 'foobar2')] r = self.capture_out_err() - assert 'ERROR ocrd.processor.base - found no page phys_0001 in file group GRP1' in r.err + assert 'ERROR ocrd.processor.base - Found no file for page phys_0001 in file group GRP1' in r.err + +def test_run_output_metsserver(start_mets_server): + mets_server_url, ws = start_mets_server + assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == 0 + # do not raise for number of failures: + config.OCRD_MAX_MISSING_OUTPUTS = -1 + run_processor(DummyProcessorWithOutputSleep, workspace=ws, + input_file_grp="OCR-D-IMG", + output_file_grp="OCR-D-OUT", + parameter={"sleep": 0}, + mets_server_url=mets_server_url) + assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == len(ws.mets.find_all_files(fileGrp="OCR-D-IMG")) + config.OCRD_EXISTING_OUTPUT = 'OVERWRITE' + run_processor(DummyProcessorWithOutputSleep, workspace=ws, + input_file_grp="OCR-D-IMG", + output_file_grp="OCR-D-OUT", + parameter={"sleep": 0}, + mets_server_url=mets_server_url) + assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == len(ws.mets.find_all_files(fileGrp="OCR-D-IMG")) + config.OCRD_EXISTING_OUTPUT = 'ABORT' + with pytest.raises(Exception) as exc: + run_processor(DummyProcessorWithOutputSleep, workspace=ws, + input_file_grp="OCR-D-IMG", + output_file_grp="OCR-D-OUT", + parameter={"sleep": 0}, + mets_server_url=mets_server_url) + assert "already exists" in str(exc.value) + config.reset_defaults() + +# 2s (+ 2s tolerance) instead of 3*3s (+ 2s tolerance) +# fixme: pytest-timeout does not shut down / finalize the fixture properly +# (regardless of method or func_only), so the next test in the suite +# does not execute ("previous item was not torn down properly") +# so we must instead wait for completion and assert on the time spent... 
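
As the fixme above explains, pytest-timeout cannot interrupt these fixture-based tests cleanly, so the test below runs to completion and asserts on elapsed wall-clock time instead. The generic shape of that pattern (a sketch; `work` is a hypothetical callable, not part of the patch):

    import time

    def assert_completes_within(seconds, work):
        # run to completion, then check elapsed wall-clock time;
        # no signal- or thread-based interruption, so fixtures tear down normally
        start = time.time()
        work()
        elapsed = time.time() - start
        assert elapsed < seconds, f"took {elapsed:.1f}s, expected < {seconds}s"
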
+#@pytest.mark.timeout(timeout=4, func_only=True, method="signal") +def test_run_output_parallel(start_mets_server): + import time + mets_server_url, ws = start_mets_server + assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == 0 + # do not raise for single-page timeout + config.OCRD_PROCESSING_PAGE_TIMEOUT = -1 + # do not raise for number of failures: + config.OCRD_MAX_MISSING_OUTPUTS = -1 + config.OCRD_MAX_PARALLEL_PAGES = 3 + start_time = time.time() + run_processor(DummyProcessorWithOutputSleep, workspace=ws, + input_file_grp="OCR-D-IMG", + output_file_grp="OCR-D-OUT", + parameter={"sleep": 2}, + mets_server_url=mets_server_url) + run_time = time.time() - start_time + assert run_time < 3, f"run_processor took {run_time}s" + assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == len(ws.mets.find_all_files(fileGrp="OCR-D-IMG")) + config.reset_defaults() if __name__ == "__main__": main(__file__) diff --git a/tests/test_decorators.py b/tests/test_decorators.py index 5ab2880053..561fdc762d 100644 --- a/tests/test_decorators.py +++ b/tests/test_decorators.py @@ -15,7 +15,7 @@ ocrd_loglevel, ocrd_cli_wrap_processor, ) # pylint: disable=protected-access -from ocrd_utils import pushd_popd, VERSION as OCRD_VERSION, disableLogging, initLogging, get_logging_config_files +from ocrd_utils import pushd_popd, VERSION as OCRD_VERSION, disableLogging, initLogging, get_logging_config_files, config @click.command() @ocrd_cli_options @@ -41,18 +41,20 @@ def cli_dummy_processor(*args, **kwargs): class TestDecorators(TestCase): - def setUp(self): - super().setUp() + def tearDown(self): + super().tearDown() + config.reset_defaults() disableLogging() def test_minimal(self): - exit_code, out, err = self.invoke_cli(cli_with_ocrd_cli_options, ['-l', 'DEBUG']) - print(out, err) - assert not exit_code + initLogging() + code, out, err = self.invoke_cli(cli_with_ocrd_cli_options, ['-l', 'DEBUG']) + assert not code, (out, err) def test_loglevel_invalid(self): - code, _, err = self.invoke_cli(cli_with_ocrd_loglevel, ['--log-level', 'foo']) - assert code + initLogging() + code, out, err = self.invoke_cli(cli_with_ocrd_loglevel, ['--log-level', 'foo']) + assert code, (out, err) import click if int(click.__version__[0]) < 8: assert 'invalid choice: foo' in err @@ -63,7 +65,7 @@ def test_loglevel_override(self): if get_logging_config_files(): pytest.skip(f"ocrd_logging.conf found at {get_logging_config_files()}, skipping logging test") import logging - disableLogging() + assert logging.getLogger('').getEffectiveLevel() == logging.WARNING assert logging.getLogger('ocrd').getEffectiveLevel() == logging.WARNING initLogging() assert logging.getLogger('ocrd').getEffectiveLevel() == logging.INFO diff --git a/tests/test_logging.py b/tests/test_logging.py index 2e4e0861b5..091fc25bee 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -26,16 +26,22 @@ class TestLogging(TestCase): def setUp(self): pass # do not chdir + def tearDown(self): + super().tearDown() + disableLogging() + def test_loglevel_inheritance(self): initLogging(builtin_only=True) ocrd_logger = logging.getLogger('ocrd') assert ocrd_logger.getEffectiveLevel() == logging.INFO some_logger = getLogger('ocrd.foo') + assert some_logger.level == logging.NOTSET assert some_logger.getEffectiveLevel() == logging.INFO setOverrideLogLevel('ERROR') assert ocrd_logger.getEffectiveLevel() == logging.ERROR assert some_logger.getEffectiveLevel() == logging.ERROR another_logger = getLogger('ocrd.bar') + assert another_logger.level == logging.NOTSET assert 
another_logger.getEffectiveLevel() == logging.ERROR def test_getLevelName(self): @@ -139,7 +145,7 @@ def testProcessorProfiling(self): getLogger('ocrd.process.profile').setLevel('DEBUG') getLogger('ocrd.process.profile').addHandler(ch) - run_processor(DummyProcessor, resolver=Resolver(), mets_url=assets.url_of('SBB0000F29300010000/data/mets.xml')) + run_processor(DummyProcessor, input_file_grp='OCR-D-IMG', resolver=Resolver(), mets_url=assets.url_of('SBB0000F29300010000/data/mets.xml')) log_contents = log_capture_string.getvalue() log_capture_string.close() diff --git a/tests/test_logging_conf.py b/tests/test_logging_conf.py index f8e0e9e894..0717674103 100644 --- a/tests/test_logging_conf.py +++ b/tests/test_logging_conf.py @@ -21,74 +21,67 @@ # sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/../ocrd') TEST_ROOT = pathlib.Path(os.path.dirname(os.path.abspath(__file__))).parent -def resetLogging(): - disableLogging() - initLogging() - - @pytest.fixture(name="logging_conf") -def _fixture_logging_conf(tmpdir): +def _fixture_logging_conf(tmpdir, capfd): path_logging_conf_orig = os.path.join( str(TEST_ROOT), 'src', 'ocrd_utils', 'ocrd_logging.conf') path_logging_conf_dest = os.path.join(str(tmpdir), 'ocrd_logging.conf') shutil.copy(path_logging_conf_orig, path_logging_conf_dest) - return str(tmpdir) + with pushd_popd(tmpdir): + with capfd.disabled(): + initLogging() + yield str(tmpdir) + disableLogging() -def test_configured_dateformat(logging_conf, capsys): +def test_configured_dateformat(logging_conf, capfd): """Ensure example ocrd_logging.conf is valid and produces desired record format""" # arrange - with pushd_popd(logging_conf): - resetLogging() - test_logger = getLogger('') + test_logger = getLogger('ocrd') - # act - test_logger.info("test logger initialized") + # act + test_logger.info("test logger initialized") - log_info_output = capsys.readouterr().err - must_not_match = r"^\d{4}-\d{2}-\d{2}.*" - assert not re.match(must_not_match, log_info_output) - match_pattern = r"^\d{2}:\d{2}:\d{2}.*" - assert re.match(match_pattern, log_info_output) + log_info_output = capfd.readouterr().err + must_not_match = r"^\d{4}-\d{2}-\d{2}.*" + assert not re.match(must_not_match, log_info_output) + match_pattern = r"^\d{2}:\d{2}:\d{2}.*" + assert re.match(match_pattern, log_info_output), log_info_output -def test_configured_tensorflow_logger_present(logging_conf, capsys): +def test_configured_tensorflow_logger_present(logging_conf, capfd): """Ensure example ocrd_logging.conf is valid and contains logger tensorflow""" # arrange - os.chdir(logging_conf) - resetLogging() logger_under_test = getLogger('tensorflow') # act info logger_under_test.info("tensorflow logger initialized") - log_info_output = capsys.readouterr().err + log_info_output = capfd.readouterr().err assert not log_info_output # act error logger_under_test.error("tensorflow has error") - log_error_output = capsys.readouterr().err + log_error_output = capfd.readouterr().err assert log_error_output -def test_configured_shapely_logger_present(logging_conf, capsys): +def test_configured_shapely_logger_present(logging_conf, capfd): """Ensure example ocrd_logging.conf is valid and contains logger shapely.geos""" # arrange - os.chdir(logging_conf) - resetLogging() logger_under_test = getLogger('shapely.geos') # act info logger_under_test.info("shapely.geos logger initialized") - log_info_output = capsys.readouterr().err + log_info_output = capfd.readouterr().err assert not log_info_output # act error 
logger_under_test.error("shapely alert") - log_error_output = capsys.readouterr().err + log_error_output = capfd.readouterr().err assert log_error_output if __name__ == '__main__': diff --git a/tests/test_mets_server.py b/tests/test_mets_server.py index 58ff6e2a9b..3bb96535c0 100644 --- a/tests/test_mets_server.py +++ b/tests/test_mets_server.py @@ -22,50 +22,65 @@ from requests.exceptions import ConnectionError from ocrd import Resolver, OcrdMetsServer, Workspace -from ocrd_utils import pushd_popd, MIMETYPE_PAGE +from ocrd_utils import pushd_popd, MIMETYPE_PAGE, initLogging, setOverrideLogLevel, disableLogging, getLogger -WORKSPACE_DIR = '/tmp/ocrd-mets-server' TRANSPORTS = ['/tmp/ocrd-mets-server.sock', 'http://127.0.0.1:12345'] @fixture(scope='function', name='start_mets_server', params=TRANSPORTS) -def fixture_start_mets_server(request) -> Iterable[Tuple[str, Workspace]]: - def _start_mets_server(*args, **kwargs): - mets_server = OcrdMetsServer(*args, **kwargs) - mets_server.startup() +def fixture_start_mets_server(request, tmpdir) -> Iterable[Tuple[str, Workspace]]: + initLogging() + #setOverrideLogLevel(10) + logger = getLogger('ocrd') + tmpdir = str(tmpdir) mets_server_url = request.param if mets_server_url == TRANSPORTS[0]: if exists(mets_server_url): remove(mets_server_url) - if exists(WORKSPACE_DIR): - rmtree(WORKSPACE_DIR, ignore_errors=True) - - copytree(assets.path_to('SBB0000F29300010000/data'), WORKSPACE_DIR) - workspace = Workspace(Resolver(), WORKSPACE_DIR) - p = Process(target=_start_mets_server, kwargs={'workspace': workspace, 'url': request.param}) + if exists(tmpdir): + rmtree(tmpdir, ignore_errors=True) + + copytree(assets.path_to('SBB0000F29300010000/data'), tmpdir) + workspace = Workspace(Resolver(), tmpdir) + class MetsServerProcess(Process): + def __init__(self, *args, **kwargs): + self.server = OcrdMetsServer(*args, **kwargs) + super().__init__() + def run(self): + self.server.startup() + def terminate(self): + self.server.workspace.save_mets() + super().terminate() + p = MetsServerProcess(workspace=workspace, url=request.param) p.start() + logger.info("started METS Server") sleep(1) # sleep to start up server - yield mets_server_url, Workspace(resolver=Resolver(), directory=WORKSPACE_DIR, mets_server_url=mets_server_url) + workspace_server = Workspace(Resolver(), tmpdir, mets_server_url=mets_server_url) + yield mets_server_url, workspace_server p.terminate() - rmtree(WORKSPACE_DIR, ignore_errors=True) - -def add_file_server(x): - mets_server_url, i = x - workspace_server = Workspace(resolver=Resolver(), directory=WORKSPACE_DIR, mets_server_url=mets_server_url) + p.join() + logger.info("terminated METS Server") + rmtree(tmpdir, ignore_errors=True) + disableLogging() + +def add_file_server(x, force=False): + mets_server_url, directory, i = x + workspace_server = Workspace(Resolver(), directory, mets_server_url=mets_server_url) workspace_server.add_file( + 'FOO', local_filename=f'local_filename{i}', mimetype=MIMETYPE_PAGE, page_id=f'page{i}', - file_grp='FOO', file_id=f'FOO_page{i}_foo{i}', # url=f'url{i}' + force=force ) def add_agent_server(x): - mets_server_url, i = x - workspace_server = Workspace(resolver=Resolver(), directory=WORKSPACE_DIR, mets_server_url=mets_server_url) + mets_server_url, directory, i = x + workspace_server = Workspace(Resolver(), directory, mets_server_url=mets_server_url) workspace_server.mets.add_agent( name=f'proc{i}', _type='baz', @@ -82,7 +97,10 @@ def test_mets_server_add_file(start_mets_server): # add NO_FILES files in parallel 
with Pool() as pool: - pool.map(add_file_server, zip(repeat(mets_server_url), range(NO_FILES))) + pool.map(add_file_server, zip( + repeat(mets_server_url), + repeat(workspace_server.directory), + range(NO_FILES))) assert set(workspace_server.mets.file_groups) == set( [ 'OCR-D-IMG', @@ -107,7 +125,7 @@ def test_mets_server_add_file(start_mets_server): assert len(workspace_server.mets.find_all_files(fileGrp='FOO')) == NO_FILES # not yet synced - workspace_file = Workspace(Resolver(), WORKSPACE_DIR) + workspace_file = Workspace(Resolver(), workspace_server.directory) assert len(workspace_file.mets.find_all_files(fileGrp='FOO')) == 0 # sync @@ -116,6 +134,19 @@ def test_mets_server_add_file(start_mets_server): assert len(workspace_file.mets.find_all_files(fileGrp='FOO')) == NO_FILES +def test_mets_server_add_file_overwrite(start_mets_server): + mets_server_url, workspace_server = start_mets_server + + add_file_server((mets_server_url, workspace_server.directory, 5)) + + assert len(workspace_server.mets.find_all_files(fileGrp='FOO')) == 1 + + with raises(RuntimeError, match="already exists"): + add_file_server((mets_server_url, workspace_server.directory, 5)) + + add_file_server((mets_server_url, workspace_server.directory, 5), force=True) + assert len(workspace_server.mets.find_all_files(fileGrp='FOO')) == 1 + def test_mets_server_add_agents(start_mets_server): NO_AGENTS = 30 @@ -125,13 +156,16 @@ def test_mets_server_add_agents(start_mets_server): # add NO_AGENTS agents in parallel with Pool() as pool: - pool.map(add_agent_server, zip(repeat(mets_server_url), list(range(NO_AGENTS)))) + pool.map(add_agent_server, zip( + repeat(mets_server_url), + repeat(workspace_server.directory), + list(range(NO_AGENTS)))) assert len(workspace_server.mets.agents) == NO_AGENTS + no_agents_before # XXX not a tuple assert workspace_server.mets.agents[-1].notes[0][0] == {'{https://ocr-d.de}foo': 'bar'} - workspace_file = Workspace(Resolver(), WORKSPACE_DIR) + workspace_file = Workspace(Resolver(), workspace_server.directory) assert len(workspace_file.mets.agents) == no_agents_before # sync @@ -142,7 +176,7 @@ def test_mets_server_add_agents(start_mets_server): def test_mets_server_str(start_mets_server): mets_server_url, workspace_server = start_mets_server - workspace_server = Workspace(Resolver(), WORKSPACE_DIR, mets_server_url=mets_server_url) + workspace_server = Workspace(Resolver(), workspace_server.directory, mets_server_url=mets_server_url) f = next(workspace_server.find_files()) assert str(f) == '' a = workspace_server.mets.agents[0] @@ -182,7 +216,7 @@ def test_mets_server_socket_stop(start_mets_server): assert True, 'No stop conditions to test for TCP server' else: assert Path(mets_server_url).exists() - assert workspace_server.mets.workspace_path == WORKSPACE_DIR + assert workspace_server.mets.workspace_path == workspace_server.directory workspace_server.mets.stop() with raises(ConnectionError): workspace_server.mets.file_groups @@ -236,7 +270,7 @@ def test_reload(start_mets_server : Tuple[str, Workspace]): assert len(workspace_server.mets.find_all_files()) == 35, '35 files total' assert len(workspace_server_copy.mets.find_all_files()) == 35, '35 files total' - workspace_server_copy.add_file('FOO', ID='foo', mimetype='foo/bar', local_filename='mets.xml', pageId='foo') + workspace_server_copy.add_file('FOO', file_id='foo', mimetype='foo/bar', local_filename='mets.xml', page_id='foo') assert len(workspace_server.mets.find_all_files()) == 35, '35 files total' assert 
len(workspace_server_copy.mets.find_all_files()) == 36, '36 files total' diff --git a/tests/test_resolver.py b/tests/test_resolver.py index 16dfd03d56..c2575b6086 100644 --- a/tests/test_resolver.py +++ b/tests/test_resolver.py @@ -292,20 +292,21 @@ def test_resolve_mets_arguments(): https://github.com/OCR-D/core/issues/517 """ resolver = Resolver() - assert resolver.resolve_mets_arguments(None, None, None, None) == (str(Path.cwd()), str(Path.cwd() / 'mets.xml'), 'mets.xml', None) - assert resolver.resolve_mets_arguments('/', None, 'mets.xml', None) == ('/', '/mets.xml', 'mets.xml', None) - assert resolver.resolve_mets_arguments('/foo', '/foo/foo.xml', None, None) == ('/foo', '/foo/foo.xml', 'foo.xml', None) - assert resolver.resolve_mets_arguments(None, '/foo/foo.xml', None, None) == ('/foo', '/foo/foo.xml', 'foo.xml', None) - assert resolver.resolve_mets_arguments('/foo', 'foo.xml', None, None) == ('/foo', '/foo/foo.xml', 'foo.xml', None) - assert resolver.resolve_mets_arguments('/foo', 'http://bar/foo.xml', None, None) == ('/foo', 'http://bar/foo.xml', 'foo.xml', None) - with pytest.raises(ValueError, match="Use either --mets or --mets-basename, not both"): - resolver.resolve_mets_arguments('/', '/foo/bar', 'foo.xml', None) - with pytest.raises(ValueError, match="inconsistent with --directory"): - resolver.resolve_mets_arguments('/foo', '/bar/foo.xml', None, None) - with pytest.warns(DeprecationWarning): - resolver.resolve_mets_arguments('/foo', None, 'not_mets.xml', None) - with pytest.raises(ValueError, match=r"--mets is an http\(s\) URL but no --directory was given"): - resolver.resolve_mets_arguments(None, 'http://bar/foo.xml', None, None) + with pytest.warns(DeprecationWarning, match='--mets-basename'): + assert resolver.resolve_mets_arguments(None, None, None, None) == (str(Path.cwd()), str(Path.cwd() / 'mets.xml'), 'mets.xml', None) + assert resolver.resolve_mets_arguments('/', None, 'mets.xml', None) == ('/', '/mets.xml', 'mets.xml', None) + assert resolver.resolve_mets_arguments('/foo', '/foo/foo.xml', None, None) == ('/foo', '/foo/foo.xml', 'foo.xml', None) + assert resolver.resolve_mets_arguments(None, '/foo/foo.xml', None, None) == ('/foo', '/foo/foo.xml', 'foo.xml', None) + assert resolver.resolve_mets_arguments('/foo', 'foo.xml', None, None) == ('/foo', '/foo/foo.xml', 'foo.xml', None) + assert resolver.resolve_mets_arguments('/foo', 'http://bar/foo.xml', None, None) == ('/foo', 'http://bar/foo.xml', 'foo.xml', None) + with pytest.raises(ValueError, match="Use either --mets or --mets-basename, not both"): + resolver.resolve_mets_arguments('/', '/foo/bar', 'foo.xml', None) + with pytest.raises(ValueError, match="inconsistent with --directory"): + resolver.resolve_mets_arguments('/foo', '/bar/foo.xml', None, None) + with pytest.warns(DeprecationWarning): + resolver.resolve_mets_arguments('/foo', None, 'not_mets.xml', None) + with pytest.raises(ValueError, match=r"--mets is an http\(s\) URL but no --directory was given"): + resolver.resolve_mets_arguments(None, 'http://bar/foo.xml', None, None) if __name__ == '__main__': main(__file__) diff --git a/tests/test_utils.py b/tests/test_utils.py index 89ff6d90f3..dea7ad7942 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -242,12 +242,16 @@ def test_set_json_key_value_overrides(): def test_assert_file_grp_cardinality(): with raises(AssertionError, match="Expected exactly 5 output file groups, but '.'FOO', 'BAR'.' 
has 2"): - assert_file_grp_cardinality('FOO,BAR', 5) + with warns(DeprecationWarning, match="file_grp_cardinality in ocrd-tool.json instead"): + assert_file_grp_cardinality('FOO,BAR', 5) with raises(AssertionError, match="Expected exactly 1 output file group, but '.'FOO', 'BAR'.' has 2"): - assert_file_grp_cardinality('FOO,BAR', 1) - assert_file_grp_cardinality('FOO,BAR', 2) + with warns(DeprecationWarning, match="file_grp_cardinality in ocrd-tool.json instead"): + assert_file_grp_cardinality('FOO,BAR', 1) + with warns(DeprecationWarning, match="file_grp_cardinality in ocrd-tool.json instead"): + assert_file_grp_cardinality('FOO,BAR', 2) with raises(AssertionError, match="Expected exactly 1 output file group .foo bar., but '.'FOO', 'BAR'.' has 2"): - assert_file_grp_cardinality('FOO,BAR', 1, 'foo bar') + with warns(DeprecationWarning, match="file_grp_cardinality in ocrd-tool.json instead"): + assert_file_grp_cardinality('FOO,BAR', 1, 'foo bar') def test_make_file_id_simple(): f = create_ocrd_file('MAX', ID="MAX_0012") diff --git a/tests/test_workspace.py b/tests/test_workspace.py index c8df9b444b..ad9cd15575 100644 --- a/tests/test_workspace.py +++ b/tests/test_workspace.py @@ -87,10 +87,10 @@ def test_workspace_add_file_overwrite(plain_workspace): plain_workspace.add_file('GRP', file_id='ID1', mimetype='image/tiff', content='CONTENT', page_id='phys1', local_filename=fpath) with pytest.raises(FileExistsError) as fn_exc: plain_workspace.add_file('GRP', file_id='ID1', mimetype='image/tiff', content='CONTENT', page_id=None, local_filename=fpath) - assert str(fn_exc.value) == "File with file_id='ID1' already exists" + assert "already exists" in str(fn_exc.value) with pytest.raises(FileExistsError) as fn_exc: plain_workspace.add_file('GRP', file_id='ID1', mimetype='image/tiff', content='CONTENT', page_id='phys2', local_filename=fpath, force=True) - assert 'cannot mitigate' in str(fn_exc.value) + assert 'cannot mitigate' in str(fn_exc.value) plain_workspace.add_file('GRP', file_id='ID1', mimetype='image/tiff', content='CONTENT2', page_id='phys1', local_filename=fpath, force=True) f = plain_workspace.mets.find_all_files()[0] @@ -270,9 +270,9 @@ def test_remove_file_force(sbb_data_workspace): # TODO check semantics - can a non-existent thing be removed? 
     assert not sbb_data_workspace.remove_file('non-existing-id', force=True)
-    # should also succeed
-    sbb_data_workspace.overwrite_mode = True
-    assert not sbb_data_workspace.remove_file('non-existing-id', force=False)
+    with pytest.raises(FileNotFoundError) as not_found_exc:
+        sbb_data_workspace.remove_file('non-existing-id', force=False)
+    assert "not found in METS" in str(not_found_exc.value)
 
 
 def test_remove_file_remote_not_available_raises_exception(plain_workspace):
@@ -292,9 +292,9 @@ def test_remove_file_remote(plain_workspace):
     assert plain_workspace.remove_file('page1_img', force=True)
     # TODO check returned value
-    # should also "succeed", because overwrite_mode is set which also sets 'force' to 'True'
-    plain_workspace.overwrite_mode = True
-    assert not plain_workspace.remove_file('page1_img')
+    with pytest.raises(FileNotFoundError) as not_found_exc:
+        plain_workspace.remove_file('page1_img')
+    assert "not found in METS" in str(not_found_exc.value)
 
 
 def test_rename_file_group(tmp_path):
@@ -341,9 +341,6 @@ def test_remove_file_group_force(sbb_data_workspace):
     # check function and tests semantics
     # should succeed
     assert not sbb_data_workspace.remove_file_group('I DO NOT EXIST', force=True)
-    # should also succeed
-    sbb_data_workspace.overwrite_mode = True
-    assert not sbb_data_workspace.remove_file_group('I DO NOT EXIST', force=False)
 
 
 def test_remove_file_group_rmdir(sbb_data_tmp, sbb_data_workspace):
@@ -417,7 +414,7 @@ def test_save_image_file_invalid_mimetype_raises_exception(plain_workspace):
     # act raise
     with pytest.raises(KeyError) as key_exc:
-        plain_workspace.save_image_file(img, 'page1_img', 'IMG', 'page1', 'ceci/nest/pas/une/mimetype')
+        plain_workspace.save_image_file(img, 'page1_img', 'IMG', page_id='page1', mimetype='ceci/nest/pas/une/mimetype')
     assert "'ceci/nest/pas/une/mimetype'" == str(key_exc.value)
 
 
@@ -428,13 +425,18 @@ def test_save_image_file(plain_workspace):
     img = Image.new('RGB', (1000, 1000))
 
     # act
-    assert plain_workspace.save_image_file(img, 'page1_img', 'IMG', 'page1', 'image/jpeg')
+    assert plain_workspace.save_image_file(img, 'page1_img', 'IMG', page_id='page1', mimetype='image/jpeg')
     assert exists(join(plain_workspace.directory, 'IMG', 'page1_img.jpg'))
     # should succeed
-    assert plain_workspace.save_image_file(img, 'page1_img', 'IMG', 'page1', 'image/jpeg', force=True)
-    # should also succeed
-    plain_workspace.overwrite_mode = True
-    assert plain_workspace.save_image_file(img, 'page1_img', 'IMG', 'page1', 'image/jpeg')
+    assert plain_workspace.save_image_file(img, 'page1_img', 'IMG', page_id='page1', mimetype='image/jpeg', force=True)
+    # should fail
+    with pytest.raises(FileExistsError) as exists_exc:
+        plain_workspace.save_image_file(img, 'page1_img', 'IMG', page_id='page1', mimetype='image/jpeg')
+    assert "neither force nor ignore are set" in str(exists_exc.value)
+
+    # check file_path kwarg
+    assert plain_workspace.save_image_file(img, 'page1_img2', 'IMG', page_id='page1', file_path='IMG/page1_img2.png')
+    assert exists(join(plain_workspace.directory, 'IMG', 'page1_img2.png'))
 
 
 @pytest.fixture(name='workspace_kant_aufklaerung')
@@ -484,8 +486,10 @@ def test_image_from_page_basic(workspace_gutachten_data):
         pcgts = parseString(f.read().encode('utf8'), silence=True)
     # act + assert
-    _, info, _ = workspace_gutachten_data.image_from_page(pcgts.get_Page(), page_id='PHYS_0017', feature_selector='clipped', feature_filter='cropped')
-    assert info['features'] == 'binarized,clipped'
+    img, coords, _ = workspace_gutachten_data.image_from_page(pcgts.get_Page(), page_id='PHYS_0017', feature_selector='clipped', feature_filter='cropped')
+    assert coords['features'] == 'binarized,clipped'
+    assert isinstance(img.info.get('dpi', None), tuple)
+    assert img.info['dpi'][0] == coords['DPI']
     _, info, _ = workspace_gutachten_data.image_from_page(pcgts.get_Page(), page_id='PHYS_0017')
     assert info['features'] == 'binarized,clipped'
@@ -526,6 +530,7 @@ def test_deskewing(plain_workspace):
     skew = 4.625
     image = Image.new('L', size)
     image = polygon_mask(image, poly)
+    image.info['dpi'] = (300, 300)
     #image.show(title='image')
     pixels = np.count_nonzero(np.array(image) > 0)
     name = 'foo0'
@@ -536,9 +541,12 @@
         Coords=CoordsType(points=points_from_polygon(poly)), orientation=-skew)
     page.add_TextRegion(region)
-    page_image, page_coords, _ = plain_workspace.image_from_page(page, '')
+    page_image, page_coords, page_info = plain_workspace.image_from_page(page, '')
     #page_image.show(title='page_image')
     assert list(image.getdata()) == list(page_image.getdata())
+    assert 'dpi' in page_image.info
+    assert round(page_image.info['dpi'][0]) == 300
+    assert page_coords['DPI'] == 300
     assert np.all(page_coords['transform'] == np.eye(3))
     reg_image, reg_coords = plain_workspace.image_from_segment(region, page_image, page_coords, feature_filter='deskewed', fill=0)
@@ -547,6 +555,7 @@
     assert reg_image.height == xywh['h'] == 335
     assert reg_coords['transform'][0, 2] == -xywh['x']
     assert reg_coords['transform'][1, 2] == -xywh['y']
+    assert round(reg_image.info['dpi'][0]) == 300
     # same fg after cropping to minimal bbox
     reg_pixels = np.count_nonzero(np.array(reg_image) > 0)
     assert pixels == reg_pixels
@@ -558,6 +567,7 @@
     assert reg_coords['transform'][0, 1] != 0
     assert reg_coords['transform'][1, 0] != 0
     assert 'deskewed' in reg_coords['features']
+    assert round(reg_image.info['dpi'][0]) == 300
     # same fg after cropping to minimal bbox (roughly - due to aliasing)
     reg_pixels = np.count_nonzero(np.array(reg_image) > 0)
     assert np.abs(pixels - reg_pixels) / pixels < 0.005
@@ -579,6 +589,7 @@
     assert reg_image2.height == reg_image.height
     assert np.allclose(reg_coords2['transform'], reg_coords['transform'])
     assert reg_coords2['features'] == reg_coords['features']
+    assert round(reg_image2.info['dpi'][0]) == 300
     # same fg after cropping to minimal bbox (roughly - due to aliasing)
     reg_pixels2 = np.count_nonzero(np.array(reg_image) > 0)
     assert reg_pixels2 == reg_pixels
@@ -673,7 +684,7 @@ def test_merge_overwrite(tmp_path):
         ws1.add_file('X', page_id='X', mimetype='X', file_id='id123', local_filename='X/X', content='ws1')
         ws2.add_file('X', page_id='X', mimetype='X', file_id='id456', local_filename='X/X', content='ws2')
         ws1.merge(ws2)
-    assert "would overwrite" == str(exc.value)
+    assert "would overwrite" in str(exc.value)
 
 def test_merge_with_filter(plain_workspace, tmp_path):
     # arrange
@@ -734,7 +745,7 @@ def _fixture_metsDocumentID(tmp_path):
 def test_agent_before_metsDocumentID(workspace_metsDocumentID):
     report = WorkspaceValidator.validate(Resolver(), mets_url=workspace_metsDocumentID.mets_target)
     assert report.is_valid
-    workspace_metsDocumentID.mets.add_agent('foo bar v0.0.1', 'OTHER', 'OTHER', 'OTHER')
+    workspace_metsDocumentID.mets.add_agent(name='foo bar v0.0.1', _type='OTHER', othertype='OTHER', role='OTHER')
     workspace_metsDocumentID.save_mets()
     report = WorkspaceValidator.validate(Resolver(), mets_url=workspace_metsDocumentID.mets_target)
     print(report.errors)
diff --git a/tests/utils/test_config.py b/tests/utils/test_config.py
index 99595a864c..a94eb5d3cc 100644
--- a/tests/utils/test_config.py
+++ b/tests/utils/test_config.py
@@ -57,3 +57,11 @@ def test_OCRD_PROFILE():
     with temp_env_var('OCRD_PROFILE', 'some other value'):
         with raises(ValueError, match="'OCRD_PROFILE' set to invalid value 'some other value'"):
             config.OCRD_PROFILE
+
+def test_defaults():
+    default = config.OCRD_MAX_PROCESSOR_CACHE
+    print(type(default))
+    config.OCRD_MAX_PROCESSOR_CACHE = 2
+    assert config.OCRD_MAX_PROCESSOR_CACHE == 2
+    config.reset_defaults()
+    assert config.OCRD_MAX_PROCESSOR_CACHE == default
diff --git a/tests/validator/test_json_validator.py b/tests/validator/test_json_validator.py
index 8a8387d4b6..d81c894f97 100644
--- a/tests/validator/test_json_validator.py
+++ b/tests/validator/test_json_validator.py
@@ -1,5 +1,5 @@
 from tests.base import TestCase, main
-from ocrd_validators.json_validator import JsonValidator, DefaultValidatingDraft6Validator
+from ocrd_validators.json_validator import JsonValidator, DefaultValidatingDraft20199Validator
 
 class TestParameterValidator(TestCase):
 
@@ -15,23 +15,23 @@ def setUp(self):
                 }
             }
         }
-        self.defaults_validator = JsonValidator(self.schema, DefaultValidatingDraft6Validator)
+        self.defaults_validator = JsonValidator(self.schema, DefaultValidatingDraft20199Validator)
         super().setUp()
 
     def test_validate_string(self):
         report = JsonValidator.validate('{}', {})
-        self.assertTrue(report.is_valid)
+        self.assertTrue(report.is_valid, str(report.to_xml()))
 
     def test_defaults_set(self):
         obj = {'bar': 2000}
         report = self.defaults_validator._validate(obj)
-        self.assertTrue(report.is_valid)
+        self.assertTrue(report.is_valid, str(report.to_xml()))
         self.assertEqual(obj, {'foo': 3000, 'bar': 2000})
 
     def test_properr(self):
         obj = {'bar': 100, 'quux': {}}
         report = self.defaults_validator._validate(obj)
-        self.assertFalse(report.is_valid)
+        self.assertFalse(report.is_valid, str(report.to_xml()))
         self.assertEqual(len(report.errors), 1)
diff --git a/tests/validator/test_ocrd_tool_validator.py b/tests/validator/test_ocrd_tool_validator.py
index 3ad40d8645..df19e8e64c 100644
--- a/tests/validator/test_ocrd_tool_validator.py
+++ b/tests/validator/test_ocrd_tool_validator.py
@@ -12,8 +12,8 @@
     "ocrd-xyz": {
         "executable": "ocrd-xyz",
         "description": "bars all the foos",
-        "input_file_grp": ["OCR-D-FOO"],
-        "output_file_grp": ["OCR-D-BAR"],
+        "input_file_grp_cardinality": 1,
+        "output_file_grp_cardinality": 1,
         "categories": ["Layout analysis"],
         "steps": ["layout/analysis"]
     }
@@ -29,7 +29,7 @@ def setUp(self):
 
     def test_smoke(self):
         report = OcrdToolValidator.validate(self.ocrd_tool)
-        self.assertEqual(report.is_valid, True)
+        self.assertTrue(report.is_valid, str(report.to_xml()))
 
     def test_additional_props(self):
         self.ocrd_tool['not-allowed'] = 'YUP'
@@ -48,7 +48,7 @@ def test_file_param_ok(self):
         ocrd_tool = json.loads(skeleton)
         ocrd_tool['tools']['ocrd-xyz']['parameters'] = {"file-param": {"description": "...", "type": "string", "content-type": 'application/rdf+xml'}}
         report = OcrdToolValidator.validate(ocrd_tool)
-        self.assertEqual(report.is_valid, True)
+        self.assertTrue(report.is_valid, str(report.to_xml()))
 
     # Not restricted anymore since spec 3.3.0
     # def test_file_param_bad_content_types(self):
diff --git a/tests/validator/test_page_validator.py b/tests/validator/test_page_validator.py
index 79e92d90fa..e6aaff1523 100644
--- a/tests/validator/test_page_validator.py
+++ b/tests/validator/test_page_validator.py
@@ -16,9 +16,10 @@
     def test_validate_err(self):
         PageValidator.validate(filename=FAULTY_GLYPH_PAGE_FILENAME, page_textequiv_strategy='best')
         # test with deprecated name
         with self.assertRaisesRegex(Exception, 'page_textequiv_strategy best not implemented'):
-            PageValidator.validate(filename=FAULTY_GLYPH_PAGE_FILENAME, strategy='best')
+            with self.assertWarnsRegex(DeprecationWarning, r'use page_textequiv_strategy'):
+                PageValidator.validate(filename=FAULTY_GLYPH_PAGE_FILENAME, strategy='best')
         with self.assertRaisesRegex(Exception, 'page_textequiv_consistency level superstrictest not implemented'):
-            PageValidator.validate(filename=FAULTY_GLYPH_PAGE_FILENAME, page_textequiv_consistency='superstrictest', strategy='first')
+            PageValidator.validate(filename=FAULTY_GLYPH_PAGE_FILENAME, page_textequiv_consistency='superstrictest', page_textequiv_strategy='first')
 
     def test_validate_filename(self):
         report = PageValidator.validate(filename=FAULTY_GLYPH_PAGE_FILENAME)
@@ -44,7 +45,7 @@ def test_validate_lax(self):
         report = PageValidator.validate(ocrd_page=ocrd_page)
         self.assertEqual(len([e for e in report.errors if isinstance(e, ConsistencyError)]), 26, '26 textequiv consistency errors - strict')
-        report = PageValidator.validate(ocrd_page=ocrd_page, strictness='lax')
+        report = PageValidator.validate(ocrd_page=ocrd_page, page_textequiv_consistency='lax')
         self.assertEqual(len([e for e in report.errors if isinstance(e, ConsistencyError)]), 1, '1 textequiv consistency errors - lax')
 
     def test_validate_multi_textequiv_first(self):
@@ -89,7 +90,7 @@ def test_fix(self):
         ocrd_page = parse(FAULTY_GLYPH_PAGE_FILENAME, silence=True)
         report = PageValidator.validate(ocrd_page=ocrd_page)
         self.assertEqual(len([e for e in report.errors if isinstance(e, ConsistencyError)]), 17, '17 textequiv consistency errors')
-        PageValidator.validate(ocrd_page=ocrd_page, strictness='fix')
+        PageValidator.validate(ocrd_page=ocrd_page, page_textequiv_consistency='fix')
         report = PageValidator.validate(ocrd_page=ocrd_page)
         self.assertEqual(len([e for e in report.errors if isinstance(e, ConsistencyError)]), 0, 'no more textequiv consistency errors')
diff --git a/tests/validator/test_parameter_validator.py b/tests/validator/test_parameter_validator.py
index f0d9d41d2c..297a149064 100644
--- a/tests/validator/test_parameter_validator.py
+++ b/tests/validator/test_parameter_validator.py
@@ -42,7 +42,7 @@ def test_default_assignment(self):
         })
         obj = {'baz': '23'}
         report = validator.validate(obj)
-        self.assertTrue(report.is_valid)
+        self.assertTrue(report.is_valid, str(report.to_xml()))
         self.assertEqual(obj, {'baz': '23', "num-param": 1})
 
 def test_min_max():
diff --git a/tests/validator/test_resource_list_validator.py b/tests/validator/test_resource_list_validator.py
index eb95d9b1ea..cc63c30ea7 100644
--- a/tests/validator/test_resource_list_validator.py
+++ b/tests/validator/test_resource_list_validator.py
@@ -22,8 +22,7 @@ def reslist():
 
 def test_resource_list_validator(reslist):
     report = OcrdResourceListValidator.validate(reslist)
-    print(report.errors)
-    assert report.is_valid == True
+    assert report.is_valid, str(report.to_xml())
 
 if __name__ == '__main__':
     main(__file__)
diff --git a/tests/validator/test_workspace_validator.py b/tests/validator/test_workspace_validator.py
index bc516d5a53..2e63bb5495 100644
--- a/tests/validator/test_workspace_validator.py
+++ b/tests/validator/test_workspace_validator.py
@@ -90,7 +90,7 @@ def test_validate_file_groups_non_ocrd(self):
         self.assertEqual(len(report.errors), 1)
         self.assertIn('No files', report.errors[0])
         self.assertEqual(len(report.notices), 1)
-        self.assertIn("USE does not begin with 'OCR-D-'", report.notices[0])
+        self.assertIn("fileGrp USE 'FOO' does not begin with 'OCR-D-'", report.notices[0])
 
     def test_validate_file_groups_unspecified(self):
         with TemporaryDirectory() as tempdir:
diff --git a/tests/validator/test_xsd_validator.py b/tests/validator/test_xsd_validator.py
index d0150338dd..50b3851ffc 100644
--- a/tests/validator/test_xsd_validator.py
+++ b/tests/validator/test_xsd_validator.py
@@ -37,22 +37,22 @@ def test_mets_empty(self):
 
     def test_validate_simple_protected_str(self):
         val = XsdValidator(XSD_METS_URL)
         report = val._validate(self.ws.mets.to_xml())
-        self.assertTrue(report.is_valid)
+        self.assertTrue(report.is_valid, str(report.to_xml()))
 
     def test_validate_simple_protected_doc(self):
         val = XsdValidator(XSD_METS_URL)
         report = val._validate(self.ws.mets._tree)
-        self.assertTrue(report.is_valid)
+        self.assertTrue(report.is_valid, str(report.to_xml()))
 
     def test_validate_simple_static_doc(self):
         report = XsdValidator.validate(XSD_METS_URL, self.ws.mets._tree)
-        self.assertTrue(report.is_valid)
+        self.assertTrue(report.is_valid, str(report.to_xml()))
 
 class TestXsdPageValidator(TestCase):
 
     def test_validate_page_simple_static_doc(self):
         report = XsdPageValidator.validate(simple_page)
-        self.assertTrue(report.is_valid)
+        self.assertTrue(report.is_valid, str(report.to_xml()))
 
 if __name__ == '__main__':
     main(__file__)
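
Note (not part of the patch above): the recurring pattern in these test changes is that calls using deprecated keyword arguments are wrapped in pytest.warns(DeprecationWarning, ...), nested inside an existing raises/assertRaisesRegex context wherever one call both warns and then raises. A minimal self-contained sketch of that nesting, using a hypothetical resolve() stand-in rather than any actual OCR-D API:

import warnings

import pytest


def resolve(directory, basename=None):
    # hypothetical stand-in: warns on the deprecated kwarg, then rejects bad values
    if basename is not None:
        warnings.warn('basename is deprecated, use directory only', DeprecationWarning)
        if basename != 'mets.xml':
            raise ValueError('non-default basename not supported')
    return directory


def test_deprecated_kwarg_still_warns():
    # deprecated but valid: assert the warning alone
    with pytest.warns(DeprecationWarning, match='deprecated'):
        assert resolve('/data', basename='mets.xml') == '/data'


def test_deprecated_kwarg_warns_then_raises():
    # deprecated and invalid: nest warns inside raises so both are asserted
    with pytest.raises(ValueError, match='not supported'):
        with pytest.warns(DeprecationWarning, match='deprecated'):
            resolve('/data', basename='other.xml')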