From 7dabe0046038f387254d193e81efc1cbc1122788 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 17 Jul 2024 12:55:09 +0200 Subject: [PATCH 01/11] load tool json locally --- src/ocrd_network/constants.py | 2 +- src/ocrd_network/processing_server.py | 8 ++++---- src/ocrd_network/utils.py | 14 +++++--------- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/ocrd_network/constants.py b/src/ocrd_network/constants.py index 53dbd9b11b..bfa137d9e0 100644 --- a/src/ocrd_network/constants.py +++ b/src/ocrd_network/constants.py @@ -6,7 +6,7 @@ DOCKER_RABBIT_MQ_FEATURES = "quorum_queue,implicit_default_bindings,classic_mirrored_queue_version" NETWORK_PROTOCOLS = ["http://", "https://"] -OCRD_ALL_JSON_TOOLS_URL = "https://ocr-d.de/js/ocrd-all-tool.json" +OCRD_ALL_TOOL_JSON = "ocrd-all-tool.json" # Used as a placeholder to lock all pages when no page_id is specified SERVER_ALL_PAGES_PLACEHOLDER = "all_pages" diff --git a/src/ocrd_network/processing_server.py b/src/ocrd_network/processing_server.py index dbbdea6475..e142802268 100644 --- a/src/ocrd_network/processing_server.py +++ b/src/ocrd_network/processing_server.py @@ -10,7 +10,7 @@ from ocrd.task_sequence import ProcessorTask from ocrd_utils import initLogging, getLogger -from .constants import AgentType, JobState, OCRD_ALL_JSON_TOOLS_URL, ServerApiTags +from .constants import AgentType, JobState, ServerApiTags from .database import ( initiate_database, db_get_processing_job, @@ -58,7 +58,7 @@ ) from .tcp_to_uds_mets_proxy import MetsServerProxy from .utils import ( - download_ocrd_all_tool_json, + load_ocrd_all_tool_json, expand_page_ids, generate_id, generate_workflow_content, @@ -90,8 +90,8 @@ def __init__(self, config_path: str, host: str, port: int) -> None: log_file = get_processing_server_logging_file_path(pid=getpid()) configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a") - self.log.info(f"Downloading ocrd all tool json") - self.ocrd_all_tool_json = 
download_ocrd_all_tool_json(ocrd_all_url=OCRD_ALL_JSON_TOOLS_URL) + self.log.info(f"Loading ocrd all tool json") + self.ocrd_all_tool_json = load_ocrd_all_tool_json() self.hostname = host self.port = port diff --git a/src/ocrd_network/utils.py b/src/ocrd_network/utils.py index e7a07fa9d9..e0f3570a47 100644 --- a/src/ocrd_network/utils.py +++ b/src/ocrd_network/utils.py @@ -3,6 +3,7 @@ from fastapi import UploadFile from functools import wraps from hashlib import md5 +from json import loads from pathlib import Path from re import compile as re_compile, split as re_split from requests import get as requests_get, Session as Session_TCP @@ -14,7 +15,8 @@ from ocrd.resolver import Resolver from ocrd.workspace import Workspace from ocrd.mets_server import MpxReq -from ocrd_utils import config, generate_range, REGEX_PREFIX, safe_filename, getLogger +from ocrd_utils import config, generate_range, REGEX_PREFIX, safe_filename, getLogger, resource_string +from .constants import OCRD_ALL_TOOL_JSON from .rabbitmq_utils import OcrdResultMessage @@ -92,14 +94,8 @@ def is_url_responsive(url: str, tries: int = 1, wait_time: int = 3) -> bool: return False -def download_ocrd_all_tool_json(ocrd_all_url: str): - if not ocrd_all_url: - raise ValueError(f"The URL of ocrd all tool json is empty") - headers = {"Accept": "application/json"} - response = Session_TCP().get(ocrd_all_url, headers=headers) - if not response.status_code == 200: - raise ValueError(f"Failed to download ocrd all tool json from: '{ocrd_all_url}'") - return response.json() +def load_ocrd_all_tool_json(): + return loads(resource_string('ocrd', OCRD_ALL_TOOL_JSON)) def post_to_callback_url(logger, callback_url: str, result_message: OcrdResultMessage): From 7382c46ff1eb6825c4e8ea20f1ae5638fece8a92 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 29 Jul 2024 13:05:24 +0200 Subject: [PATCH 02/11] download tool json if missing --- src/ocrd_network/constants.py | 1 + src/ocrd_network/utils.py | 15 ++++++++++++--- 2 
files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/ocrd_network/constants.py b/src/ocrd_network/constants.py index bfa137d9e0..f3d2de1247 100644 --- a/src/ocrd_network/constants.py +++ b/src/ocrd_network/constants.py @@ -7,6 +7,7 @@ NETWORK_PROTOCOLS = ["http://", "https://"] OCRD_ALL_TOOL_JSON = "ocrd-all-tool.json" +OCRD_ALL_TOOL_JSON_URL = "https://ocr-d.de/js/ocrd-all-tool.json" # Used as a placeholder to lock all pages when no page_id is specified SERVER_ALL_PAGES_PLACEHOLDER = "all_pages" diff --git a/src/ocrd_network/utils.py b/src/ocrd_network/utils.py index e0f3570a47..56d35558ef 100644 --- a/src/ocrd_network/utils.py +++ b/src/ocrd_network/utils.py @@ -16,7 +16,7 @@ from ocrd.workspace import Workspace from ocrd.mets_server import MpxReq from ocrd_utils import config, generate_range, REGEX_PREFIX, safe_filename, getLogger, resource_string -from .constants import OCRD_ALL_TOOL_JSON +from .constants import OCRD_ALL_TOOL_JSON, OCRD_ALL_TOOL_JSON_URL from .rabbitmq_utils import OcrdResultMessage @@ -94,8 +94,17 @@ def is_url_responsive(url: str, tries: int = 1, wait_time: int = 3) -> bool: return False -def load_ocrd_all_tool_json(): - return loads(resource_string('ocrd', OCRD_ALL_TOOL_JSON)) +def load_ocrd_all_tool_json(download_if_missing: bool = True): + try: + ocrd_all_tool_json = loads(resource_string('ocrd', OCRD_ALL_TOOL_JSON)) + except Exception as error: + if not download_if_missing: + raise Exception(error) + response = Session_TCP().get(OCRD_ALL_TOOL_JSON_URL, headers={"Accept": "application/json"}) + if not response.status_code == 200: + raise ValueError(f"Failed to download ocrd all tool json from: '{OCRD_ALL_TOOL_JSON_URL}'") + ocrd_all_tool_json = response.json() + return ocrd_all_tool_json def post_to_callback_url(logger, callback_url: str, result_message: OcrdResultMessage): From 035b4ea526a1a88a0f75d7ef54f37f2f47c36a56 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 29 Jul 2024 13:16:22 +0200 Subject: [PATCH 03/11] 
add: default ocrd-all-tool.json --- src/ocrd/ocrd-all-tool.json | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 src/ocrd/ocrd-all-tool.json diff --git a/src/ocrd/ocrd-all-tool.json b/src/ocrd/ocrd-all-tool.json new file mode 100644 index 0000000000..fee8e7ef62 --- /dev/null +++ b/src/ocrd/ocrd-all-tool.json @@ -0,0 +1,21 @@ +{ + "ocrd-dummy": { + "executable": "ocrd-dummy", + "description": "Bare-bones processor creates PAGE-XML and optionally copies file from input group to output group", + "steps": [ + "preprocessing/optimization" + ], + "categories": [ + "Image preprocessing" + ], + "input_file_grp": "DUMMY_INPUT", + "output_file_grp": "DUMMY_OUTPUT", + "parameters": { + "copy_files": { + "type": "boolean", + "default": false, + "description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)" + } + } + } +} \ No newline at end of file From ccfaf100b1ceb11d96a74b2f23bf45f2e0a9f446 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 29 Jul 2024 13:42:39 +0200 Subject: [PATCH 04/11] remove downloading tool json --- src/ocrd_network/utils.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/ocrd_network/utils.py b/src/ocrd_network/utils.py index 56d35558ef..babd576956 100644 --- a/src/ocrd_network/utils.py +++ b/src/ocrd_network/utils.py @@ -94,16 +94,11 @@ def is_url_responsive(url: str, tries: int = 1, wait_time: int = 3) -> bool: return False -def load_ocrd_all_tool_json(download_if_missing: bool = True): +def load_ocrd_all_tool_json(): try: ocrd_all_tool_json = loads(resource_string('ocrd', OCRD_ALL_TOOL_JSON)) except Exception as error: - if not download_if_missing: - raise Exception(error) - response = Session_TCP().get(OCRD_ALL_TOOL_JSON_URL, headers={"Accept": "application/json"}) - if not response.status_code == 200: - raise ValueError(f"Failed to download ocrd all tool json from: '{OCRD_ALL_TOOL_JSON_URL}'") - ocrd_all_tool_json = response.json() + raise 
ValueError(f"Failed to load ocrd all tool json from: '{OCRD_ALL_TOOL_JSON}', {error}") return ocrd_all_tool_json From 1af0cc186ae373beccd320355789064978d22a78 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 29 Jul 2024 13:47:38 +0200 Subject: [PATCH 05/11] set: paramiko logging to ERROR --- src/ocrd_utils/ocrd_logging.conf | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/ocrd_utils/ocrd_logging.conf b/src/ocrd_utils/ocrd_logging.conf index 93e311a882..bc477d9a06 100644 --- a/src/ocrd_utils/ocrd_logging.conf +++ b/src/ocrd_utils/ocrd_logging.conf @@ -11,7 +11,7 @@ # each logger requires a corresponding configuration section below # [loggers] -keys=root,ocrd,ocrd_network,ocrd_tensorflow,ocrd_shapely_geos,ocrd_PIL,uvicorn,uvicorn_access,uvicorn_error,multipart +keys=root,ocrd,ocrd_network,ocrd_tensorflow,ocrd_shapely_geos,ocrd_PIL,uvicorn,uvicorn_access,uvicorn_error,multipart,paramiko,paramiko_transport # # mandatory handlers section @@ -91,6 +91,19 @@ level=INFO handlers=consoleHandler qualname=PIL +# +# paramiko loggers +# +[logger_paramiko] +level=ERROR +handlers=consoleHandler +qualname=paramiko + +[logger_paramiko_transport] +level=ERROR +handlers=consoleHandler +qualname=paramiko.transport + # # uvicorn loggers # From be133ea0c0f6b17d5079681f5665038f10dc4c1b Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 29 Jul 2024 13:51:45 +0200 Subject: [PATCH 06/11] set: propagate 0, logging config --- src/ocrd_utils/ocrd_logging.conf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ocrd_utils/ocrd_logging.conf b/src/ocrd_utils/ocrd_logging.conf index bc477d9a06..60925acae8 100644 --- a/src/ocrd_utils/ocrd_logging.conf +++ b/src/ocrd_utils/ocrd_logging.conf @@ -98,11 +98,13 @@ qualname=PIL level=ERROR handlers=consoleHandler qualname=paramiko +propagate=0 [logger_paramiko_transport] level=ERROR handlers=consoleHandler qualname=paramiko.transport +propagate=0 # # uvicorn loggers From 
379f3a47313c57f0c3a9730b94ee23fb207f5f9f Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 29 Jul 2024 15:35:39 +0200 Subject: [PATCH 07/11] fix: suppress paramiko warnings --- requirements.txt | 2 +- requirements_test.txt | 1 + src/ocrd_network/runtime_data/connection_clients.py | 8 +++++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index feb18104ac..f748a06057 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ atomicwrites >= 1.3.0 beanie~=1.7 click >=7 +cryptography >= 43.0.0 Deprecated == 1.2.0 docker fastapi>=0.78.0 @@ -32,4 +33,3 @@ requests_unixsocket2 shapely uvicorn uvicorn>=0.17.6 - diff --git a/requirements_test.txt b/requirements_test.txt index 0f0e5b97d4..be2ba65bca 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -1,4 +1,5 @@ autopep8 +cryptography >= 43.0.0 pytest >= 4.0.0 generateDS == 2.35.20 pytest-benchmark >= 3.2.3 diff --git a/src/ocrd_network/runtime_data/connection_clients.py b/src/ocrd_network/runtime_data/connection_clients.py index 67002a498f..ab2e48b144 100644 --- a/src/ocrd_network/runtime_data/connection_clients.py +++ b/src/ocrd_network/runtime_data/connection_clients.py @@ -1,7 +1,13 @@ from __future__ import annotations from docker import APIClient, DockerClient from docker.transport import SSHHTTPAdapter -from paramiko import AutoAddPolicy, SSHClient + +# TODO: A workaround to supress the annoying paramiko +# warnings which fail bash lib tests - core #1260 +from warnings import catch_warnings +from cryptography.utils import CryptographyDeprecationWarning +with catch_warnings(action="ignore", category=CryptographyDeprecationWarning): + from paramiko import AutoAddPolicy, SSHClient class CustomDockerClient(DockerClient): From 64d5abb195f6d95743c71059b17f116d2db989e9 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 29 Jul 2024 15:36:23 +0200 Subject: [PATCH 08/11] set paramiko logging to INFO --- src/ocrd_utils/ocrd_logging.conf | 4
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ocrd_utils/ocrd_logging.conf b/src/ocrd_utils/ocrd_logging.conf index 60925acae8..5cf161398e 100644 --- a/src/ocrd_utils/ocrd_logging.conf +++ b/src/ocrd_utils/ocrd_logging.conf @@ -95,13 +95,13 @@ qualname=PIL # paramiko loggers # [logger_paramiko] -level=ERROR +level=INFO handlers=consoleHandler qualname=paramiko propagate=0 [logger_paramiko_transport] -level=ERROR +level=INFO handlers=consoleHandler qualname=paramiko.transport propagate=0 From 52a099e90c1ba759dc88fe96ed41b09e4a9e3f6a Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 29 Jul 2024 15:49:32 +0200 Subject: [PATCH 09/11] revert, and just use < v43.0.0 --- requirements.txt | 2 +- requirements_test.txt | 2 +- src/ocrd_network/runtime_data/connection_clients.py | 8 +------- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index f748a06057..ed5fd56d59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ atomicwrites >= 1.3.0 beanie~=1.7 click >=7 -cryptography >= 43.0.0 +cryptography < 43.0.0 Deprecated == 1.2.0 docker fastapi>=0.78.0 diff --git a/requirements_test.txt b/requirements_test.txt index be2ba65bca..d8cef1dae7 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -1,5 +1,5 @@ autopep8 -cryptography >= 43.0.0 +cryptography < 43.0.0 pytest >= 4.0.0 generateDS == 2.35.20 pytest-benchmark >= 3.2.3 diff --git a/src/ocrd_network/runtime_data/connection_clients.py b/src/ocrd_network/runtime_data/connection_clients.py index ab2e48b144..67002a498f 100644 --- a/src/ocrd_network/runtime_data/connection_clients.py +++ b/src/ocrd_network/runtime_data/connection_clients.py @@ -1,13 +1,7 @@ from __future__ import annotations from docker import APIClient, DockerClient from docker.transport import SSHHTTPAdapter - -# TODO: A workaround to supress the annoying paramiko -# warnings which fail bash lib tests - core #1260 -from warnings import catch_warnings -from 
cryptography.utils import CryptographyDeprecationWarning -with catch_warnings(action="ignore", category=CryptographyDeprecationWarning): - from paramiko import AutoAddPolicy, SSHClient +from paramiko import AutoAddPolicy, SSHClient class CustomDockerClient(DockerClient): From c7e380014d12c36b39b7043246005dcc6d86009c Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Tue, 30 Jul 2024 15:02:46 +0200 Subject: [PATCH 10/11] remove OCRD_ALL_TOOL_JSON_URL Co-authored-by: Konstantin Baierer --- src/ocrd_network/constants.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ocrd_network/constants.py b/src/ocrd_network/constants.py index f3d2de1247..bfa137d9e0 100644 --- a/src/ocrd_network/constants.py +++ b/src/ocrd_network/constants.py @@ -7,7 +7,6 @@ NETWORK_PROTOCOLS = ["http://", "https://"] OCRD_ALL_TOOL_JSON = "ocrd-all-tool.json" -OCRD_ALL_TOOL_JSON_URL = "https://ocr-d.de/js/ocrd-all-tool.json" # Used as a placeholder to lock all pages when no page_id is specified SERVER_ALL_PAGES_PLACEHOLDER = "all_pages" From 3d47640fb7e5a078e1a042640c9966bb437404f3 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Tue, 30 Jul 2024 15:08:58 +0200 Subject: [PATCH 11/11] remove: OCRD_ALL_TOOL_JSON_URL import --- src/ocrd_network/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ocrd_network/utils.py b/src/ocrd_network/utils.py index babd576956..a2f563de43 100644 --- a/src/ocrd_network/utils.py +++ b/src/ocrd_network/utils.py @@ -16,7 +16,7 @@ from ocrd.workspace import Workspace from ocrd.mets_server import MpxReq from ocrd_utils import config, generate_range, REGEX_PREFIX, safe_filename, getLogger, resource_string -from .constants import OCRD_ALL_TOOL_JSON, OCRD_ALL_TOOL_JSON_URL +from .constants import OCRD_ALL_TOOL_JSON from .rabbitmq_utils import OcrdResultMessage