Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix: 1257 - Load ocrd tool json locally #1260

Merged
merged 12 commits into from
Aug 1, 2024
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
atomicwrites >= 1.3.0
beanie~=1.7
click >=7
cryptography < 43.0.0
Deprecated == 1.2.0
docker
fastapi>=0.78.0
Expand Down Expand Up @@ -32,4 +33,3 @@ requests_unixsocket2
shapely
uvicorn
uvicorn>=0.17.6

1 change: 1 addition & 0 deletions requirements_test.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
autopep8
cryptography < 43.0.0
pytest >= 4.0.0
generateDS == 2.35.20
pytest-benchmark >= 3.2.3
Expand Down
21 changes: 21 additions & 0 deletions src/ocrd/ocrd-all-tool.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"ocrd-dummy": {
"executable": "ocrd-dummy",
"description": "Bare-bones processor creates PAGE-XML and optionally copies file from input group to output group",
"steps": [
"preprocessing/optimization"
],
"categories": [
"Image preprocessing"
],
"input_file_grp": "DUMMY_INPUT",
"output_file_grp": "DUMMY_OUTPUT",
"parameters": {
"copy_files": {
"type": "boolean",
"default": false,
"description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)"
}
}
}
}
3 changes: 2 additions & 1 deletion src/ocrd_network/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
# NOTE(review): presumably a comma-separated list of RabbitMQ feature flags — confirm against the Docker setup
DOCKER_RABBIT_MQ_FEATURES = "quorum_queue,implicit_default_bindings,classic_mirrored_queue_version"

# URL scheme prefixes
NETWORK_PROTOCOLS = ["http://", "https://"]
OCRD_ALL_JSON_TOOLS_URL = "https://ocr-d.de/js/ocrd-all-tool.json"
# File name of the ocrd all tool json
OCRD_ALL_TOOL_JSON = "ocrd-all-tool.json"
# URL of the ocrd all tool json
OCRD_ALL_TOOL_JSON_URL = "https://ocr-d.de/js/ocrd-all-tool.json"
MehmedGIT marked this conversation as resolved.
Show resolved Hide resolved
# Used as a placeholder to lock all pages when no page_id is specified
SERVER_ALL_PAGES_PLACEHOLDER = "all_pages"

Expand Down
8 changes: 4 additions & 4 deletions src/ocrd_network/processing_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from ocrd.task_sequence import ProcessorTask
from ocrd_utils import initLogging, getLogger
from .constants import AgentType, JobState, OCRD_ALL_JSON_TOOLS_URL, ServerApiTags
from .constants import AgentType, JobState, ServerApiTags
from .database import (
initiate_database,
db_get_processing_job,
Expand Down Expand Up @@ -58,7 +58,7 @@
)
from .tcp_to_uds_mets_proxy import MetsServerProxy
from .utils import (
download_ocrd_all_tool_json,
load_ocrd_all_tool_json,
expand_page_ids,
generate_id,
generate_workflow_content,
Expand Down Expand Up @@ -90,8 +90,8 @@ def __init__(self, config_path: str, host: str, port: int) -> None:
log_file = get_processing_server_logging_file_path(pid=getpid())
configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")

self.log.info(f"Downloading ocrd all tool json")
self.ocrd_all_tool_json = download_ocrd_all_tool_json(ocrd_all_url=OCRD_ALL_JSON_TOOLS_URL)
self.log.info(f"Loading ocrd all tool json")
self.ocrd_all_tool_json = load_ocrd_all_tool_json()
self.hostname = host
self.port = port

Expand Down
18 changes: 9 additions & 9 deletions src/ocrd_network/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from fastapi import UploadFile
from functools import wraps
from hashlib import md5
from json import loads
from pathlib import Path
from re import compile as re_compile, split as re_split
from requests import get as requests_get, Session as Session_TCP
Expand All @@ -14,7 +15,8 @@
from ocrd.resolver import Resolver
from ocrd.workspace import Workspace
from ocrd.mets_server import MpxReq
from ocrd_utils import config, generate_range, REGEX_PREFIX, safe_filename, getLogger
from ocrd_utils import config, generate_range, REGEX_PREFIX, safe_filename, getLogger, resource_string
from .constants import OCRD_ALL_TOOL_JSON, OCRD_ALL_TOOL_JSON_URL
from .rabbitmq_utils import OcrdResultMessage


Expand Down Expand Up @@ -92,14 +94,12 @@ def is_url_responsive(url: str, tries: int = 1, wait_time: int = 3) -> bool:
return False


def download_ocrd_all_tool_json(ocrd_all_url: str):
    """
    Download the ocrd all tool json from the given URL.

    Args:
        ocrd_all_url: URL to send the GET request to; must be non-empty.

    Returns:
        The decoded JSON body of the response (the result of ``response.json()``).

    Raises:
        ValueError: if ``ocrd_all_url`` is empty, or if the response status code is not 200.
    """
    if not ocrd_all_url:
        raise ValueError("The URL of ocrd all tool json is empty")
    headers = {"Accept": "application/json"}
    # Use the session as a context manager so it is closed again once the
    # request has been handled, instead of leaking it.
    with Session_TCP() as session:
        response = session.get(ocrd_all_url, headers=headers)
        if response.status_code != 200:
            raise ValueError(f"Failed to download ocrd all tool json from: '{ocrd_all_url}'")
        return response.json()
def load_ocrd_all_tool_json():
    """
    Load the ocrd all tool json via ``resource_string('ocrd', OCRD_ALL_TOOL_JSON)``.

    Returns:
        The parsed JSON content of the ``OCRD_ALL_TOOL_JSON`` resource.

    Raises:
        ValueError: if reading the resource or parsing its content fails;
            the original exception is attached as the cause.
    """
    try:
        ocrd_all_tool_json = loads(resource_string('ocrd', OCRD_ALL_TOOL_JSON))
    except Exception as error:
        # Chain explicitly so the root cause is kept as __cause__ of the ValueError
        raise ValueError(f"Failed to load ocrd all tool json from: '{OCRD_ALL_TOOL_JSON}', {error}") from error
    return ocrd_all_tool_json


def post_to_callback_url(logger, callback_url: str, result_message: OcrdResultMessage):
Expand Down
17 changes: 16 additions & 1 deletion src/ocrd_utils/ocrd_logging.conf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# each logger requires a corresponding configuration section below
#
[loggers]
keys=root,ocrd,ocrd_network,ocrd_tensorflow,ocrd_shapely_geos,ocrd_PIL,uvicorn,uvicorn_access,uvicorn_error,multipart
keys=root,ocrd,ocrd_network,ocrd_tensorflow,ocrd_shapely_geos,ocrd_PIL,uvicorn,uvicorn_access,uvicorn_error,multipart,paramiko,paramiko_transport

#
# mandatory handlers section
Expand Down Expand Up @@ -91,6 +91,21 @@ level=INFO
handlers=consoleHandler
qualname=PIL

#
# paramiko loggers
#
[logger_paramiko]
level=INFO
handlers=consoleHandler
qualname=paramiko
propagate=0

[logger_paramiko_transport]
level=INFO
handlers=consoleHandler
qualname=paramiko.transport
propagate=0

#
# uvicorn loggers
#
Expand Down
Loading