From eefcd1ec5cdd3adad1326d37cd3cbf0a79fe9e07 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 24 Jan 2024 15:46:57 +0100 Subject: [PATCH] replace pkg_resources with importlib --- Makefile | 3 +- requirements.txt | 12 ++++---- src/ocrd/cli/workspace.py | 4 +-- src/ocrd/constants.py | 6 ++-- src/ocrd/processor/base.py | 7 ++--- src/ocrd/processor/builtin/dummy_processor.py | 6 ++-- src/ocrd/workspace_bagger.py | 6 ++-- src/ocrd_models/constants.py | 5 ++-- src/ocrd_models/ocrd_mets.py | 2 +- src/ocrd_utils/__init__.py | 5 +++- src/ocrd_utils/constants.py | 4 +-- src/ocrd_utils/introspect.py | 28 +++++++++++++++++++ src/ocrd_validators/constants.py | 16 +++++------ tests/cli/test_bashlib.py | 1 - tests/test_resource_manager.py | 9 +++--- tests/validator/test_xsd_validator.py | 2 +- 16 files changed, 74 insertions(+), 42 deletions(-) diff --git a/Makefile b/Makefile index f863684d0d..cefcec2461 100644 --- a/Makefile +++ b/Makefile @@ -129,7 +129,7 @@ build: # (Re)install the tool install: #build # not stricttly necessary but a precaution against outdated python build tools, https://github.com/OCR-D/core/pull/1166 - $(PIP) install -U pip wheel setuptools + $(PIP) install -U pip wheel $(PIP_INSTALL) . $(PIP_INSTALL_CONFIG_OPTION) @# workaround for shapely#1598 $(PIP) config set global.no-binary shapely @@ -271,6 +271,7 @@ pyclean: rm -rf ./dist rm -rf htmlcov rm -rf .benchmarks + rm -rf **/*.egg-info rm -f **/*.pyc -find . -name '__pycache__' -exec rm -rf '{}' \; rm -rf .pytest_cache diff --git a/requirements.txt b/requirements.txt index 9a9b217218..a8ce91ba45 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,21 +1,24 @@ atomicwrites >= 1.3.0 -bagit >= 1.7.0 -bagit_profile >= 1.3.0 beanie~=1.7 click >=7 Deprecated == 1.2.0 docker -fastapi fastapi>=0.78.0 filetype Flask frozendict>=2.3.4 gdown httpx>=0.22.0 +importlib_metadata ; python_version < '3.8' +importlib_resources ; python_version < '3.9' jsonschema lxml memory-profiler >= 0.58.0 +# XXX explicitly do not restrict the numpy version because different +# tensorflow versions might require different versions numpy +ocrd-fork-bagit >= 1.8.1.post2 +ocrd-fork-bagit_profile >= 1.3.0.post1 opencv-python-headless paramiko pika>=1.2.0 @@ -28,7 +31,6 @@ requests < 2.30 requests_unixsocket shapely sparklines >= 0.4.2 -# tensorflow versions might require different versions uvicorn uvicorn>=0.17.6 -# XXX explicitly do not restrict the numpy version because different + diff --git a/src/ocrd/cli/workspace.py b/src/ocrd/cli/workspace.py index 318c1e971d..d77e981dd4 100644 --- a/src/ocrd/cli/workspace.py +++ b/src/ocrd/cli/workspace.py @@ -286,14 +286,14 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi \b Examples: ocrd workspace bulk-add \\ - --regex '(?P[^/]+)/page_(?P.*)\.[^.]+' \\ + --regex '(?P[^/]+)/page_(?P.*)\\.[^.]+' \\ --page-id 'PHYS_{{ pageid }}' \\ --file-grp "{{ fileGrp }}" \\ path/to/files/*/*.* \b echo "path/to/src/file.xml SEG/page_p0001.xml" \\ | ocrd workspace bulk-add \\ - --regex '(?P.*?) (?P.+?)/page_(?P.*)\.(?P[^\.]*)' \\ + --regex '(?P.*?) (?P.+?)/page_(?P.*)\\.(?P[^\\.]*)' \\ --file-id 'FILE_{{ fileGrp }}_{{ pageid }}' \\ --page-id 'PHYS_{{ pageid }}' \\ --file-grp "{{ fileGrp }}" \\ diff --git a/src/ocrd/constants.py b/src/ocrd/constants.py index 1d436a7fa9..796aff1895 100644 --- a/src/ocrd/constants.py +++ b/src/ocrd/constants.py @@ -1,7 +1,7 @@ """ Constants for ocrd. """ -from pkg_resources import resource_filename +from ocrd_utils import resource_filename __all__ = [ 'TMP_PREFIX', @@ -18,7 +18,7 @@ DEFAULT_UPLOAD_FOLDER = '/tmp/uploads-ocrd-core' DOWNLOAD_DIR = '/tmp/ocrd-core-downloads' DEFAULT_REPOSITORY_URL = 'http://localhost:5000/' -BASHLIB_FILENAME = resource_filename(__name__, 'lib.bash') -RESOURCE_LIST_FILENAME = resource_filename(__name__, 'resource_list.yml') +BASHLIB_FILENAME = resource_filename(__package__, 'lib.bash') +RESOURCE_LIST_FILENAME = resource_filename(__package__, 'resource_list.yml') RESOURCE_USER_LIST_COMMENT = "# OCR-D private resource list (consider sending a PR with your own resources to OCR-D/core)" BACKUP_DIR = '.backup' diff --git a/src/ocrd/processor/base.py b/src/ocrd/processor/base.py index 38b7848a03..6107688bc2 100644 --- a/src/ocrd/processor/base.py +++ b/src/ocrd/processor/base.py @@ -9,8 +9,6 @@ 'run_processor' ] -from warnings import warn -from pkg_resources import resource_filename from os.path import exists from shutil import copyfileobj import json @@ -30,7 +28,8 @@ list_resource_candidates, pushd_popd, list_all_resources, - get_processor_resource_types + get_processor_resource_types, + resource_filename, ) from ocrd_validators import ParameterValidator from ocrd_models.ocrd_page import MetadataItemType, LabelType, LabelsType @@ -266,7 +265,7 @@ def moduledir(self): """ The filesystem path of the module directory. """ - return resource_filename(self.module, '') + return resource_filename(self.module, '.') @property def input_files(self): diff --git a/src/ocrd/processor/builtin/dummy_processor.py b/src/ocrd/processor/builtin/dummy_processor.py index c0371e2d0e..fcb24af6ec 100644 --- a/src/ocrd/processor/builtin/dummy_processor.py +++ b/src/ocrd/processor/builtin/dummy_processor.py @@ -1,6 +1,5 @@ # pylint: disable=missing-module-docstring,invalid-name from os.path import join, basename -from pkg_resources import resource_string import click @@ -13,11 +12,12 @@ make_file_id, MIME_TO_EXT, MIMETYPE_PAGE, - parse_json_string_with_comments + parse_json_string_with_comments, + resource_string ) from ocrd_modelfactory import page_from_file -OCRD_TOOL = parse_json_string_with_comments(resource_string(__name__, 'dummy/ocrd-tool.json').decode('utf8')) +OCRD_TOOL = parse_json_string_with_comments(resource_string(__package__ + '.dummy', 'ocrd-tool.json')) class DummyProcessor(Processor): """ diff --git a/src/ocrd/workspace_bagger.py b/src/ocrd/workspace_bagger.py index f838a65894..5c10103ce5 100644 --- a/src/ocrd/workspace_bagger.py +++ b/src/ocrd/workspace_bagger.py @@ -9,7 +9,6 @@ import sys from bagit import Bag, make_manifests, _load_tag_file, _make_tag_file, _make_tagmanifest_file # pylint: disable=no-name-in-module from distutils.dir_util import copy_tree -from pkg_resources import get_distribution from ocrd_utils import ( pushd_popd, @@ -19,6 +18,7 @@ DEFAULT_METS_BASENAME, MIMETYPE_PAGE, VERSION, + dist_version, ) from ocrd_validators.constants import BAGIT_TXT, TMP_BAGIT_PREFIX, OCRD_BAGIT_PROFILE_URL from ocrd_modelfactory import page_from_file @@ -117,8 +117,8 @@ def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_bas bag.info['BagIt-Profile-Identifier'] = OCRD_BAGIT_PROFILE_URL bag.info['Bag-Software-Agent'] = 'ocrd/core %s (bagit.py %s, bagit_profile %s) [cmdline: "%s"]' % ( VERSION, # TODO - get_distribution('bagit').version, - get_distribution('bagit_profile').version, + dist_version('ocrd-fork-bagit'), + dist_version('ocrd-fork-bagit_profile'), ' '.join(sys.argv)) bag.info['Ocrd-Identifier'] = ocrd_identifier diff --git a/src/ocrd_models/constants.py b/src/ocrd_models/constants.py index 6c8b0e1017..01068b7f34 100644 --- a/src/ocrd_models/constants.py +++ b/src/ocrd_models/constants.py @@ -1,8 +1,7 @@ """ Constants for ocrd_models. """ -from pkg_resources import resource_string -import re +from ocrd_utils import resource_string __all__ = [ 'IDENTIFIER_PRIORITY', @@ -32,7 +31,7 @@ IDENTIFIER_PRIORITY = ['purl', 'urn', 'doi', 'url'] -METS_XML_EMPTY = resource_string(__name__, 'mets-empty.xml') +METS_XML_EMPTY = resource_string(__package__, 'mets-empty.xml') NAMESPACES = { 'mets': "http://www.loc.gov/METS/", diff --git a/src/ocrd_models/ocrd_mets.py b/src/ocrd_models/ocrd_mets.py index 3319f8f6ff..ae4d75e290 100644 --- a/src/ocrd_models/ocrd_mets.py +++ b/src/ocrd_models/ocrd_mets.py @@ -52,7 +52,7 @@ def empty_mets(now=None, cache_flag=False): """ if not now: now = datetime.now().isoformat() - tpl = METS_XML_EMPTY.decode('utf-8') + tpl = METS_XML_EMPTY tpl = tpl.replace('{{ VERSION }}', VERSION) tpl = tpl.replace('{{ NOW }}', '%s' % now) return OcrdMets(content=tpl.encode('utf-8'), cache_flag=cache_flag) diff --git a/src/ocrd_utils/__init__.py b/src/ocrd_utils/__init__.py index 90cd554779..2348265c90 100644 --- a/src/ocrd_utils/__init__.py +++ b/src/ocrd_utils/__init__.py @@ -158,7 +158,10 @@ from .introspect import ( freeze_args, set_json_key_value_overrides, - membername + membername, + resource_filename, + resource_string, + dist_version ) from .logging import ( diff --git a/src/ocrd_utils/constants.py b/src/ocrd_utils/constants.py index 0b9f0ae02b..ca2d65d481 100644 --- a/src/ocrd_utils/constants.py +++ b/src/ocrd_utils/constants.py @@ -1,7 +1,7 @@ """ Constants for ocrd_utils. """ -from pkg_resources import get_distribution +from .introspect import dist_version from re import compile as regex_compile __all__ = [ @@ -18,7 +18,7 @@ 'VERSION', ] -VERSION = get_distribution('ocrd').version +VERSION = dist_version('ocrd') MIMETYPE_PAGE = 'application/vnd.prima.page+xml' diff --git a/src/ocrd_utils/introspect.py b/src/ocrd_utils/introspect.py index cfd3d32b52..a86b9b8984 100644 --- a/src/ocrd_utils/introspect.py +++ b/src/ocrd_utils/introspect.py @@ -3,8 +3,25 @@ """ import json from functools import wraps +from pathlib import Path from frozendict import frozendict +import atexit +from contextlib import ExitStack +# cannot use importlib.resources until we move to 3.9+ forimportlib.resources.files +import sys +if sys.version_info < (3, 9): + import importlib_resources +else: + import importlib.resources as importlib_resources + +if sys.version_info < (3, 8): + import importlib_metadata +else: + import importlib.metadata as importlib_metadata + +file_manager = ExitStack() +atexit.register(file_manager.close) # Taken from https://github.com/OCR-D/core/pull/884 def freeze_args(func): @@ -32,3 +49,14 @@ def set_json_key_value_overrides(obj, *kvpairs): except json.decoder.JSONDecodeError: obj[k] = v return obj + +def resource_filename(pkg : str, fname : str) -> Path: + ref = importlib_resources.files(pkg) / fname + return file_manager.enter_context(importlib_resources.as_file(ref)) + +def resource_string(pkg : str, fname : str) -> str: + with open(resource_filename(pkg, fname), 'r', encoding='utf-8') as f: + return f.read() + +def dist_version(module : str) -> str: + return importlib_metadata.version(module) diff --git a/src/ocrd_validators/constants.py b/src/ocrd_validators/constants.py index fc1ff445ae..b8d145bbba 100644 --- a/src/ocrd_validators/constants.py +++ b/src/ocrd_validators/constants.py @@ -2,7 +2,7 @@ Constants for ocrd_validators. """ import yaml -from pkg_resources import resource_string, resource_filename +from ocrd_utils import resource_string, resource_filename __all__ = [ 'PROCESSING_SERVER_CONFIG_SCHEMA', @@ -21,10 +21,10 @@ 'XSD_PATHS', ] -PROCESSING_SERVER_CONFIG_SCHEMA = yaml.safe_load(resource_string(__name__, 'processing_server_config.schema.yml')) -MESSAGE_SCHEMA_PROCESSING = yaml.safe_load(resource_string(__name__, 'message_processing.schema.yml')) -MESSAGE_SCHEMA_RESULT = yaml.safe_load(resource_string(__name__, 'message_result.schema.yml')) -OCRD_TOOL_SCHEMA = yaml.safe_load(resource_string(__name__, 'ocrd_tool.schema.yml')) +PROCESSING_SERVER_CONFIG_SCHEMA = yaml.safe_load(resource_string(__package__, 'processing_server_config.schema.yml')) +MESSAGE_SCHEMA_PROCESSING = yaml.safe_load(resource_string(__package__, 'message_processing.schema.yml')) +MESSAGE_SCHEMA_RESULT = yaml.safe_load(resource_string(__package__, 'message_result.schema.yml')) +OCRD_TOOL_SCHEMA = yaml.safe_load(resource_string(__package__, 'ocrd_tool.schema.yml')) RESOURCE_LIST_SCHEMA = { 'type': 'object', 'additionalProperties': False, @@ -32,7 +32,7 @@ '^ocrd-.*': OCRD_TOOL_SCHEMA['properties']['tools']['patternProperties']['ocrd-.*']['properties']['resources'] } } -OCRD_BAGIT_PROFILE = yaml.safe_load(resource_string(__name__, 'bagit-profile.yml')) +OCRD_BAGIT_PROFILE = yaml.safe_load(resource_string(__package__, 'bagit-profile.yml')) BAGIT_TXT = 'BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8' FILE_GROUP_PREFIX = 'OCR-D-' @@ -42,5 +42,5 @@ XSD_METS_URL = 'https://www.loc.gov/standards/mets/mets.xsd' XSD_PAGE_URL = 'http://www.primaresearch.org/schema/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd' XSD_PATHS = {} -XSD_PATHS[XSD_METS_URL] = resource_filename(__name__, 'mets.xsd') -XSD_PATHS[XSD_PAGE_URL] = resource_filename(__name__, 'page.xsd') +XSD_PATHS[XSD_METS_URL] = resource_filename(__package__, 'mets.xsd') +XSD_PATHS[XSD_PAGE_URL] = resource_filename(__package__, 'page.xsd') diff --git a/tests/cli/test_bashlib.py b/tests/cli/test_bashlib.py index 8a6e40b029..74a623d1b6 100644 --- a/tests/cli/test_bashlib.py +++ b/tests/cli/test_bashlib.py @@ -1,6 +1,5 @@ from tests.base import CapturingTestCase as TestCase, main, assets, copy_of_directory -from pkg_resources import parse_version import os, sys import traceback import subprocess diff --git a/tests/test_resource_manager.py b/tests/test_resource_manager.py index fb6db3adc4..221b0a3af1 100644 --- a/tests/test_resource_manager.py +++ b/tests/test_resource_manager.py @@ -21,7 +21,8 @@ def test_resources_manager_config_default(monkeypatch, tmp_path): # arrange monkeypatch.setenv('HOME', str(tmp_path)) - monkeypatch.delenv('XDG_CONFIG_HOME', raising=False) + if 'XDG_CONFIG_HOME' in os.environ: + monkeypatch.delenv('XDG_CONFIG_HOME', raising=False) # act mgr = OcrdResourceManager() @@ -48,9 +49,9 @@ def test_resources_manager_config_default(monkeypatch, tmp_path): def test_resources_manager_from_environment(tmp_path, monkeypatch): # arrange - monkeypatch.setenv('XDG_CONFIG_HOME', tmp_path) - monkeypatch.setenv('XDG_DATA_HOME', tmp_path) - monkeypatch.setenv('HOME', tmp_path) + monkeypatch.setenv('XDG_CONFIG_HOME', str(tmp_path)) + monkeypatch.setenv('XDG_DATA_HOME', str(tmp_path)) + monkeypatch.setenv('HOME', str(tmp_path)) # act mgr = OcrdResourceManager() diff --git a/tests/validator/test_xsd_validator.py b/tests/validator/test_xsd_validator.py index 4d4deadc15..d0150338dd 100644 --- a/tests/validator/test_xsd_validator.py +++ b/tests/validator/test_xsd_validator.py @@ -25,7 +25,7 @@ def test_constructor(self): def test_mets_empty(self): with TemporaryDirectory() as tempdir: mets_path = Path(tempdir, 'mets.xml') - mets_path.write_bytes(METS_XML_EMPTY) + mets_path.write_text(METS_XML_EMPTY) report = XsdMetsValidator.validate(mets_path) self.assertEqual(len(report.errors), 2) self.assertEqual(report.errors[0],