Skip to content

Commit

Permalink
replace pkg_resources with importlib
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Jan 24, 2024
1 parent 6178082 commit eefcd1e
Show file tree
Hide file tree
Showing 16 changed files with 74 additions and 42 deletions.
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ build:
# (Re)install the tool
install: #build
# not strictly necessary but a precaution against outdated python build tools, https://github.com/OCR-D/core/pull/1166
$(PIP) install -U pip wheel setuptools
$(PIP) install -U pip wheel
$(PIP_INSTALL) . $(PIP_INSTALL_CONFIG_OPTION)
@# workaround for shapely#1598
$(PIP) config set global.no-binary shapely
Expand Down Expand Up @@ -271,6 +271,7 @@ pyclean:
rm -rf ./dist
rm -rf htmlcov
rm -rf .benchmarks
rm -rf **/*.egg-info
rm -f **/*.pyc
-find . -name '__pycache__' -exec rm -rf '{}' \;
rm -rf .pytest_cache
Expand Down
12 changes: 7 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
atomicwrites >= 1.3.0
bagit >= 1.7.0
bagit_profile >= 1.3.0
beanie~=1.7
click >=7
Deprecated == 1.2.0
docker
fastapi
fastapi>=0.78.0
filetype
Flask
frozendict>=2.3.4
gdown
httpx>=0.22.0
importlib_metadata ; python_version < '3.8'
importlib_resources ; python_version < '3.9'
jsonschema
lxml
memory-profiler >= 0.58.0
# XXX explicitly do not restrict the numpy version because different
# tensorflow versions might require different versions
numpy
ocrd-fork-bagit >= 1.8.1.post2
ocrd-fork-bagit_profile >= 1.3.0.post1
opencv-python-headless
paramiko
pika>=1.2.0
Expand All @@ -28,7 +31,6 @@ requests < 2.30
requests_unixsocket
shapely
sparklines >= 0.4.2
# tensorflow versions might require different versions
uvicorn
uvicorn>=0.17.6
# XXX explicitly do not restrict the numpy version because different

4 changes: 2 additions & 2 deletions src/ocrd/cli/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,14 +286,14 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
\b
Examples:
ocrd workspace bulk-add \\
--regex '(?P<fileGrp>[^/]+)/page_(?P<pageid>.*)\.[^.]+' \\
--regex '(?P<fileGrp>[^/]+)/page_(?P<pageid>.*)\\.[^.]+' \\
--page-id 'PHYS_{{ pageid }}' \\
--file-grp "{{ fileGrp }}" \\
path/to/files/*/*.*
\b
echo "path/to/src/file.xml SEG/page_p0001.xml" \\
| ocrd workspace bulk-add \\
--regex '(?P<src>.*?) (?P<fileGrp>.+?)/page_(?P<pageid>.*)\.(?P<ext>[^\.]*)' \\
--regex '(?P<src>.*?) (?P<fileGrp>.+?)/page_(?P<pageid>.*)\\.(?P<ext>[^\\.]*)' \\
--file-id 'FILE_{{ fileGrp }}_{{ pageid }}' \\
--page-id 'PHYS_{{ pageid }}' \\
--file-grp "{{ fileGrp }}" \\
Expand Down
6 changes: 3 additions & 3 deletions src/ocrd/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Constants for ocrd.
"""
from pkg_resources import resource_filename
from ocrd_utils import resource_filename

__all__ = [
'TMP_PREFIX',
Expand All @@ -18,7 +18,7 @@
DEFAULT_UPLOAD_FOLDER = '/tmp/uploads-ocrd-core'
DOWNLOAD_DIR = '/tmp/ocrd-core-downloads'
DEFAULT_REPOSITORY_URL = 'http://localhost:5000/'
BASHLIB_FILENAME = resource_filename(__name__, 'lib.bash')
RESOURCE_LIST_FILENAME = resource_filename(__name__, 'resource_list.yml')
BASHLIB_FILENAME = resource_filename(__package__, 'lib.bash')
RESOURCE_LIST_FILENAME = resource_filename(__package__, 'resource_list.yml')
RESOURCE_USER_LIST_COMMENT = "# OCR-D private resource list (consider sending a PR with your own resources to OCR-D/core)"
BACKUP_DIR = '.backup'
7 changes: 3 additions & 4 deletions src/ocrd/processor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
'run_processor'
]

from warnings import warn
from pkg_resources import resource_filename
from os.path import exists
from shutil import copyfileobj
import json
Expand All @@ -30,7 +28,8 @@
list_resource_candidates,
pushd_popd,
list_all_resources,
get_processor_resource_types
get_processor_resource_types,
resource_filename,
)
from ocrd_validators import ParameterValidator
from ocrd_models.ocrd_page import MetadataItemType, LabelType, LabelsType
Expand Down Expand Up @@ -266,7 +265,7 @@ def moduledir(self):
"""
The filesystem path of the module directory.
"""
return resource_filename(self.module, '')
return resource_filename(self.module, '.')

@property
def input_files(self):
Expand Down
6 changes: 3 additions & 3 deletions src/ocrd/processor/builtin/dummy_processor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# pylint: disable=missing-module-docstring,invalid-name
from os.path import join, basename
from pkg_resources import resource_string

import click

Expand All @@ -13,11 +12,12 @@
make_file_id,
MIME_TO_EXT,
MIMETYPE_PAGE,
parse_json_string_with_comments
parse_json_string_with_comments,
resource_string
)
from ocrd_modelfactory import page_from_file

OCRD_TOOL = parse_json_string_with_comments(resource_string(__name__, 'dummy/ocrd-tool.json').decode('utf8'))
OCRD_TOOL = parse_json_string_with_comments(resource_string(__package__ + '.dummy', 'ocrd-tool.json'))

class DummyProcessor(Processor):
"""
Expand Down
6 changes: 3 additions & 3 deletions src/ocrd/workspace_bagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import sys
from bagit import Bag, make_manifests, _load_tag_file, _make_tag_file, _make_tagmanifest_file # pylint: disable=no-name-in-module
from distutils.dir_util import copy_tree
from pkg_resources import get_distribution

from ocrd_utils import (
pushd_popd,
Expand All @@ -19,6 +18,7 @@
DEFAULT_METS_BASENAME,
MIMETYPE_PAGE,
VERSION,
dist_version,
)
from ocrd_validators.constants import BAGIT_TXT, TMP_BAGIT_PREFIX, OCRD_BAGIT_PROFILE_URL
from ocrd_modelfactory import page_from_file
Expand Down Expand Up @@ -117,8 +117,8 @@ def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_bas
bag.info['BagIt-Profile-Identifier'] = OCRD_BAGIT_PROFILE_URL
bag.info['Bag-Software-Agent'] = 'ocrd/core %s (bagit.py %s, bagit_profile %s) [cmdline: "%s"]' % (
VERSION, # TODO
get_distribution('bagit').version,
get_distribution('bagit_profile').version,
dist_version('ocrd-fork-bagit'),
dist_version('ocrd-fork-bagit_profile'),
' '.join(sys.argv))

bag.info['Ocrd-Identifier'] = ocrd_identifier
Expand Down
5 changes: 2 additions & 3 deletions src/ocrd_models/constants.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""
Constants for ocrd_models.
"""
from pkg_resources import resource_string
import re
from ocrd_utils import resource_string

__all__ = [
'IDENTIFIER_PRIORITY',
Expand Down Expand Up @@ -32,7 +31,7 @@

IDENTIFIER_PRIORITY = ['purl', 'urn', 'doi', 'url']

METS_XML_EMPTY = resource_string(__name__, 'mets-empty.xml')
METS_XML_EMPTY = resource_string(__package__, 'mets-empty.xml')

NAMESPACES = {
'mets': "http://www.loc.gov/METS/",
Expand Down
2 changes: 1 addition & 1 deletion src/ocrd_models/ocrd_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def empty_mets(now=None, cache_flag=False):
"""
if not now:
now = datetime.now().isoformat()
tpl = METS_XML_EMPTY.decode('utf-8')
tpl = METS_XML_EMPTY
tpl = tpl.replace('{{ VERSION }}', VERSION)
tpl = tpl.replace('{{ NOW }}', '%s' % now)
return OcrdMets(content=tpl.encode('utf-8'), cache_flag=cache_flag)
Expand Down
5 changes: 4 additions & 1 deletion src/ocrd_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,10 @@
from .introspect import (
freeze_args,
set_json_key_value_overrides,
membername
membername,
resource_filename,
resource_string,
dist_version
)

from .logging import (
Expand Down
4 changes: 2 additions & 2 deletions src/ocrd_utils/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Constants for ocrd_utils.
"""
from pkg_resources import get_distribution
from .introspect import dist_version
from re import compile as regex_compile

__all__ = [
Expand All @@ -18,7 +18,7 @@
'VERSION',
]

VERSION = get_distribution('ocrd').version
VERSION = dist_version('ocrd')

MIMETYPE_PAGE = 'application/vnd.prima.page+xml'

Expand Down
28 changes: 28 additions & 0 deletions src/ocrd_utils/introspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,25 @@
"""
import json
from functools import wraps
from pathlib import Path
from frozendict import frozendict
import atexit
from contextlib import ExitStack

# cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
import sys
if sys.version_info < (3, 9):
import importlib_resources
else:
import importlib.resources as importlib_resources

if sys.version_info < (3, 8):
import importlib_metadata
else:
import importlib.metadata as importlib_metadata

file_manager = ExitStack()
atexit.register(file_manager.close)

# Taken from https://github.com/OCR-D/core/pull/884
def freeze_args(func):
Expand Down Expand Up @@ -32,3 +49,14 @@ def set_json_key_value_overrides(obj, *kvpairs):
except json.decoder.JSONDecodeError:
obj[k] = v
return obj

def resource_filename(pkg : str, fname : str) -> Path:
    """
    Resolve the resource ``fname`` inside package ``pkg`` to a filesystem path.

    The ``as_file`` context is entered on the module-level ``file_manager``
    stack instead of a local ``with`` block, so the returned path remains
    usable after this function returns. That stack is closed at interpreter
    exit via the ``atexit`` hook registered next to it.

    Args:
        pkg: dotted name of the package that contains the resource.
        fname: name of the resource, relative to the package.

    Returns:
        The value produced by entering ``importlib_resources.as_file``
        for the resource.
    """
    traversable = importlib_resources.files(pkg) / fname
    path_context = importlib_resources.as_file(traversable)
    return file_manager.enter_context(path_context)

def resource_string(pkg : str, fname : str) -> str:
    """
    Return the content of resource ``fname`` in package ``pkg`` as text.

    The resource is decoded as UTF-8 and read directly through the
    traversable returned by ``importlib_resources.files``. No ``as_file``
    context is entered for it, so a one-off read does not leave anything
    registered on a long-lived exit stack.

    Args:
        pkg: dotted name of the package that contains the resource.
        fname: name of the resource, relative to the package.

    Returns:
        The decoded text of the resource.
    """
    return (importlib_resources.files(pkg) / fname).read_text(encoding='utf-8')

def dist_version(module : str) -> str:
    """
    Look up the installed version of a distribution.

    Thin wrapper around ``importlib_metadata.version``. Despite the
    parameter name, callers in this code base pass distribution names
    (e.g. ``'ocrd'``, ``'ocrd-fork-bagit'``).

    Args:
        module: name handed to ``importlib_metadata.version``.

    Returns:
        The version reported by ``importlib_metadata.version``.
    """
    installed_version = importlib_metadata.version(module)
    return installed_version
16 changes: 8 additions & 8 deletions src/ocrd_validators/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Constants for ocrd_validators.
"""
import yaml
from pkg_resources import resource_string, resource_filename
from ocrd_utils import resource_string, resource_filename

__all__ = [
'PROCESSING_SERVER_CONFIG_SCHEMA',
Expand All @@ -21,18 +21,18 @@
'XSD_PATHS',
]

PROCESSING_SERVER_CONFIG_SCHEMA = yaml.safe_load(resource_string(__name__, 'processing_server_config.schema.yml'))
MESSAGE_SCHEMA_PROCESSING = yaml.safe_load(resource_string(__name__, 'message_processing.schema.yml'))
MESSAGE_SCHEMA_RESULT = yaml.safe_load(resource_string(__name__, 'message_result.schema.yml'))
OCRD_TOOL_SCHEMA = yaml.safe_load(resource_string(__name__, 'ocrd_tool.schema.yml'))
PROCESSING_SERVER_CONFIG_SCHEMA = yaml.safe_load(resource_string(__package__, 'processing_server_config.schema.yml'))
MESSAGE_SCHEMA_PROCESSING = yaml.safe_load(resource_string(__package__, 'message_processing.schema.yml'))
MESSAGE_SCHEMA_RESULT = yaml.safe_load(resource_string(__package__, 'message_result.schema.yml'))
OCRD_TOOL_SCHEMA = yaml.safe_load(resource_string(__package__, 'ocrd_tool.schema.yml'))
RESOURCE_LIST_SCHEMA = {
'type': 'object',
'additionalProperties': False,
'patternProperties': {
'^ocrd-.*': OCRD_TOOL_SCHEMA['properties']['tools']['patternProperties']['ocrd-.*']['properties']['resources']
}
}
OCRD_BAGIT_PROFILE = yaml.safe_load(resource_string(__name__, 'bagit-profile.yml'))
OCRD_BAGIT_PROFILE = yaml.safe_load(resource_string(__package__, 'bagit-profile.yml'))

BAGIT_TXT = 'BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8'
FILE_GROUP_PREFIX = 'OCR-D-'
Expand All @@ -42,5 +42,5 @@
XSD_METS_URL = 'https://www.loc.gov/standards/mets/mets.xsd'
XSD_PAGE_URL = 'http://www.primaresearch.org/schema/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd'
XSD_PATHS = {}
XSD_PATHS[XSD_METS_URL] = resource_filename(__name__, 'mets.xsd')
XSD_PATHS[XSD_PAGE_URL] = resource_filename(__name__, 'page.xsd')
XSD_PATHS[XSD_METS_URL] = resource_filename(__package__, 'mets.xsd')
XSD_PATHS[XSD_PAGE_URL] = resource_filename(__package__, 'page.xsd')
1 change: 0 additions & 1 deletion tests/cli/test_bashlib.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from tests.base import CapturingTestCase as TestCase, main, assets, copy_of_directory

from pkg_resources import parse_version
import os, sys
import traceback
import subprocess
Expand Down
9 changes: 5 additions & 4 deletions tests/test_resource_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def test_resources_manager_config_default(monkeypatch, tmp_path):

# arrange
monkeypatch.setenv('HOME', str(tmp_path))
monkeypatch.delenv('XDG_CONFIG_HOME', raising=False)
if 'XDG_CONFIG_HOME' in os.environ:
monkeypatch.delenv('XDG_CONFIG_HOME', raising=False)

# act
mgr = OcrdResourceManager()
Expand All @@ -48,9 +49,9 @@ def test_resources_manager_config_default(monkeypatch, tmp_path):
def test_resources_manager_from_environment(tmp_path, monkeypatch):

# arrange
monkeypatch.setenv('XDG_CONFIG_HOME', tmp_path)
monkeypatch.setenv('XDG_DATA_HOME', tmp_path)
monkeypatch.setenv('HOME', tmp_path)
monkeypatch.setenv('XDG_CONFIG_HOME', str(tmp_path))
monkeypatch.setenv('XDG_DATA_HOME', str(tmp_path))
monkeypatch.setenv('HOME', str(tmp_path))

# act
mgr = OcrdResourceManager()
Expand Down
2 changes: 1 addition & 1 deletion tests/validator/test_xsd_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_constructor(self):
def test_mets_empty(self):
with TemporaryDirectory() as tempdir:
mets_path = Path(tempdir, 'mets.xml')
mets_path.write_bytes(METS_XML_EMPTY)
mets_path.write_text(METS_XML_EMPTY)
report = XsdMetsValidator.validate(mets_path)
self.assertEqual(len(report.errors), 2)
self.assertEqual(report.errors[0],
Expand Down

0 comments on commit eefcd1e

Please sign in to comment.