Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

replace pkg_resources with importlib #1174

Merged
merged 2 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ build:
# (Re)install the tool
install: #build
# not strictly necessary but a precaution against outdated python build tools, https://github.com/OCR-D/core/pull/1166
$(PIP) install -U pip wheel setuptools
$(PIP) install -U pip wheel
$(PIP_INSTALL) . $(PIP_INSTALL_CONFIG_OPTION)
@# workaround for shapely#1598
$(PIP) config set global.no-binary shapely
Expand Down Expand Up @@ -280,6 +280,7 @@ pyclean:
rm -rf ./dist
rm -rf htmlcov
rm -rf .benchmarks
rm -rf **/*.egg-info
rm -f **/*.pyc
-find . -name '__pycache__' -exec rm -rf '{}' \;
rm -rf .pytest_cache
Expand Down
12 changes: 7 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
atomicwrites >= 1.3.0
bagit >= 1.7.0
bagit_profile >= 1.3.0
beanie~=1.7
click >=7
Deprecated == 1.2.0
docker
fastapi
fastapi>=0.78.0
filetype
Flask
frozendict>=2.3.4
gdown
httpx>=0.22.0
importlib_metadata ; python_version < '3.8'
importlib_resources ; python_version < '3.9'
jsonschema
lxml
memory-profiler >= 0.58.0
# XXX explicitly do not restrict the numpy version because different
# tensorflow versions might require different versions
numpy
ocrd-fork-bagit >= 1.8.1.post2
ocrd-fork-bagit_profile >= 1.3.0.post1
opencv-python-headless
paramiko
pika>=1.2.0
Expand All @@ -27,7 +30,6 @@ pyyaml
requests < 2.30
requests_unixsocket
shapely
# tensorflow versions might require different versions
uvicorn
uvicorn>=0.17.6
# XXX explicitly do not restrict the numpy version because different

4 changes: 2 additions & 2 deletions src/ocrd/cli/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,14 +286,14 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_fi
\b
Examples:
ocrd workspace bulk-add \\
--regex '(?P<fileGrp>[^/]+)/page_(?P<pageid>.*)\.[^.]+' \\
--regex '(?P<fileGrp>[^/]+)/page_(?P<pageid>.*)\\.[^.]+' \\
--page-id 'PHYS_{{ pageid }}' \\
--file-grp "{{ fileGrp }}" \\
path/to/files/*/*.*
\b
echo "path/to/src/file.xml SEG/page_p0001.xml" \\
| ocrd workspace bulk-add \\
--regex '(?P<src>.*?) (?P<fileGrp>.+?)/page_(?P<pageid>.*)\.(?P<ext>[^\.]*)' \\
--regex '(?P<src>.*?) (?P<fileGrp>.+?)/page_(?P<pageid>.*)\\.(?P<ext>[^\\.]*)' \\
--file-id 'FILE_{{ fileGrp }}_{{ pageid }}' \\
--page-id 'PHYS_{{ pageid }}' \\
--file-grp "{{ fileGrp }}" \\
Expand Down
6 changes: 3 additions & 3 deletions src/ocrd/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Constants for ocrd.
"""
from pkg_resources import resource_filename
from ocrd_utils import resource_filename

__all__ = [
'TMP_PREFIX',
Expand All @@ -18,7 +18,7 @@
DEFAULT_UPLOAD_FOLDER = '/tmp/uploads-ocrd-core'
DOWNLOAD_DIR = '/tmp/ocrd-core-downloads'
DEFAULT_REPOSITORY_URL = 'http://localhost:5000/'
BASHLIB_FILENAME = resource_filename(__name__, 'lib.bash')
RESOURCE_LIST_FILENAME = resource_filename(__name__, 'resource_list.yml')
BASHLIB_FILENAME = resource_filename(__package__, 'lib.bash')
RESOURCE_LIST_FILENAME = resource_filename(__package__, 'resource_list.yml')
RESOURCE_USER_LIST_COMMENT = "# OCR-D private resource list (consider sending a PR with your own resources to OCR-D/core)"
BACKUP_DIR = '.backup'
7 changes: 3 additions & 4 deletions src/ocrd/processor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
'run_processor'
]

from warnings import warn
from pkg_resources import resource_filename
from os.path import exists
from shutil import copyfileobj
import json
Expand All @@ -30,7 +28,8 @@
list_resource_candidates,
pushd_popd,
list_all_resources,
get_processor_resource_types
get_processor_resource_types,
resource_filename,
)
from ocrd_validators import ParameterValidator
from ocrd_models.ocrd_page import MetadataItemType, LabelType, LabelsType
Expand Down Expand Up @@ -266,7 +265,7 @@ def moduledir(self):
"""
The filesystem path of the module directory.
"""
return resource_filename(self.module, '')
return resource_filename(self.module, '.')

@property
def input_files(self):
Expand Down
6 changes: 3 additions & 3 deletions src/ocrd/processor/builtin/dummy_processor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# pylint: disable=missing-module-docstring,invalid-name
from os.path import join, basename
from pkg_resources import resource_string

import click

Expand All @@ -13,11 +12,12 @@
make_file_id,
MIME_TO_EXT,
MIMETYPE_PAGE,
parse_json_string_with_comments
parse_json_string_with_comments,
resource_string
)
from ocrd_modelfactory import page_from_file

OCRD_TOOL = parse_json_string_with_comments(resource_string(__name__, 'dummy/ocrd-tool.json').decode('utf8'))
OCRD_TOOL = parse_json_string_with_comments(resource_string(__package__ + '.dummy', 'ocrd-tool.json'))

class DummyProcessor(Processor):
"""
Expand Down
6 changes: 3 additions & 3 deletions src/ocrd/workspace_bagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import sys
from bagit import Bag, make_manifests, _load_tag_file, _make_tag_file, _make_tagmanifest_file # pylint: disable=no-name-in-module
from distutils.dir_util import copy_tree
from pkg_resources import get_distribution

from ocrd_utils import (
pushd_popd,
Expand All @@ -19,6 +18,7 @@
DEFAULT_METS_BASENAME,
MIMETYPE_PAGE,
VERSION,
dist_version,
)
from ocrd_validators.constants import BAGIT_TXT, TMP_BAGIT_PREFIX, OCRD_BAGIT_PROFILE_URL
from ocrd_modelfactory import page_from_file
Expand Down Expand Up @@ -117,8 +117,8 @@ def _set_bag_info(self, bag, total_bytes, total_files, ocrd_identifier, ocrd_bas
bag.info['BagIt-Profile-Identifier'] = OCRD_BAGIT_PROFILE_URL
bag.info['Bag-Software-Agent'] = 'ocrd/core %s (bagit.py %s, bagit_profile %s) [cmdline: "%s"]' % (
VERSION, # TODO
get_distribution('bagit').version,
get_distribution('bagit_profile').version,
dist_version('ocrd-fork-bagit'),
dist_version('ocrd-fork-bagit_profile'),
' '.join(sys.argv))

bag.info['Ocrd-Identifier'] = ocrd_identifier
Expand Down
5 changes: 2 additions & 3 deletions src/ocrd_models/constants.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""
Constants for ocrd_models.
"""
from pkg_resources import resource_string
import re
from ocrd_utils import resource_string

__all__ = [
'IDENTIFIER_PRIORITY',
Expand Down Expand Up @@ -32,7 +31,7 @@

IDENTIFIER_PRIORITY = ['purl', 'urn', 'doi', 'url']

METS_XML_EMPTY = resource_string(__name__, 'mets-empty.xml')
METS_XML_EMPTY = resource_string(__package__, 'mets-empty.xml')

NAMESPACES = {
'mets': "http://www.loc.gov/METS/",
Expand Down
2 changes: 1 addition & 1 deletion src/ocrd_models/ocrd_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def empty_mets(now=None, cache_flag=False):
"""
if not now:
now = datetime.now().isoformat()
tpl = METS_XML_EMPTY.decode('utf-8')
tpl = METS_XML_EMPTY
tpl = tpl.replace('{{ VERSION }}', VERSION)
tpl = tpl.replace('{{ NOW }}', '%s' % now)
return OcrdMets(content=tpl.encode('utf-8'), cache_flag=cache_flag)
Expand Down
5 changes: 4 additions & 1 deletion src/ocrd_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,10 @@
from .introspect import (
freeze_args,
set_json_key_value_overrides,
membername
membername,
resource_filename,
resource_string,
dist_version
)

from .logging import (
Expand Down
4 changes: 2 additions & 2 deletions src/ocrd_utils/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Constants for ocrd_utils.
"""
from pkg_resources import get_distribution
from .introspect import dist_version
from re import compile as regex_compile

__all__ = [
Expand All @@ -18,7 +18,7 @@
'VERSION',
]

VERSION = get_distribution('ocrd').version
VERSION = dist_version('ocrd')

MIMETYPE_PAGE = 'application/vnd.prima.page+xml'

Expand Down
28 changes: 28 additions & 0 deletions src/ocrd_utils/introspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,25 @@
"""
import json
from functools import wraps
from pathlib import Path
from frozendict import frozendict
import atexit
from contextlib import ExitStack

# cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
import sys
if sys.version_info < (3, 9):
import importlib_resources
else:
import importlib.resources as importlib_resources

if sys.version_info < (3, 8):
import importlib_metadata
else:
import importlib.metadata as importlib_metadata

file_manager = ExitStack()
atexit.register(file_manager.close)

# Taken from https://github.com/OCR-D/core/pull/884
def freeze_args(func):
Expand Down Expand Up @@ -32,3 +49,14 @@ def set_json_key_value_overrides(obj, *kvpairs):
except json.decoder.JSONDecodeError:
obj[k] = v
return obj

def resource_filename(pkg : str, fname : str) -> Path:
    """
    Resolve the resource ``fname`` inside package ``pkg`` to a filesystem path.

    The ``as_file`` context is entered on the module-level ``file_manager``
    stack, which is only closed via ``atexit``; the returned path is therefore
    not cleaned up when this function returns.
    """
    resource_ctx = importlib_resources.as_file(importlib_resources.files(pkg) / fname)
    return file_manager.enter_context(resource_ctx)

def resource_string(pkg : str, fname : str) -> str:
    """
    Return the content of resource ``fname`` in package ``pkg`` as text.

    The resource is decoded as UTF-8 and read directly from the package
    traversable, so no ``as_file`` context is registered on the
    process-lifetime ``file_manager`` stack just to read a string once.

    Args:
        pkg: importable package name containing the resource.
        fname: resource name, relative to the package.

    Returns:
        The decoded text of the resource.
    """
    resource = importlib_resources.files(pkg).joinpath(fname)
    return resource.read_text(encoding='utf-8')

def dist_version(module : str) -> str:
    """
    Look up the version string of an installed distribution.

    Despite the parameter name, ``module`` is handed to
    ``importlib_metadata.version`` as the distribution name.
    """
    version = importlib_metadata.version(module)
    return version
16 changes: 8 additions & 8 deletions src/ocrd_validators/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Constants for ocrd_validators.
"""
import yaml
from pkg_resources import resource_string, resource_filename
from ocrd_utils import resource_string, resource_filename

__all__ = [
'PROCESSING_SERVER_CONFIG_SCHEMA',
Expand All @@ -21,18 +21,18 @@
'XSD_PATHS',
]

PROCESSING_SERVER_CONFIG_SCHEMA = yaml.safe_load(resource_string(__name__, 'processing_server_config.schema.yml'))
MESSAGE_SCHEMA_PROCESSING = yaml.safe_load(resource_string(__name__, 'message_processing.schema.yml'))
MESSAGE_SCHEMA_RESULT = yaml.safe_load(resource_string(__name__, 'message_result.schema.yml'))
OCRD_TOOL_SCHEMA = yaml.safe_load(resource_string(__name__, 'ocrd_tool.schema.yml'))
PROCESSING_SERVER_CONFIG_SCHEMA = yaml.safe_load(resource_string(__package__, 'processing_server_config.schema.yml'))
MESSAGE_SCHEMA_PROCESSING = yaml.safe_load(resource_string(__package__, 'message_processing.schema.yml'))
MESSAGE_SCHEMA_RESULT = yaml.safe_load(resource_string(__package__, 'message_result.schema.yml'))
OCRD_TOOL_SCHEMA = yaml.safe_load(resource_string(__package__, 'ocrd_tool.schema.yml'))
RESOURCE_LIST_SCHEMA = {
'type': 'object',
'additionalProperties': False,
'patternProperties': {
'^ocrd-.*': OCRD_TOOL_SCHEMA['properties']['tools']['patternProperties']['ocrd-.*']['properties']['resources']
}
}
OCRD_BAGIT_PROFILE = yaml.safe_load(resource_string(__name__, 'bagit-profile.yml'))
OCRD_BAGIT_PROFILE = yaml.safe_load(resource_string(__package__, 'bagit-profile.yml'))

BAGIT_TXT = 'BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8'
FILE_GROUP_PREFIX = 'OCR-D-'
Expand All @@ -42,5 +42,5 @@
XSD_METS_URL = 'https://www.loc.gov/standards/mets/mets.xsd'
XSD_PAGE_URL = 'http://www.primaresearch.org/schema/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd'
XSD_PATHS = {}
XSD_PATHS[XSD_METS_URL] = resource_filename(__name__, 'mets.xsd')
XSD_PATHS[XSD_PAGE_URL] = resource_filename(__name__, 'page.xsd')
XSD_PATHS[XSD_METS_URL] = resource_filename(__package__, 'mets.xsd')
XSD_PATHS[XSD_PAGE_URL] = resource_filename(__package__, 'page.xsd')
1 change: 0 additions & 1 deletion tests/cli/test_bashlib.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from tests.base import CapturingTestCase as TestCase, main, assets, copy_of_directory

from pkg_resources import parse_version
import os, sys
import traceback
import subprocess
Expand Down
9 changes: 5 additions & 4 deletions tests/test_resource_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def test_resources_manager_config_default(monkeypatch, tmp_path):

# arrange
monkeypatch.setenv('HOME', str(tmp_path))
monkeypatch.delenv('XDG_CONFIG_HOME', raising=False)
if 'XDG_CONFIG_HOME' in os.environ:
monkeypatch.delenv('XDG_CONFIG_HOME', raising=False)

# act
mgr = OcrdResourceManager()
Expand All @@ -48,9 +49,9 @@ def test_resources_manager_config_default(monkeypatch, tmp_path):
def test_resources_manager_from_environment(tmp_path, monkeypatch):

# arrange
monkeypatch.setenv('XDG_CONFIG_HOME', tmp_path)
monkeypatch.setenv('XDG_DATA_HOME', tmp_path)
monkeypatch.setenv('HOME', tmp_path)
monkeypatch.setenv('XDG_CONFIG_HOME', str(tmp_path))
monkeypatch.setenv('XDG_DATA_HOME', str(tmp_path))
monkeypatch.setenv('HOME', str(tmp_path))

# act
mgr = OcrdResourceManager()
Expand Down
2 changes: 1 addition & 1 deletion tests/validator/test_xsd_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_constructor(self):
def test_mets_empty(self):
with TemporaryDirectory() as tempdir:
mets_path = Path(tempdir, 'mets.xml')
mets_path.write_bytes(METS_XML_EMPTY)
mets_path.write_text(METS_XML_EMPTY)
report = XsdMetsValidator.validate(mets_path)
self.assertEqual(len(report.errors), 2)
self.assertEqual(report.errors[0],
Expand Down
Loading