Skip to content

Commit

Permalink
utils.generate_range: raise a ValueError if non-numeric parts differ
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Jan 17, 2024
1 parent ee8fb69 commit 1427c07
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 3 deletions.
2 changes: 2 additions & 0 deletions ocrd_utils/ocrd_utils/str.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,8 @@ def generate_range(start, end):
start_num, end_num = re.findall(r'\d+', start)[-1], re.findall(r'\d+', end)[-1]
except IndexError:
raise ValueError("Range '%s..%s': could not find numeric part" % (start, end))
if start[:-len(start_num)] != end[:-len(end_num)]:
raise ValueError(f"Range '{start}..{end}' differ in their non-numeric part: '{start[:-len(start_num)]}' != '{end[:-len(end_num)]}'")
if start_num == end_num:
warn("Range '%s..%s': evaluates to the same number")
for i in range(int(start_num), int(end_num) + 1):
Expand Down
1 change: 1 addition & 0 deletions tests/cli/test_workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ def _call(args):
assert _call(['-f', 'json']) == '[[["PHYS_0001"], ["PHYS_0002"], ["PHYS_0003"], ["PHYS_0004"], ["PHYS_0005"], ["PHYS_0006"], ["PHYS_0008"], ["PHYS_0009"], ["PHYS_0010"], ["PHYS_0011"], ["PHYS_0012"], ["PHYS_0013"], ["PHYS_0014"], ["PHYS_0015"], ["PHYS_0016"], ["PHYS_0017"], ["PHYS_0018"], ["PHYS_0019"], ["PHYS_0020"], ["PHYS_0022"], ["PHYS_0023"], ["PHYS_0024"], ["PHYS_0025"], ["PHYS_0026"], ["PHYS_0027"], ["PHYS_0028"], ["PHYS_0029"]]]'
assert _call(['-f', 'comma-separated', '-R', '5..5']) == 'PHYS_0005'
assert _call(['-f', 'comma-separated', '-R', '6..8']) == 'PHYS_0006,PHYS_0008,PHYS_0009'
assert _call(['-f', 'comma-separated', '-r', '1..5']) == 'PHYS_0001,PHYS_0002,PHYS_0003,PHYS_0004,PHYS_0005'
assert _call(['-f', 'comma-separated', '-r', 'PHYS_0006..PHYS_0009']) == 'PHYS_0006,PHYS_0008,PHYS_0009'
assert _call(['-f', 'comma-separated', '-r', 'PHYS_0001..PHYS_0010', '-D', '3']) == 'PHYS_0001,PHYS_0002,PHYS_0003\nPHYS_0004,PHYS_0005,PHYS_0006\nPHYS_0008,PHYS_0009,PHYS_0010'
assert _call(['-f', 'comma-separated', '-r', 'PHYS_0001..PHYS_0010', '-D', '3', '-C', '2']) == 'PHYS_0008,PHYS_0009,PHYS_0010'
Expand Down
3 changes: 2 additions & 1 deletion tests/model/test_ocrd_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ def test_find_all_files(sbb_sample_01):
assert len(sbb_sample_01.find_all_files(pageId='//PHYS_0001,//PHYS_0005')) == 18, '18 files in PHYS_001 and PHYS_0005 (two regexes)'
assert len(sbb_sample_01.find_all_files(pageId='//PHYS_0005,PHYS_0001..PHYS_0002')) == 35, '35 files in //PHYS_0005,PHYS_0001..PHYS_0002'
assert len(sbb_sample_01.find_all_files(pageId='//PHYS_0005,PHYS_0001..PHYS_0002')) == 35, '35 files in //PHYS_0005,PHYS_0001..PHYS_0002'
assert len(sbb_sample_01.find_all_files(pageId='0..100')) == 35, '35 files in @ORDER range 1..10'
assert len(sbb_sample_01.find_all_files(pageId='1..10')) == 35, '35 files in @ORDER range 1..10'
assert len(sbb_sample_01.find_all_files(pageId='1..PHYS_0002')) == 35, '35 files in @ORDER range 1..10'

def test_find_all_files_local_only(sbb_sample_01):
assert len(sbb_sample_01.find_all_files(pageId='PHYS_0001',
Expand Down
10 changes: 8 additions & 2 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,15 @@ def test_make_file_id_744():
def test_generate_range():
    """Exercise ``generate_range`` on valid, malformed and degenerate ranges."""
    # Happy path: identical prefix, ascending zero-padded numeric suffix.
    assert generate_range('PHYS_0001', 'PHYS_0005') == ['PHYS_0001', 'PHYS_0002', 'PHYS_0003', 'PHYS_0004', 'PHYS_0005']
    # Bounds that contain no digits at all cannot form a range.
    with raises(ValueError, match='could not find numeric part'):
        generate_range('NONUMBER', 'ALSO_NONUMBER')
    # Only the last run of digits is taken as the numeric part, so these
    # bounds differ in what precedes it ('PHYS_0001_' vs. 'PHYS_0010_').
    with raises(ValueError, match='differ in their non-numeric part'):
        generate_range('PHYS_0001_123', 'PHYS_0010_123')
    # A bare number cannot be paired with a prefixed identifier.
    with raises(ValueError, match='differ in their non-numeric part'):
        generate_range('1', 'PHYS_0005')
    with raises(ValueError, match='differ in their non-numeric part'):
        generate_range('1', 'page 5')
    # Identical bounds are allowed but warn; the result is asserted inside the
    # block so the warning check and the return-value check cover the same call.
    with warns(UserWarning, match='same number'):
        assert generate_range('PHYS_0001_123', 'PHYS_0001_123') == ['PHYS_0001_123']

def test_safe_filename():
assert safe_filename('Hello world,!') == 'Hello_world_'
Expand Down

0 comments on commit 1427c07

Please sign in to comment.