Skip to content

Commit

Permalink
Fix space stripping
Browse files (browse the repository at this point in the history)
  • Loading branch information
erykjj committed Feb 15, 2023
1 parent aa72db9 commit 64df20c
Showing 1 changed file with 11 additions and 9 deletions.
20 changes: 11 additions & 9 deletions linkture.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,7 +26,7 @@
SOFTWARE.
"""

VERSION = '2.0.1'
VERSION = '2.0.2'


import argparse, json, regex, sqlite3
Expand Down Expand Up @@ -70,15 +70,15 @@ def __init__(self, language='English', translate=None, form=None, verbose=False)
self._tr_book_names.insert(rec[2], rec[form])
for rec in cur.execute(f"SELECT * FROM Books WHERE Language = '{language}';").fetchall():
for i in range(3,6):
normalized = unidecode(rec[i].replace(' ', '').replace('.', '').replace('-', '').upper())
normalized = unidecode(rec[i].replace(' ', ' ').replace('.', '').replace('-', '').upper()) # non-breaking space
self._src_book_names[normalized] = rec[2]
with open(path / 'res/custom.json', 'r', encoding='UTF-8') as json_file:
b = json.load(json_file)
if language in b.keys():
for row in b[language]:
names = row[1].split(', ')
for item in names:
normalized = unidecode(item.replace(' ', '').replace('.', '').replace('-', '').upper())
normalized = unidecode(item.replace(' ', ' ').replace('.', '').replace('-', '').upper()) # non-breaking space
self._src_book_names[normalized] = row[0]
self._ranges = pd.read_sql_query("SELECT * FROM Ranges;", con)
cur.close()
Expand All @@ -92,7 +92,8 @@ def __init__(self, language='English', translate=None, form=None, verbose=False)
# no capitals required (bit slower)
self._first_pass = regex.compile(r'(?![^{]*})((?:(?:(?:[1-5]\p{L}{0,2}|[iIvV]{1,3})[—–\-\.   ]*)?\p{L}[\p{L}\.—–\-]+(?![,—–\-])[:\.—–\-\d,   ;]*(?<!;\s)\d)|(?:(?:[1-5]\p{L}{0,2}|[iIvV]{1,3})[\.—–\-   ]*\p{Lu}[\p{L}\.—–\-]+))')
self._second_pass = regex.compile(r'(?![^{]*})(\p{L}[\p{L}\.—–\-]+(?![,—–\-])[:\.—–\-\d,   ;]*(?<!;\s)\d)')
self._bk_ref = regex.compile(r'((?:[1-5]\p{L}{0,2}|[iIvV]{1,3})?[\-\.]?[\p{L}\-\.]{2,})(.*)') # CHECK: not tested with non-Latin characters
# CHECK: not tested with non-Latin characters:
self._bk_ref = regex.compile(r'((?:[1-5]\p{L}{0,2}|[iIvV]{1,3})?[\-\.]?[\p{L}\-\. ]{2,})(.*)') # non-breaking space
self._tagged = regex.compile(r'({{.*?}})')
self._pretagged = regex.compile(r'{{(.*?)}}')

Expand Down Expand Up @@ -124,7 +125,7 @@ def check_book(bk_name):
bk_num = self._src_book_names[bk_name]
return self._ranges.loc[(self._ranges.Book == bk_num) & (self._ranges.Chapter.isnull()), ['Book', 'Last']].values[0]

reduced = regex.sub(r'[   ]', '', scripture)
reduced = regex.sub(r'[   ]', ' ', scripture) # non-breaking space
reduced = regex.sub(r'[—–]', '-', reduced)
result = self._bk_ref.search(reduced)
if result:
Expand Down Expand Up @@ -188,7 +189,7 @@ def reform_series(txt): # rewrite comma-separated consecutive sequences as (1, 2
else:
if self._rewrite:
bk_name = self._tr_book_names[bk_num]
output = bk_name+' '
output = bk_name+' ' # non-breaking space
for chunk in rest.split(';'):
chunk = reform_series(chunk)
output += chunk.strip()+'; '
Expand Down Expand Up @@ -392,9 +393,9 @@ def _decode_scripture(self, bcv_range):
return None
bk_name = self._tr_book_names[sb]
if self._ranges.loc[(self._ranges.Book == sb) & (self._ranges.Chapter.isnull()), ['Last']].values[0] == 1:
ch = ' '
ch = ' ' # non-breaking space
else:
ch = f" {sc}:"
ch = f" {sc}:" # non-breaking space
if start == end:
scripture = f"{bk_name}{ch}{sv}"
else:
Expand Down Expand Up @@ -474,7 +475,8 @@ def process_verses(chunk, book, multi):

def r(match):
scripture = match.group(1).strip('}{')
_, _, tr_name, bk_num, rest, last = scripture.split('|')
_, bk_name, tr_name, bk_num, rest, last = scripture.split('|')
print(bk_name,tr_name)
bk_num = int(bk_num)
last = int(last)
if rest == '': # whole book
Expand Down

0 comments on commit 64df20c

Please sign in to comment.