Skip to content

Commit

Permalink
Some small logic safety fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
acabal committed Dec 27, 2024
1 parent 6a8d970 commit 05ad747
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 8 deletions.
4 changes: 3 additions & 1 deletion se/commands/compare_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def compare_versions(plain_output: bool) -> int:
# We wrap this whole thing in a try block, because we need to call
# driver.quit() if execution is interrupted (like by ctrl + c, or by an unhandled exception). If we don't call driver.quit(),
# Firefox will stay around as a zombie process even if the Python script is dead.
driver = None
try:
driver = se.browser.initialize_selenium_firefox_webdriver()

Expand Down Expand Up @@ -194,7 +195,8 @@ def compare_versions(plain_output: bool) -> int:

finally:
try:
driver.quit()
if driver:
driver.quit()
except Exception:
# We might get here if we ctrl + c before selenium has finished initializing the driver
pass
Expand Down
4 changes: 2 additions & 2 deletions se/se_epub_lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -2787,7 +2787,7 @@ def _lint_xhtml_typography_checks(filename: Path, dom: se.easy_xml.EasyXmlTree,
ignored_words = ["cafe", "cafes", "consomme", "debut", "menage", "puree", "regime", "regimes", "reveille", "reveilles"]

# Initialize our dictionary
se.spelling.initialize_dictionary()
dictionary = se.spelling.initialize_dictionary()

for node in nodes:
# Remove any child nodes that have a language specified
Expand All @@ -2801,7 +2801,7 @@ def _lint_xhtml_typography_checks(filename: Path, dom: se.easy_xml.EasyXmlTree,
# to have accents for scansion anyway.
for word in regex.findall(r"[A-Za-z]+[áéíóú]+[A-za-z]+", node.inner_text()):
unaccented_word = unidecode(word)
if unaccented_word in se.spelling.DICTIONARY and unaccented_word not in ignored_words:
if unaccented_word in dictionary and unaccented_word not in ignored_words:
filtered_nodes.append(node)

if filtered_nodes:
Expand Down
13 changes: 8 additions & 5 deletions se/spelling.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,17 @@ def get_xhtml_language(xhtml: str) -> str:

return language

def initialize_dictionary():
def initialize_dictionary() -> Set[str]:
"""
Initialize the spelling word list dictionary, if we haven't already.
"""

if not se.spelling.DICTIONARY:
if not DICTIONARY:
with importlib.resources.files("se.data").joinpath("words").open("r", encoding="utf-8") as dictionary:
se.spelling.DICTIONARY = {line.strip().lower() for line in dictionary}
return {line.strip().lower() for line in dictionary}

else:
return DICTIONARY

def modernize_hyphenation(xhtml: str) -> str:
"""
Expand All @@ -49,7 +52,7 @@ def modernize_hyphenation(xhtml: str) -> str:
A string representing the XHTML with its hyphenation modernized
"""

initialize_dictionary()
dictionary = initialize_dictionary()

# Easy fix for a common case
xhtml = regex.sub(r"\b([Nn])ow-a-days\b", r"\1owadays", xhtml) # now-a-days -> nowadays
Expand All @@ -60,7 +63,7 @@ def modernize_hyphenation(xhtml: str) -> str:

for word in set(result): # set() removes duplicates
new_word = word.replace("-", "").lower()
if new_word in se.spelling.DICTIONARY:
if new_word in dictionary:
# To preserve capitalization of the first word, we get the individual parts
# then replace the original match with them joined together and titlecased.
lhs = regex.sub(r"\-.+$", r"", word)
Expand Down

0 comments on commit 05ad747

Please sign in to comment.