Skip to content

Commit

Permalink
modernize-spelling: Sion -> Zion
Browse files Browse the repository at this point in the history
  • Loading branch information
acabal committed Jan 30, 2024
1 parent 8a13146 commit ff8bb50
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions se/spelling.py
Original file line number Diff line number Diff line change
Expand Up
@@ -492,8 +492,9 @@ def modernize_spelling(xhtml: str) -> str:
xhtml = regex.sub(r"[YJ]edd?o\b", r"Edo", xhtml) # Yeddo/Jeddo -> Edo
xhtml = regex.sub(r"Pesth\b", r"Pest", xhtml) # Pesth -> Pest, i.e. Buda-Pest
xhtml = regex.sub(r"Buda-Pest\b", r"Budapest", xhtml) # Buda-Pest -> Budapest
- xhtml = regex.sub(r"Chili(\b|an\b)\b", r"Chile\1", xhtml) # Chili -> Chile
- xhtml = regex.sub(r"(?<![\.!\?])\sAl-([A-Z])", r" al-\1", xhtml) # Lowercase Arabic definite article (e.g. Al-Zubayr -> al-Zubayr) in the middle of a sentence
+ xhtml = regex.sub(r"Chili(\b|an\b)\b", r"Chile\1", xhtml) # Chili -> Chile
+ xhtml = regex.sub(r"(?<![\.!\?])\sAl-([A-Z])", r" al-\1", xhtml) # Lowercase Arabic definite article (e.g. Al-Zubayr -> al-Zubayr) in the middle of a sentence
+ xhtml = regex.sub(r"\bSion\b", r"Zion", xhtml) # Sion -> Zion

# Remove archaic diphthongs
xhtml = regex.sub(r"\b([Mm])edi(æ|ae)val", r"\1edieval", xhtml)
Expand Down

0 comments on commit ff8bb50

Please sign in to comment.