Skip to content

Commit

Permalink
Don't use hard-coded Wiktionary domain in EPUB books
Browse files Browse the repository at this point in the history
  • Loading branch information
xxyzz committed Feb 28, 2024
1 parent 0b52bbf commit 73b1fa6
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 15 deletions.
31 changes: 17 additions & 14 deletions epub.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,8 @@ def remove_entities(self, minimal_count: int) -> None:
def modify_epub(
self,
prefs: Prefs,
lang: str,
lemma_lang: str,
gloss_lang: str,
lemmas_conn: sqlite3.Connection | None,
has_multiple_ipas: bool,
) -> None:
Expand All @@ -267,10 +268,10 @@ def modify_epub(
query_wikidata(self.entities, self.mediawiki, self.wikidata)
if prefs["minimal_x_ray_count"] > 1:
self.remove_entities(prefs["minimal_x_ray_count"])
self.create_x_ray_footnotes(prefs, lang)
self.insert_anchor_elements(lang)
self.create_x_ray_footnotes(prefs, lemma_lang)
self.insert_anchor_elements(lemma_lang)
if self.lemmas:
self.create_word_wise_footnotes(lang)
self.create_word_wise_footnotes(lemma_lang, gloss_lang)
self.modify_opf()
self.zip_extract_folder()
if self.mediawiki is not None:
Expand All @@ -282,7 +283,7 @@ def modify_epub(
if lemmas_conn is not None:
lemmas_conn.close()

def insert_anchor_elements(self, lang: str) -> None:
def insert_anchor_elements(self, lemma_lang: str) -> None:
css_rules = ""
if len(self.lemmas) > 0:
css_rules += """
Expand Down Expand Up @@ -332,7 +333,7 @@ def insert_anchor_elements(self, lang: str) -> None:
)
else:
new_xhtml_str += self.build_word_wise_tag(
occurrence.lemma, word, lang
occurrence.lemma, word, lemma_lang
)
last_w_end = occurrence.word_end
if occurrence.paragraph_end != last_p_end:
Expand All @@ -354,15 +355,15 @@ def insert_anchor_elements(self, lang: str) -> None:
)
f.write(new_xhtml_str)

def build_word_wise_tag(self, lemma: str, word: str, lang: str) -> str:
def build_word_wise_tag(self, lemma: str, word: str, lemma_lang: str) -> str:
if lemma not in self.lemmas:
return word
data = self.get_lemma_gloss(lemma, lang)
data = self.get_lemma_gloss(lemma, lemma_lang)
if not data:
del self.lemmas[lemma]
return word
short_def = data[0][0]
len_ratio = 3 if lang in CJK_LANGS else 2.5
len_ratio = 3 if lemma_lang in CJK_LANGS else 2.5
lemma_id = self.lemmas[lemma]
if len(short_def) / len(word) > len_ratio:
return (
Expand Down Expand Up @@ -466,23 +467,25 @@ def create_x_ray_footnotes(self, prefs: Prefs, lang: str) -> None:
with self.xhtml_folder.joinpath("x_ray.xhtml").open("w", encoding="utf-8") as f:
f.write(s)

def create_word_wise_footnotes(self, lemma_lang: str, gloss_lang: str) -> None:
    """Write the Word Wise footnote file (``word_wise.xhtml``) for the EPUB.

    Builds an XHTML document whose ``lang``/``xml:lang`` attributes are set
    to the lemma language, appends one aside tag per lemma in
    ``self.lemmas`` (delegated to ``create_ww_aside_tag``), and writes the
    result into ``self.xhtml_folder``.

    :param lemma_lang: language code of the book's lemmas; used for the
        document language attributes and gloss lookup.
    :param gloss_lang: language code of the gloss source; forwarded to
        ``create_ww_aside_tag`` so the Wiktionary source link points at the
        matching language subdomain instead of a hard-coded ``en`` one.
    """
    # NOTE(review): the capture strips indentation, so the exact leading
    # whitespace inside this f-string literal is assumed — confirm against
    # the repository before relying on byte-exact output.
    s = f"""
<html xmlns="http://www.w3.org/1999/xhtml"
xmlns:epub="http://www.idpf.org/2007/ops"
lang="{lemma_lang}" xml:lang="{lemma_lang}">
<head><title>Word Wise</title><meta charset="utf-8"/></head>
<body>
"""
    for lemma, lemma_id in self.lemmas.items():
        s += self.create_ww_aside_tag(lemma, lemma_id, lemma_lang, gloss_lang)
    s += "</body></html>"
    with self.xhtml_folder.joinpath("word_wise.xhtml").open(
        "w", encoding="utf-8"
    ) as f:
        f.write(s)

def create_ww_aside_tag(self, lemma: str, lemma_id: int, lemma_lang: str) -> str:
def create_ww_aside_tag(
self, lemma: str, lemma_id: int, lemma_lang: str, gloss_lang: str
) -> str:
data = self.get_lemma_gloss(lemma, lemma_lang)
tag_str = ""
added_ipa = False
Expand All @@ -499,7 +502,7 @@ def create_ww_aside_tag(self, lemma: str, lemma_id: int, lemma_lang: str) -> str
tag_str += f"<p><i>{escape(example)}</i></p>"
tag_str += "<hr/>"
tag_str += (
f"<p>Source: <a href='https://en.wiktionary.org/wiki/"
f"<p>Source: <a href='https://{gloss_lang}.wiktionary.org/wiki/"
f"{quote(lemma)}'>Wiktionary</a></p></aside>"
)
return tag_str
Expand Down
4 changes: 3 additions & 1 deletion parse_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,9 @@ def create_files(data: ParseJobData, prefs: Prefs, notif: Any) -> None:
supported_languages[gloss_lang]["gloss_source"] == "kaikki"
and prefs.get(f"{data.book_lang}_ipa") is not None
)
epub.modify_epub(prefs, data.book_lang, lemmas_conn, has_multiple_ipas)
epub.modify_epub(
prefs, data.book_lang, gloss_lang, lemmas_conn, has_multiple_ipas
)
return

# Kindle
Expand Down

0 comments on commit 73b1fa6

Please sign in to comment.