-
-
Notifications
You must be signed in to change notification settings - Fork 20
/
import_lemmas.py
155 lines (137 loc) · 4.49 KB
/
import_lemmas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import csv
import re
import sqlite3
import tempfile
import zipfile
from contextlib import closing
from html import escape
from pathlib import Path
from typing import Any
def extract_apkg(apkg_path: Path) -> dict[str, int]:
    """Read the cards of an Anki ``.apkg`` package and rate their difficulty.

    The package is a zip archive holding an SQLite collection database.
    ``collection.anki21`` is used when the archive contains it, otherwise
    ``collection.anki2``.

    Args:
        apkg_path: Path of the ``.apkg`` file to read.

    Returns:
        Mapping from the first field of each note (the text before the first
        ``\\x1f`` separator) to the difficulty derived from the card type.
        When several cards share a first field, the last row read wins.

    Raises:
        KeyError: If the archive contains neither collection database.
    """
    cards = {}
    # Extract into a private temporary directory: writing next to the package
    # could overwrite (and then delete) an unrelated file of the same name,
    # and the directory is cleaned up even when reading the database fails.
    with zipfile.ZipFile(apkg_path) as zf, tempfile.TemporaryDirectory() as tmp_dir:
        db_path = zipfile.Path(zf, "collection.anki21")
        if not db_path.exists():  # original note: no scheduling information
            db_path = zipfile.Path(zf, "collection.anki2")
        ex_db_path = zf.extract(db_path.name, tmp_dir)
        # Close the connection before the temporary directory is removed.
        with closing(sqlite3.connect(ex_db_path)) as conn:
            for card_type, fields in conn.execute(
                "SELECT type, flds FROM cards JOIN notes ON cards.nid = notes.id"
            ):
                word = fields.split("\x1f", 1)[0]
                cards[word] = card_type_to_difficult_level(card_type)
    return cards
def card_type_to_difficult_level(card_type: int) -> int:
    """Translate an Anki card ``type`` code into a difficulty level.

    Type codes are described at
    https://github.com/ankidroid/Anki-Android/wiki/Database-Structure#cards

    Mapping: 0 (new) -> 1, 1 (learning) -> 3, 2 (review) -> 5,
    3 (relearning) -> 4. Any other value -> 1.
    """
    if card_type == 1:  # learning
        return 3
    if card_type == 2:  # review
        return 5
    if card_type == 3:  # relearning
        return 4
    # New cards (0) and unrecognised codes share the lowest level.
    return 1
def extract_csv(csv_path: Path) -> dict[str, int]:
    """Read words and optional difficulty levels from a CSV file.

    The first column is the word and the second column, when present, is its
    difficulty. Further columns are ignored. A missing or non-integer
    difficulty falls back to 1. Empty rows (blank lines) are skipped.

    Args:
        csv_path: Path of the CSV file. It is decoded as UTF-8 and a leading
            byte-order mark is dropped.

    Returns:
        Mapping from word to difficulty. When a word appears more than once,
        the last row wins.
    """
    csv_words = {}
    # "utf-8-sig" avoids depending on the platform default encoding and keeps
    # a BOM from becoming part of the first word.
    with open(csv_path, newline="", encoding="utf-8-sig") as f:
        for row in csv.reader(f):
            if not row:  # csv.reader yields [] for blank lines
                continue
            difficulty = 1
            if len(row) >= 2:
                try:
                    difficulty = int(row[1])
                except ValueError:
                    pass  # keep the default for non-numeric difficulty text
            csv_words[row[0]] = difficulty
    return csv_words
def query_vocabulary_builder(lang: str, db_path: Path) -> dict[str, int]:
    """Rate looked-up words of one language from a vocabulary database.

    The database is expected to provide ``WORDS`` and ``LOOKUPS`` tables
    joined on ``LOOKUPS.word_key = WORDS.id`` (presumably a Kindle
    Vocabulary Builder ``vocab.db`` -- confirm against the caller).

    Args:
        lang: Value matched against the ``lang`` column.
        db_path: Path of the SQLite database file.

    Returns:
        Mapping from word stem to the difficulty computed from how many
        lookup rows the stem has and from its ``category`` value.
    """
    query = """
        SELECT stem, category, count(*)
        FROM WORDS JOIN LOOKUPS ON LOOKUPS.word_key = WORDS.id
        WHERE lang = ? GROUP BY stem
    """
    # closing() guarantees the connection is released even if the query fails.
    with closing(sqlite3.connect(db_path)) as conn:
        return {
            stem: lookups_to_difficulty(lookups, category)
            for stem, category, lookups in conn.execute(query, (lang,))
        }
def lookups_to_difficulty(lookups: int, category: int) -> int:
    """Convert a lookup count and word category into a difficulty level.

    A category of 100 (mastered) always yields 5. Otherwise one lookup
    yields 5, two yield 4, three yield 3, four yield 2, and any other
    count yields 1.
    """
    mastered = category == 100
    if mastered or lookups == 1:
        return 5
    if lookups == 2:
        return 4
    if lookups == 3:
        return 3
    if lookups == 4:
        return 2
    return 1
def apply_imported_lemmas_data(
db_path: Path, import_path: Path, retain_lemmas: bool, lemma_lang: str
) -> None:
lemmas_dict = {}
match import_path.suffix:
case ".apkg":
lemmas_dict = extract_apkg(import_path)
case ".csv":
lemmas_dict = extract_csv(import_path)
case ".db":
lemmas_dict = query_vocabulary_builder(lemma_lang, import_path)
case _:
return
conn = sqlite3.connect(db_path)
for lemma_id, lemma in conn.execute("SELECT id, lemma FROM lemmas"):
if lemma in lemmas_dict:
conn.execute(
"UPDATE senses SET enabled = 1, difficulty = ? WHERE lemma_id = ?",
(lemmas_dict.get(lemma), lemma_id),
)
elif not retain_lemmas:
conn.execute(
"UPDATE senses SET enabled = 0, difficulty = 1 WHERE lemma_id = ?",
(lemma_id,),
)
conn.commit()
conn.close()
def export_lemmas_job(
    db_path: Path,
    export_path: Path,
    only_enabled: bool,
    difficulty_limit: int,
    is_kindle: bool,
    lemma_lang: str,
    gloss_lang: str,
    abort: Any = None,
    log: Any = None,
    notifications: Any = None,
) -> None:
    """Export senses from the lemmas database as tab-separated text.

    Each output line is ``<lemma>\\t<html>``. The HTML part holds the part of
    speech and the definition in ``<p>`` elements, followed by the example in
    an ``<i>`` element when the example is non-empty. Tabs and newlines inside
    the definition and example are replaced with spaces and both are
    HTML-escaped. The lemma and part of speech are written as stored.

    Args:
        db_path: Path of the lemmas SQLite database.
        export_path: Path of the UTF-8 text file to write (overwritten).
        only_enabled: When true, export only senses with ``enabled = 1``.
        difficulty_limit: Export only senses with ``difficulty`` at or below
            this value.
        is_kindle: Not used by this function.
        lemma_lang: Not used by this function.
        gloss_lang: Not used by this function.
        abort: Not used by this function.
        log: Not used by this function.
        notifications: Not used by this function.
    """
    query_sql = """
    SELECT lemma, pos, full_def, example
    FROM senses JOIN lemmas ON senses.lemma_id = lemmas.id
    WHERE difficulty <= ?
    """
    if only_enabled:
        query_sql += " AND enabled = 1"

    # A tab or newline inside a field would break the one-record-per-line,
    # tab-separated output format.
    separators = re.compile(r"\t|\n")

    # closing() releases the connection even if opening the output file or
    # running the query raises.
    with closing(sqlite3.connect(db_path)) as conn:
        with open(export_path, "w", encoding="utf-8") as f:
            for lemma, pos_type, full_def, example in conn.execute(
                query_sql, (difficulty_limit,)
            ):
                full_def = escape(separators.sub(" ", full_def))
                back_text = f"<p>{pos_type}</p><p>{full_def}</p>"
                if example:
                    example = escape(separators.sub(" ", example))
                    back_text += f"<i>{example}</i>"
                f.write(f"{lemma}\t{back_text}\n")