Skip to content

Commit

Permalink
Changed migrations to better parse RW annotations
Browse files (browse the repository at this point in the history)
  • Loading branch information
fbanados committed Dec 10, 2024
1 parent 69dcfa7 commit 32bab40
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 6 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,7 +12,6 @@ def migrate_semantic_domains(apps, schema_editor):
# For every wordform, collect the semantic domain information in the old
# format and place it where it belongs.
wordforms = Wordform.objects.all()
count = wordforms.count()
for wf in wordforms:
if wf.rw_indices:
rapidwords = [x.strip() for x in wf.rw_indices.split(";")]
Expand Down
34 changes: 29 additions & 5 deletions src/morphodict/lexicon/migrations/0016_auto_20241202_1907.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,6 +2,7 @@

from django.db import migrations
from morphodict.search.types import WordnetEntry
from django.db.models import Q


def migrate_from_linguistinfo(apps, schema_editor):
Expand All @@ -12,7 +13,6 @@ def migrate_from_linguistinfo(apps, schema_editor):
# For every wordform, collect the semantic domain information in the old
# format and place it where it belongs.
wordforms = Wordform.objects.all()
count = wordforms.count()
for wf in wordforms:
if not wf.linguist_info:
continue
Expand All @@ -23,11 +23,35 @@ def migrate_from_linguistinfo(apps, schema_editor):
for rw in rapidwords:
index = rw.strip()
try:
wf.rapidwords.add(RapidWords.objects.get(index=index))
rapidword = RapidWords.objects.get(index=index)
except RapidWords.DoesNotExist:
print(
f"ERROR: Slug {wf.slug} is annotated with nonexistent {index} RW index"
)
# Try flexible search
try:
try:
candidates = [
RapidWords.objects.get(
index=".".join(index.split(".")[:-1])
)
]
except RapidWords.DoesNotExist:
query = Q(domain__iexact=wf.linguist_info["rw_domains"][0])
for domain in wf.linguist_info["rw_domains"][1:]:
query |= Q(domain__iexact=domain)
universe = RapidWords.objects.filter(query)
candidates = [
x for x in universe if index.startswith(x.index)
]
except:
candidates = []
if len(candidates) > 0:
candidates.sort(key=lambda x: len(x.index), reverse=True)
rapidword = candidates[0]
else:
print(
f"WARNING: ImportJSON error: Slug {wf.slug} is annotated with nonexistent {index} RW index"
)
if rapidword:
wf.rapidwords.add(rapidword)

if "wn_domains" in wf.linguist_info:
for wn in wf.linguist_info["wn_domains"]:
Expand Down

0 comments on commit 32bab40

Please sign in to comment.