Updated black formatting
fbanados committed Dec 10, 2024
1 parent 8dcc79a commit 1001c72
Showing 17 changed files with 161 additions and 107 deletions.
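Every hunk in this commit is mechanical reformatting. For a rough sense of how such a commit is produced, black's Python API can replay the same kind of rewrite — a minimal sketch, assuming black's default Mode() (the repository may pin a different black version or line length):

import black

# Unformatted source resembling the old side of the first hunk below.
before = (
    "def next(some_list, current_index):\n"
    "    try:\n"
    "        return some_list[int(current_index) + 1]\n"
    "    except:\n"
    "        return ''\n"
)

after = black.format_str(before, mode=black.Mode())
print(after)  # single quotes are normalized to double quotes: return ""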
src/morphodict/frontend/templatetags/morphodict_extras.py (6 changes: 4 additions & 2 deletions)
@@ -192,13 +192,15 @@ def sort_sources(sources: list):
     ret = sources
     return ret
 
+
 @register.filter(name="next")
 def next(some_list, current_index):
     try:
         return some_list[int(current_index) + 1]
     except:
-        return ''
-
+        return ""
+
+
 @register.filter(name="has_next")
 def has_next(some_list, current_index):
     return not current_index >= len(some_list) - 1
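These two filters step a template through successive elements of a list, returning an empty string past the end. A standalone sketch of their behavior with the @register.filter decorators dropped (the sample source list is invented):

def next_item(some_list, current_index):
    # Same logic as the "next" filter above.
    try:
        return some_list[int(current_index) + 1]
    except:
        return ""


def has_next(some_list, current_index):
    return not current_index >= len(some_list) - 1


sources = ["CW", "MD", "AE"]
assert next_item(sources, 0) == "MD"
assert next_item(sources, 2) == ""  # past the end
assert has_next(sources, 0) and not has_next(sources, 2)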
src/morphodict/frontend/views.py (15 changes: 9 additions & 6 deletions)
@@ -140,17 +140,19 @@ def index(request):  # pragma: no cover
     )
     return render(request, "morphodict/index.html", context)
+
+
 def wordnet(request, user_query, results):
     def process_result(r):
         return {
-            "wn_entry" : r[0],
+            "wn_entry": r[0],
             "results": r[1].serialized_presentation_results(
-            display_mode=DisplayMode.current_value_from_request(request),
-            animate_emoji=AnimateEmoji.current_value_from_request(request),
-            show_emoji=ShowEmoji.current_value_from_request(request),
-            dict_source=get_dict_source(request),
-            )
+                display_mode=DisplayMode.current_value_from_request(request),
+                animate_emoji=AnimateEmoji.current_value_from_request(request),
+                show_emoji=ShowEmoji.current_value_from_request(request),
+                dict_source=get_dict_source(request),
+            ),
         }
 
     context = create_context_for_index_template(
         "search-page",
         word_search_form=WordSearchForm(),

@@ -160,6 +162,7 @@ def process_result(r):
     )
     return render(request, "morphodict/wordnet-search.html", context)
 
+
 def search_results(request, query_string: str):  # pragma: no cover
     """
     returns rendered boxes of search results according to user query
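The wordnet view pairs each WordnetEntry with the serialized presentation of its search results. A stand-in sketch of that pairing logic — WordnetEntry and serialized_presentation_results() are morphodict internals, so plain strings and list() substitute for them here:

def process_result(r, serialize):
    # r is a (wordnet_entry, search_results) pair, as in the view above.
    wn_entry, results = r
    return {"wn_entry": wn_entry, "results": serialize(results)}


pairs = [("dog.n.01", ["atim"]), ("run.v.01", ["pimipahtâw"])]
processed = [process_result(p, serialize=list) for p in pairs]
assert processed[0] == {"wn_entry": "dog.n.01", "results": ["atim"]}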
src/morphodict/lexicon/admin.py (3 changes: 2 additions & 1 deletion)
@@ -8,11 +8,12 @@
     TargetLanguageKeyword,
     Wordform,
     SourceLanguageKeyword,
-    WordNetSynset
+    WordNetSynset,
 )
 
 admin.site.register(WordNetSynset)
 
+
 # https://stackoverflow.com/a/1720961/14558
 def admin_url_for(obj):
     return reverse(
src/morphodict/lexicon/management/commands/importjsondict.py (45 changes: 30 additions & 15 deletions)
@@ -35,7 +35,7 @@
     SourceLanguageKeyword,
     ImportStamp,
     RapidWords,
-    WordNetSynset
+    WordNetSynset,
 )
 from morphodict.lexicon.util import to_source_language_keyword
 

@@ -301,7 +301,7 @@ def run(self):
         existing_slugs = self.gather_slugs()
 
         form_definitions = []
-
+
         for entry in tqdm(self.data, smoothing=0):
             if "formOf" in entry:
                 form_definitions.append(entry)

@@ -369,14 +369,16 @@ def run(self):
 
         # Make sure everything is saved for upcoming formOf queries
         self.flush_insert_buffers()
 
         wordforms = Wordform.objects.all()
-        for wf in tqdm(wordforms.iterator(),total=wordforms.count()):
+        for wf in tqdm(wordforms.iterator(), total=wordforms.count()):
             if not wf.linguist_info:
                 continue
 
             if "rw_indices" in wf.linguist_info:
-                rapidwords = {rw for l in wf.linguist_info["rw_indices"].values() for rw in l}
+                rapidwords = {
+                    rw for l in wf.linguist_info["rw_indices"].values() for rw in l
+                }
                 for rw in rapidwords:
                     index = rw.strip()
                     try:

@@ -385,20 +387,30 @@ def run(self):
                         # Try flexible search
                         try:
                             try:
-                                candidates = [RapidWords.objects.get(index=".".join(index.split(".")[:-1]))]
+                                candidates = [
+                                    RapidWords.objects.get(
+                                        index=".".join(index.split(".")[:-1])
+                                    )
+                                ]
                             except RapidWords.DoesNotExist:
-                                query = Q(domain__iexact=wf.linguist_info["rw_domains"][0])
+                                query = Q(
+                                    domain__iexact=wf.linguist_info["rw_domains"][0]
+                                )
                                 for domain in wf.linguist_info["rw_domains"][1:]:
                                     query |= Q(domain__iexact=domain)
                                 universe = RapidWords.objects.filter(query)
-                                candidates = [x for x in universe if index.startswith(x.index)]
+                                candidates = [
+                                    x for x in universe if index.startswith(x.index)
+                                ]
                         except:
-                            candidates=[]
-                        if len(candidates)>0:
-                            candidates.sort(key=lambda x:len(x.index),reverse=True)
+                            candidates = []
+                        if len(candidates) > 0:
+                            candidates.sort(key=lambda x: len(x.index), reverse=True)
                             rapidword = candidates[0]
                         else:
-                            print(f"WARNING: ImportJSON error: Slug {wf.slug} is annotated with nonexistent {index} RW index")
+                            print(
+                                f"WARNING: ImportJSON error: Slug {wf.slug} is annotated with nonexistent {index} RW index"
+                            )
                         if rapidword:
                             wf.rapidwords.add(rapidword)

@@ -413,12 +425,15 @@ def run(self):
                     # which stand for ADJ, ADJ_SAT, ADV, NOUN, VERB)
                     # - entry annotated with a non-canonical lemma. Use the canonical lemma appearing in
                     # "name" in our wordnet instance site.
-                    print(f"WARNING: ImportJSON error: Slug {wf.slug} is annotated with nonexistent {wn.strip()} WN domain")
+                    print(
+                        f"WARNING: ImportJSON error: Slug {wf.slug} is annotated with nonexistent {wn.strip()} WN domain"
+                    )
                 if normalized_name:
-                    synset, _ = WordNetSynset.objects.get_or_create(name=normalized_name)
+                    synset, _ = WordNetSynset.objects.get_or_create(
+                        name=normalized_name
+                    )
                     wf.synsets.add(synset)
 
-
         for entry in form_definitions:
             if self.incremental and freshness_check.is_fresh(entry["formOf"]):
                 continue
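The nested try/except ladder above recovers from a RapidWords index that is missing from the table: first it retries with the parent index, then it filters by the wordform's rw_domains and keeps the longest index that prefixes the annotated one. A self-contained sketch of that strategy, with a plain dict standing in for the ORM (sample entries are invented):

# index -> domain; stands in for the RapidWords table.
RAPIDWORDS = {"2.1": "Body", "2.1.1": "Head"}


def resolve_index(index: str, domains: list[str]) -> str | None:
    index = index.strip()
    if index in RAPIDWORDS:
        return index
    # 1) Retry with the parent index: "2.1.1.9" -> "2.1.1".
    parent = ".".join(index.split(".")[:-1])
    if parent in RAPIDWORDS:
        return parent
    # 2) Restrict to the annotated domains, then keep the longest
    #    (most specific) index that prefixes the one we were given.
    wanted = {d.lower() for d in domains}
    candidates = [
        i for i, d in RAPIDWORDS.items() if d.lower() in wanted and index.startswith(i)
    ]
    return max(candidates, key=len, default=None)


assert resolve_index("2.1.1.9", []) == "2.1.1"
assert resolve_index("2.1.9.9", ["Body"]) == "2.1"
assert resolve_index("9.9", ["Body"]) is None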
src/morphodict/lexicon/migrations/0014_auto_20241128_2341.py (15 changes: 8 additions & 7 deletions)
@@ -5,18 +5,21 @@
 from tqdm import tqdm
 import json
 
+
 def load_rapidwords(apps, schema_editor):
     RapidWords = apps.get_model("lexicon", "RapidWords")
-    RAPIDWORDS_JSON_FILE = settings.BASE_DIR / ".." / "morphodict" / "resources" / "rapidwords.json"
+    RAPIDWORDS_JSON_FILE = (
+        settings.BASE_DIR / ".." / "morphodict" / "resources" / "rapidwords.json"
+    )
 
-    with open(RAPIDWORDS_JSON_FILE,'r') as f:
+    with open(RAPIDWORDS_JSON_FILE, "r") as f:
         rw_data = json.load(f)
 
     rw_entries = []
 
     for key, items in tqdm(rw_data.items()):
         rw_entries.append(RapidWords(index=key, domain=items["domain"]))
 
     RapidWords.objects.bulk_create(rw_entries)
 
 

@@ -26,6 +29,4 @@ class Migration(migrations.Migration):
         ("lexicon", "0013_wordnetsynset_wordform_rapidwords_wordform_synsets"),
     ]
 
-    operations = [
-        migrations.RunPython(load_rapidwords)
-    ]
+    operations = [migrations.RunPython(load_rapidwords)]
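load_rapidwords assumes rapidwords.json maps each RapidWords index to an object carrying at least a "domain" field. A sketch of the load loop against that assumed shape, minus the ORM (sample entries are invented):

import json

raw = '{"2.1": {"domain": "Body"}, "2.1.1": {"domain": "Head"}}'
rw_data = json.loads(raw)

# The same (index, domain) pairing the migration feeds to bulk_create.
rw_entries = [(key, items["domain"]) for key, items in rw_data.items()]
assert rw_entries == [("2.1", "Body"), ("2.1.1", "Head")]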
src/morphodict/lexicon/migrations/0015_auto_20241128_2351.py (19 changes: 9 additions & 10 deletions)
@@ -3,11 +3,12 @@
 from django.db import migrations
 from morphodict.search.types import WordnetEntry
 
+
 def migrate_semantic_domains(apps, schema_editor):
     RapidWords = apps.get_model("lexicon", "RapidWords")
     WordNetSynset = apps.get_model("lexicon", "WordNetSynset")
     Wordform = apps.get_model("lexicon", "Wordform")
-
+
     # For every wordform, collect the semantic domain information in the old
     # format and place it where it belongs.
     wordforms = Wordform.objects.all()

@@ -18,10 +19,10 @@ def migrate_semantic_domains(apps, schema_editor):
         else:
             rapidwords = []
         if wf.wn_synsets:
-            synsets = [x.strip() for x in wf.wn_synsets.split(";")]
+            synsets = [x.strip() for x in wf.wn_synsets.split(";")]
         else:
-            synsets = []
+            synsets = []
 
         for rw in rapidwords:
             try:
                 if rw:

@@ -32,16 +33,14 @@ def migrate_semantic_domains(apps, schema_editor):
             if wn:
                 normalized_entry = str(WordnetEntry(wn))
                 wf.synsets.add(
-                    WordNetSynset.objects.get_or_create(
-                        name=normalized_entry
-                    ))
+                    WordNetSynset.objects.get_or_create(name=normalized_entry)
+                )
 
+
 class Migration(migrations.Migration):
 
     dependencies = [
         ("lexicon", "0014_auto_20241128_2341"),
     ]
 
-    operations = [
-        migrations.RunPython(migrate_semantic_domains)
-    ]
+    operations = [migrations.RunPython(migrate_semantic_domains)]
src/morphodict/lexicon/migrations/0016_auto_20241202_1907.py (24 changes: 15 additions & 9 deletions)
@@ -3,11 +3,12 @@
 from django.db import migrations
 from morphodict.search.types import WordnetEntry
 
+
 def migrate_from_linguistinfo(apps, schema_editor):
     RapidWords = apps.get_model("lexicon", "RapidWords")
     WordNetSynset = apps.get_model("lexicon", "WordNetSynset")
     Wordform = apps.get_model("lexicon", "Wordform")
-
+
     # For every wordform, collect the semantic domain information in the old
     # format and place it where it belongs.
     wordforms = Wordform.objects.all()

@@ -16,13 +17,17 @@ def migrate_from_linguistinfo(apps, schema_editor):
         if not wf.linguist_info:
             continue
         if "rw_indices" in wf.linguist_info:
-            rapidwords = {rw for l in wf.linguist_info["rw_indices"].values() for rw in l}
+            rapidwords = {
+                rw for l in wf.linguist_info["rw_indices"].values() for rw in l
+            }
             for rw in rapidwords:
                 index = rw.strip()
                 try:
                     wf.rapidwords.add(RapidWords.objects.get(index=index))
                 except RapidWords.DoesNotExist:
-                    print(f"ERROR: Slug {wf.slug} is annotated with nonexistent {index} RW index")
+                    print(
+                        f"ERROR: Slug {wf.slug} is annotated with nonexistent {index} RW index"
+                    )
 
         if "wn_domains" in wf.linguist_info:
             for wn in wf.linguist_info["wn_domains"]:

@@ -36,11 +41,14 @@ def migrate_from_linguistinfo(apps, schema_editor):
                 # which stand for ADJ, ADJ_SAT, ADV, NOUN, VERB)
                 # - entry annotated with a non-canonical lemma. Use the canonical lemma appearing in
                 # "name" in our wordnet instance site.
-                print(f"ERROR: Slug {wf.slug} is annotated with nonexistent {wn.strip()} WN domain")
+                print(
+                    f"ERROR: Slug {wf.slug} is annotated with nonexistent {wn.strip()} WN domain"
+                )
             if normalized_name:
-                synset, _ = WordNetSynset.objects.get_or_create(name=normalized_name)
+                synset, _ = WordNetSynset.objects.get_or_create(
+                    name=normalized_name
+                )
                 wf.synsets.add(synset)
 
-
 
 class Migration(migrations.Migration):

@@ -49,6 +57,4 @@ class Migration(migrations.Migration):
         ("lexicon", "0015_auto_20241128_2351"),
     ]
 
-    operations = [
-        migrations.RunPython(migrate_from_linguistinfo)
-    ]
+    operations = [migrations.RunPython(migrate_from_linguistinfo)]
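The error branches above fire when an annotation is not a canonical WordNet synset name. Per the comments, canonical names look like lemma.pos.nn with pos one of n, v, a, r, s (NOUN, VERB, ADJ, ADV, ADJ_SAT). A small stand-in for that shape check — the real normalization is done by WordnetEntry:

VALID_POS = {"n", "v", "a", "r", "s"}  # NOUN, VERB, ADJ, ADV, ADJ_SAT


def looks_canonical(name: str) -> bool:
    parts = name.strip().split(".")
    return len(parts) == 3 and parts[1] in VALID_POS and parts[2].isdigit()


assert looks_canonical("dog.n.01")
assert not looks_canonical("dog.noun.01")  # pos tag must be n/v/a/r/s
assert not looks_canonical("dog")  # missing pos and sense number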
src/morphodict/lexicon/models.py (5 changes: 4 additions & 1 deletion)
@@ -46,19 +46,22 @@ def __init__(self, *args, **kwargs):
         kwargs = {**kwargs, "ensure_ascii": False}
         super().__init__(*args, **kwargs)
 
+
 class RapidWords(models.Model):
     index = models.CharField(max_length=MAX_WORDFORM_LENGTH, primary_key=True)
     domain = models.CharField(max_length=MAX_TEXT_LENGTH)
 
     def __str__(self):
         return self.index
 
+
 class WordNetSynset(models.Model):
     name = models.CharField(max_length=MAX_TEXT_LENGTH, primary_key=True)
 
     def __str__(self):
         return self.name
 
+
 class Wordform(models.Model):
     # Queries always do .select_related("lemma"):
     objects = WordformLemmaManager()

@@ -165,7 +168,7 @@ class Meta:
             # - affix tree intialization
             # - sitemap generation
             models.Index(fields=["is_lemma", "text"]),
-            models.Index(fields=["slug"])
+            models.Index(fields=["slug"]),
         ]
 
     def __str__(self):
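Both new models key on their human-readable identifier, so __str__ simply echoes the primary key. A sketch of the intended behavior, assuming a configured Django environment (field values are illustrative):

rw = RapidWords(index="2.1", domain="Body")
assert str(rw) == "2.1"

synset = WordNetSynset(name="dog.n.01")
assert str(synset) == "dog.n.01"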
src/morphodict/search/__init__.py (7 changes: 4 additions & 3 deletions)
@@ -4,6 +4,7 @@
 from .query import Query
 from .wordnet import WordnetEntry
 
+
 def search_with_affixes(
     query: str, include_auto_definitions=False, inflect_english_phrases=False
 ) -> SearchResults:

@@ -38,10 +39,10 @@ def api_search(
         inflect_english_phrases=inflect_english_phrases,
     ).serialized_presentation_results()
 
-def wordnet_search(
-    query:str) -> list[tuple[WordnetEntry, SearchResults]] | None :
+
+def wordnet_search(query: str) -> list[tuple[WordnetEntry, SearchResults]] | None:
     # If we are doing an english simple phrase
     search_query = Query(query)
     if search_query.wn:
         return wordnet_runner(search_query)
-    return None
\ No newline at end of file
+    return None
src/morphodict/search/lemma_freq.py (2 changes: 1 addition & 1 deletion)
@@ -16,7 +16,7 @@ def load_lemma_data():
             # we want to normalize the lemma frequency
             # so I found the max of 32334
             # and now we divide by that
-            LEMMA_FREQUENCY[l] = int(l_freq) #/ max
+            LEMMA_FREQUENCY[l] = int(l_freq)  # / max
 
 
 def get_lemma_freq(search_results):
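The comment above describes a normalization that is currently disabled: dividing each raw count by the observed maximum, 32334. A sketch of what re-enabling the division would compute (lemma strings and counts are invented):

MAX_FREQ = 32334  # maximum raw lemma frequency, per the comment above

raw = {"atim": 1617, "nipiy": 32334}
normalized = {lemma: count / MAX_FREQ for lemma, count in raw.items()}

assert normalized["nipiy"] == 1.0
assert 0.0 < normalized["atim"] < 1.0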
src/morphodict/search/lookup.py (2 changes: 1 addition & 1 deletion)
@@ -28,7 +28,7 @@ def fetch_results(query: core.Query, search_results: core.SearchResults):
     fetch_results_from_target_language_keywords(query, search_results)
     fetch_results_from_source_language_keywords(query, search_results)
 
-    # Then we proceed to analyze the query, if successfull, we look for those 
+    # Then we proceed to analyze the query, if successfull, we look for those
     # entries in the dictionary that share the analysis with the FST result.
     # This introduces source-level spelling relaxation if the FST supports it.
 
src/morphodict/search/pos_matches.py (4 changes: 3 additions & 1 deletion)
@@ -4,7 +4,9 @@
 from morphodict.analysis import rich_analyze_relaxed
 
 
-def find_pos_matches(tag_source: EsptSearch | None, search_results: SearchResults) -> None:
+def find_pos_matches(
+    tag_source: EsptSearch | None, search_results: SearchResults
+) -> None:
     if not tag_source:
         return
     tags = tag_source.tags
0 comments on commit 1001c72