Skip to content

Commit

Permalink
Glottochronology (excluded exclusions) extends #1435 (#1446)
Browse files Browse the repository at this point in the history
* Reduce multi spaces

* Refactoring
  • Loading branch information
vmonakhov authored Jun 22, 2023
1 parent 685dcbc commit eefccdd
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions lingvodoc/schema/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -13242,13 +13242,18 @@ def compare_translations(swadesh_lex, dictionary_lex):
def split_lex(lex):
# Split on commas and opening brackets to separate
# the various forms of a lexeme and the extra note, if any
return set(f" {form}".lower().replace(" заим.", "").strip()
lex = ' '.join(lex.lower().split()) # reduce multi spaces
if "убрать из стословника" in lex:
return set()

return set(form.strip()
for form in lex.replace('(', ',').split(',')
if form.strip()
and ')' not in form) # exclude notes
if form.strip() and ')' not in form) # exclude notes

# return true if the intersection is not empty
return bool(split_lex(swadesh_lex) & split_lex(dictionary_lex))


# Gathering entry grouping data.

if not debug_flag:
Expand Down Expand Up @@ -13294,6 +13299,7 @@ def split_lex(lex):
with gzip.open(tag_data_file_name, 'wb') as tag_data_file:
pickle.dump((r1, group_list, r3), tag_data_file)


# Getting text data for each perspective.
# entries_set gathers the entry_id(s) of words found in the Swadesh list
# swadesh_total gathers the numbers of words within the Swadesh list
Expand Down

0 comments on commit eefccdd

Please sign in to comment.