diff --git a/munge/README.md b/munge/README.md deleted file mode 100644 index e336ddf4d..000000000 --- a/munge/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# JavaScript code for producing `importjson` - -`importjson` is our name for a JSON-based data format for loading dictionary -`data into morphodict. - -This directory contains code, some of it very language-specific, for converting -various dictionary sources into `importjson`. diff --git a/munge/arpeng/run.js b/munge/arpeng/run.js deleted file mode 100755 index 0bd3400cd..000000000 --- a/munge/arpeng/run.js +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env node - -/* - * A launcher script for toimportjson.ts that sets up run-time transpilation. - */ - -const { runHelper } = require("../shared/run-helper"); - -runHelper("toimportjson.ts"); diff --git a/munge/arpeng/toimportjson-test.ts b/munge/arpeng/toimportjson-test.ts deleted file mode 100644 index 6829220c1..000000000 --- a/munge/arpeng/toimportjson-test.ts +++ /dev/null @@ -1,193 +0,0 @@ -import { expect } from "chai"; -import { ArapahoLexiconEntry, AraphoLexicon, munge } from "./toimportjson"; - -// Allow test inputs to both include a bunch of extra stuff copied from -// original, and, for testing purposes, inputs missing required fields -type LooseArapahoLexicon = { - [id: string]: Partial & { [key: string]: unknown }; -}; - -describe("arpeng toimportjson", function () { - it("works on a basic input", function () { - const munged = munge((({ - L16737: { - semantic_domain: "", - image: "", - pos: "vii", - parent_lex: "", - examples: [], - morphology: "nihoon-yoo-", - examplefrequency: 0, - gloss: "yellow", - etymology: "", - literal: "", - allolexemes: [ - "niihooyoo-", - "niihooyou'-", - "niihooyóó- IC", - "nihooyoo-", - ], - usage_main: "", - status: "", - base_form: "níhooyóó-", - senses: [ - { - definition: "yellow", - }, - ], - cultural: "", - user: "", - lex: "nihooyoo-", - date_added: "2011-04-29 00:00:00", - parent_lexid: "", - parent_rel: "", - sound: [ - "http://verbs.colorado.edu/~ghka9436/audiovisual/audio/n/heet-nihooyoo-'.mp3", - "http://verbs.colorado.edu/~ghka9436/audiovisual/audio/n/nihooyoo-'.mp3", - ], - language: "Arapaho", - date_modified: "2016-01-14 14:36:40", - derivations: [], - }, - // Entries marked deleted should be skipped - L23962: { - status: "deleted", - }, - // the IC version, with an additional definition, should end up linked - L16276: { - semantic_domain: "", - image: "", - pos: "vai", - parent_lex: "", - examples: [], - morphology: "ni'-i-3ecoo-", - examplefrequency: 0, - gloss: "glad, happy", - etymology: "", - literal: "", - allolexemes: ["nii'í3ecóó- IC", "ni'i3ecoo-"], - usage_main: "", - status: "", - base_form: "ni'í3ecóó-", - senses: [ - { - definition: "glad, happy", - }, - ], - cultural: "", - user: "", - lex: "ni'i3ecoo-", - date_added: "2014-09-09 00:00:00", - parent_lexid: "", - parent_rel: "", - sound: [ - "http://verbs.colorado.edu/~ghka9436/audiovisual/audio/n/heetih-ni'i3ecoo-3i'.wav", - "http://verbs.colorado.edu/~ghka9436/audiovisual/audio/n/ni'i3ecoo-3i'.mp3", - "http://verbs.colorado.edu/~ghka9436/audiovisual/audio/n/ni'i3ecoo-n.mp3", - "http://verbs.colorado.edu/~ghka9436/audiovisual/audio/n/ni'i3ecoo-ni'.mp3", - "http://verbs.colorado.edu/~ghka9436/audiovisual/audio/n/ni'i3ecoo-no'.mp3", - "http://verbs.colorado.edu/~ghka9436/audiovisual/audio/n/ni'i3ecoo-noo.mp3", - "http://verbs.colorado.edu/~ghka9436/audiovisual/audio/n/ni'i3ecoo-noo3.mp3", - "http://verbs.colorado.edu/~ghka9436/audiovisual/audio/n/ni'i3ecoo-t.mp3", - ], - language: "Arapaho", - date_modified: "2016-09-25 17:40:12", - derivations: [], - }, - L16792: { - semantic_domain: "", - image: "", - pos: "vti", - parent_lex: "", - examples: [], - morphology: "ni'-i-3ecoo-:t-", - examplefrequency: 0, - gloss: "IC.enjoy s.t.", - etymology: "", - literal: "", - allolexemes: ["nii'i3ecoot-"], - usage_main: "", - status: "", - base_form: "nii'í3ecóot-", - senses: [ - { - definition: "IC.enjoy s.t.", - }, - ], - cultural: "", - user: "", - lex: "nii'i3ecoot-", - date_added: "2009-03-20 00:00:00", - parent_lexid: "", - parent_rel: "allolexeme", - sound: "", - language: "Arapaho", - date_modified: "2009-03-20 00:00:00", - derivations: [], - }, - } as LooseArapahoLexicon) as any) as AraphoLexicon); - - expect(JSON.parse(munged)).to.eql([ - { - analysis: [ - ["[VERB]", "[AI]", "[ANIMATE-SUBJECT]", "[IMPERATIVE]"], - "ni'i3ecoo", - ["[2SG-SUBJ]"], - ], - head: "ni'í3ecóó-", - linguistInfo: { - morphologies: ["ni'-i-3ecoo-"], - pos: "vai", - sourceIds: ["L16276"], - }, - paradigm: "AI", - senses: [ - { - definition: "glad, happy", - sources: ["ALD"], - }, - ], - slug: "ni'í3ecóó-", - }, - { - analysis: [ - [ - "[VERB]", - "[AI]", - "[ANIMATE-SUBJECT]", - "[AFFIRMATIVE]", - "[PRESENT]", - "[IC]", - ], - "ni'i3ecoo", - ["[3SG-SUBJ]"], - ], - formOf: "ni'í3ecóó-", - head: "nii'í3ecóot-", - senses: [ - { - definition: "IC.enjoy s.t.", - sources: ["ALD"], - }, - ], - }, - { - head: "níhooyóó-", - linguistInfo: { - pos: "vii", - sourceIds: ["L16737"], - morphologies: ["nihoon-yoo-"], - }, - senses: [ - { - definition: "yellow", - sources: ["ALD"], - }, - ], - fstLemma: "nihooyoo", - paradigm: "II", - slug: "níhooyóó-", - }, - ]); - }); -}); diff --git a/munge/arpeng/toimportjson.ts b/munge/arpeng/toimportjson.ts deleted file mode 100644 index 58098bad5..000000000 --- a/munge/arpeng/toimportjson.ts +++ /dev/null @@ -1,232 +0,0 @@ -import { resourceDir } from "../shared/util"; -import { Command } from "commander"; -import { Dictionary } from "../shared/dictionary"; -import { intersection } from "lodash"; - -const { Transducer } = require("hfstol"); -const { execIfMain } = require("execifmain"); -const { readFile, writeFile } = require("fs/promises"); -const { join: joinPath } = require("path"); - -const RESOURCE_DIR = resourceDir("arpeng"); -const DICTIONARY_DIR = joinPath(RESOURCE_DIR, "dictionary"); -const FST_DIR = joinPath(RESOURCE_DIR, "fst"); - -export type ArapahoLexiconEntry = { - base_form: string; - status: string; - pos: string; - lex: string; - senses: [{ definition: string }]; - // Only used to populate linguistInfo: - morphology: string; -}; -export type AraphoLexicon = { [id: string]: ArapahoLexiconEntry }; - -type AraphoLinguistInfo = { - sourceIds?: string[]; - pos?: string; - morphologies?: string[]; -}; - -/** - * Entries from arapho_lexicon.json to skip. Entries are identified by key, but - * only skipped if all of the indicated properties match: that way, if the - * source entry is updated to no longer be problematic, it will no longer be - * automatically excluded. - */ -const ENTRIES_TO_SKIP = new Map>([ - ["L3", { base_form: "'" }], -]); - -// Inspired by python’s functools.cache() -function cached(instantiator: () => T): () => T { - const cache: { value?: T } = {}; - return () => { - if (!("value" in cache)) { - cache.value = instantiator(); - } - return cache.value!; - }; -} - -const normativeAnalyzer = cached(() => { - console.log("loading analyser-gt-norm"); - return new Transducer(joinPath(FST_DIR, "analyser-gt-norm.hfstol")); -}); -const normativeGenerator = cached( - () => new Transducer(joinPath(FST_DIR, "generator-gt-norm.hfstol")) -); -const descriptiveAnalyzer = cached( - () => new Transducer(joinPath(FST_DIR, "analyser-gt-desc.hfstol")) -); - -async function main() { - const program = new Command(); - program - .option( - "--input-lexicon ", - "The original source dictionary to use", - `${DICTIONARY_DIR}/arapaho_lexicon.json` - ) - .option( - "--output-file ", - "Where to write the generated importjson file", - `${DICTIONARY_DIR}/arpeng_dictionary.importjson` - ); - - program.parse(); - - const options = program.opts(); - - const lexicalDatabase = JSON.parse( - await readFile(options.inputLexicon, "utf-8") - ) as AraphoLexicon; - - const assembled = munge(lexicalDatabase); - - await writeFile(options.outputFile, assembled); - console.log(`Wrote ${options.outputFile}`); -} - -export function munge(lexicalDatabase: AraphoLexicon): string { - const dictionary = new Dictionary([]); - - for (const [key, obj] of Object.entries(lexicalDatabase)) { - if (obj.status === "deleted") { - continue; - } - - const head = obj.base_form; - if (!head) { - continue; - } - - // Skip certain entries listed with matching key, but only if specified - // properties still match - let exclusion = ENTRIES_TO_SKIP.get(key); - if (exclusion) { - let shouldExclude = true; - let propertyName: keyof ArapahoLexiconEntry; - for (propertyName in exclusion) { - if (exclusion[propertyName] !== obj[propertyName]) { - shouldExclude = false; - break; - } - } - if (shouldExclude) { - continue; - } - } - - const entry = dictionary.getOrCreate({ text: head }); - // The new ??= local assignment operator would need NodeJS 16 - entry.linguistInfo = entry.linguistInfo ?? {}; - if (entry.linguistInfo.pos && entry.linguistInfo.pos !== obj.pos) { - console.log( - `Warning: pos mismatch: ${JSON.stringify(entry.linguistInfo)} ${ - entry.head - } ${entry.paradigm} vs ${key} ${head} ${obj.pos}` - ); - } - entry.linguistInfo.pos = obj.pos; - - entry.linguistInfo.sourceIds = entry.linguistInfo?.sourceIds ?? []; - entry.linguistInfo.sourceIds.push(key); - - entry.linguistInfo.morphologies = entry.linguistInfo?.morphologies ?? []; - if ( - obj.morphology && - !entry.linguistInfo.morphologies.includes(obj.morphology) - ) { - entry.linguistInfo.morphologies.push(obj.morphology); - } - - for (const sense of obj.senses) { - const { definition } = sense; - if (definition) { - entry.addDefinition(definition, ["ALD"]); - } - } - - // TODO: could also use obj.pos, e.g., `vii`, to disambiguate when there are - // multiple analyses - const normativeAnalyses = normativeAnalyzer().lookup_lemma_with_affixes( - head - ); - if (normativeAnalyses.length === 1) { - entry.analysis = normativeAnalyses[0]; - } else if (normativeAnalyses.length > 1) { - console.log(`multiple normative analyses for ${head}`); - } else { - const descriptiveAnalyses = descriptiveAnalyzer().lookup_lemma_with_affixes( - head - ); - if (descriptiveAnalyses.length === 1) { - entry.analysis = descriptiveAnalyses[0]; - } else if (descriptiveAnalyses.length > 1) { - console.log(`multiple descriptive analyses for ${head}`); - } else { - // could warn here, not analyzable - } - } - - // Now, assign paradigms. There are more paradigms but this is the only one - // we have a paradigm table for right now. - for (const paradigm of ["AI", "II", "TA", "TI"]) { - if ( - entry.analysis && - intersection(entry.analysis[0], ["[VERB]", `[${paradigm}]`]).length === - 2 - ) { - entry.paradigm = paradigm; - break; - } - } - - // If analyzing the base form didn’t work, try using the lex field as an FST - // lemma - if (!entry.analysis && normativeAnalyses.length === 0) { - for (const [pos, paradigm, template] of [ - [ - "vti", - "TI", - (lemma) => - `[VERB][TI][INANIMATE-OBJECT][AFFIRMATIVE][PRESENT][IC]${lemma}[3SG-SUBJ]`, - ], - [ - "vai", - "AI", - (lemma) => - `[VERB][AI][ANIMATE-SUBJECT][AFFIRMATIVE][PRESENT][IC]${lemma}[1SG-SUBJ]`, - ], - [ - "vii", - "II", - (lemma) => - `[VERB][II][INANIMATE-SUBJECT][AFFIRMATIVE][PRESENT][IC]${lemma}[3SG-SUBJ]`, - ], - [ - "vta", - "TA", - (lemma) => - `[VERB][TA][ANIMATE-OBJECT][AFFIRMATIVE][PRESENT][IC]${lemma}[1SG-SUBJ][2SG-OBJ]`, - ], - ] as [string, string, (lemma: string) => string][]) { - if (obj.pos.startsWith(pos)) { - const lemma = obj.lex.replace(/-$/, ""); - const generated = normativeGenerator().lookup(template(lemma)); - if (generated.length !== 0) { - entry.fstLemma = lemma; - entry.paradigm = paradigm; - break; - } - } - } - } - } - - return dictionary.assemble(); -} - -execIfMain(main, module); diff --git a/munge/cwdeng/cwdize-test.ts b/munge/cwdeng/cwdize-test.ts deleted file mode 100644 index 998f7153c..000000000 --- a/munge/cwdeng/cwdize-test.ts +++ /dev/null @@ -1,277 +0,0 @@ -import { expect } from "chai"; -import { Dictionary, Wordform } from "../shared/dictionary"; -import { munge, NdjsonDatabase, removeMdOnlyEntries } from "./cwdize"; - -describe("cwdize", function () { - describe("removeMdOnlyEntries", function () { - it("removes the expected stuff", function () { - const dictionary = new Dictionary(); - - let word1 = dictionary.getOrCreate({ text: "word1" }); - word1.senses = [ - { definition: "md-only", sources: ["MD"] }, - { definition: "shared", sources: ["CW", "MD"] }, - { definition: "cw-only", sources: ["CW"] }, - ]; - let word2 = dictionary.getOrCreate({ text: "word2" }); - word2.senses = [{ definition: "md-only", sources: ["MD"] }]; - - const word2form = new Wordform(); - word2form.head = "word2-form"; - word2form.senses = [{ definition: "foo", sources: ["CW"] }]; - word2form.formOf = word2; - dictionary.addWordform(word2form); - - removeMdOnlyEntries(dictionary); - - expect([...dictionary]).to.eql([ - { - head: "word1", - senses: [ - { definition: "shared", sources: ["CW"] }, - { definition: "cw-only", sources: ["CW"] }, - ], - }, - ]); - }); - }); - - describe("munge", function () { - it("works on a basic input", function () { - const inputCrkengImportJson = [ - { - head: "asiskîwiyâkanihkamawêw", - analysis: [ - [], - "asiskîwiyâkanihkamawêw", - ["+V", "+TA", "+Ind", "+3Sg", "+4Sg/PlO"], - ], - paradigm: "VTA", - senses: [ - { - definition: "s/he makes pottery for s.o.", - sources: ["CW"], - }, - ], - linguistInfo: { - inflectional_category: "VTA-2", - pos: "V", - stem: "asiskîwiyâkanihkamaw-", - wordclass: "VTA", - }, - slug: "asiskîwiyâkanihkamawêw", - }, - { - head: "ayâw", - analysis: [[], "ayâw", ["+V", "+II", "+Ind", "+3Sg"]], - paradigm: "VII", - senses: [ - { - definition: "it is, it is there", - sources: ["CW"], - }, - ], - linguistInfo: { - inflectional_category: "VII-2v", - pos: "V", - stem: "ayâ-", - wordclass: "VII", - }, - slug: "ayâw@vii", - }, - - // An entry with an associated wordform - { - head: "mâyiskawêw", - analysis: [ - [], - "mâyiskawêw", - ["+V", "+TA", "+Ind", "+3Sg", "+4Sg/PlO"], - ], - paradigm: "VTA", - senses: [ - { - definition: - "s/he affects s.o. negatively, s/he has an adverse effect on s.o.; s/he makes s.o. ill; s/he is not suited to s.o., s/he does not fit in with s.o.", - sources: ["CW"], - }, - ], - linguistInfo: { - inflectional_category: "VTA-2", - pos: "V", - stem: "mâyiskaw-", - wordclass: "VTA", - }, - slug: "mâyiskawêw", - }, - { - head: "mâyiskâkow", - analysis: [ - [], - "mâyiskawêw", - ["+V", "+TA", "+Ind", "+4Sg/Pl", "+3SgO"], - ], - senses: [ - { - definition: - "it affects s.o. badly, it has an adverse effect on s.o.; it makes s.o. ill, it makes s.o. react allergically", - sources: ["CW"], - }, - ], - formOf: "mâyiskawêw", - }, - ]; - - const inputCrkengNdjson = new NdjsonDatabase([ - { - dataSources: { - CW: { - pos: "VTA-2", - senses: [{ definition: "s/he makes pottery for s.o." }], - }, - }, - lemma: { - proto: "asiskîwiýâkanihkamawêw", - sro: "asiskîwiyâkanihkamawêw", - }, - }, - { - dataSources: { - CW: { - pos: "VII-2v", - senses: [{ definition: "it is, it is there" }], - }, - }, - lemma: { proto: "ayâw", sro: "ayâw" }, - }, - { - dataSources: { - CW: { - pos: "VTA-2", - senses: [ - { - definition: - "s/he affects s.o. negatively, s/he has an adverse effect on s.o.", - }, - { definition: "s/he makes s.o. ill" }, - { - definition: - "s/he is not suited to s.o., s/he does not fit in with s.o.", - }, - ], - }, - }, - lemma: { proto: "mâýiskawêw", sro: "mâyiskawêw" }, - }, - { - dataSources: { - CW: { - pos: "VTA-2", - senses: [ - { - definition: - "it affects s.o. badly, it has an adverse effect on s.o.", - }, - { - definition: - "it makes s.o. ill, it makes s.o. react allergically", - }, - ], - }, - }, - lemma: { proto: "mâýiskâkow", sro: "mâyiskâkow" }, - }, - ]); - - const munged = munge( - Dictionary.fromJson(JSON.stringify(inputCrkengImportJson)), - inputCrkengNdjson - ); - - expect(JSON.parse(munged)).to.eql([ - { - head: "asiskîwithâkanihkamawîw", - analysis: [ - [], - "asiskîwithâkanihkamawîw", - ["+V", "+TA", "+Ind", "+3Sg", "+4Sg/PlO"], - ], - paradigm: "VTA", - senses: [ - { - definition: "s/he makes pottery for s.o.", - sources: ["CW"], - }, - ], - linguistInfo: { - inflectional_category: "VTA-2", - pos: "V", - proto: "asiskîwiýâkanihkamawêw", - // FIXME: stem requires crk-db code to include proto stem - stem: "asiskîwiyâkanihkamaw-", - wordclass: "VTA", - }, - slug: "asiskīwithākanihkamawīw", - }, - { - analysis: [[], "ayâw", ["+V", "+II", "+Ind", "+3Sg"]], - head: "ayâw", - linguistInfo: { - inflectional_category: "VII-2v", - pos: "V", - stem: "ayâ-", - wordclass: "VII", - }, - paradigm: "VII", - senses: [ - { - definition: "it is, it is there", - sources: ["CW"], - }, - ], - slug: "ayâw@vii", - }, - { - head: "mâthiskawîw", - analysis: [ - [], - "mâthiskawîw", - ["+V", "+TA", "+Ind", "+3Sg", "+4Sg/PlO"], - ], - paradigm: "VTA", - senses: [ - { - definition: - "s/he affects s.o. negatively, s/he has an adverse effect on s.o.; s/he makes s.o. ill; s/he is not suited to s.o., s/he does not fit in with s.o.", - sources: ["CW"], - }, - ], - linguistInfo: { - inflectional_category: "VTA-2", - pos: "V", - proto: "mâýiskawêw", - stem: "mâyiskaw-", - wordclass: "VTA", - }, - slug: "māthiskawīw", - }, - { - head: "mâthiskâkow", - analysis: [ - [], - "mâthiskawîw", - ["+V", "+TA", "+Ind", "+4Sg/Pl", "+3SgO"], - ], - senses: [ - { - definition: - "it affects s.o. badly, it has an adverse effect on s.o.; it makes s.o. ill, it makes s.o. react allergically", - sources: ["CW"], - }, - ], - formOf: "māthiskawīw", - }, - ]); - }); - }); -}); diff --git a/munge/cwdeng/cwdize.ts b/munge/cwdeng/cwdize.ts deleted file mode 100644 index 72272b1c3..000000000 --- a/munge/cwdeng/cwdize.ts +++ /dev/null @@ -1,237 +0,0 @@ -/** - * Script to create a cwdeng dictionary from crkeng importjson by: - * - removing MD definitions - * - getting proto head from ndjson and using to transliterate - */ - -import { execIfMain } from "execifmain"; -import { Command } from "commander"; -import { join as joinPath } from "path"; -import { Counter, DefaultMap, resourceDir } from "../shared/util"; -import { Dictionary, DictionaryEntry } from "../shared/dictionary"; -import { readFile, writeFile } from "fs/promises"; -import { readNdjsonFile } from "./util"; -import { every, remove } from "lodash"; - -export type NdjsonEntry = { - dataSources: { - [SourceAbbreviation: string]: { - senses: { definition: string }[]; - pos?: string; - stems?: string[]; - }; - }; - lemma?: { proto?: string; sro?: string }; -}; - -const CRK_DICTIONARY_DIR = joinPath(resourceDir("crkeng"), "dictionary"); -const CWD_DICTIONARY_DIR = joinPath(resourceDir("cwdeng"), "dictionary"); - -interface CreeLinguistInfo { - inflectional_category?: string; - pos?: string; - stem?: string; - wordclass?: string; - proto?: string; -} - -function protoToWoods(s: string) { - let ret = s; - ret = ret.replace(/ý/g, "th"); - ret = ret.replace(/ê/g, "î"); - return ret; -} - -function toMacrons(s: string) { - let ret = s; - ret = ret.replace(/â/g, "ā"); - ret = ret.replace(/[êî]/g, "ī"); - ret = ret.replace(/ô/g, "ō"); - return ret; -} - -export class NdjsonDatabase { - private _entries; - private _byHead; - - constructor(entries: NdjsonEntry[]) { - this._entries = entries; - this._byHead = new DefaultMap((k) => []); - - for (const e of this._entries) { - if (!e.lemma) debugger; - if (e.lemma?.sro) { - this._byHead.getOrCreate(e.lemma.sro).push(e); - } - } - } - - getMatches(head: string) { - return this._byHead.get(head); - } -} - -// visible for testing -export function removeMdOnlyEntries(importjson: Dictionary) { - const entriesToRemove = []; - for (const entry of importjson) { - const senses = entry.senses ?? []; - for (const sense of senses) { - remove(sense.sources, (source) => source === "MD"); - } - remove(senses, (s) => s.sources.length === 0); - - if (senses.length === 0) { - entriesToRemove.push(entry); - } - } - // second loop to avoid mutating while iterating - for (const entry of entriesToRemove) { - importjson.remove(entry); - } -} - -function cleanupLinguistInfo(linguistInfo: CreeLinguistInfo) { - const unsafeLinguistInfo = linguistInfo as { [key: string]: unknown }; - // Currently, these keys are not used by morphodict, but do exist in the - // production importjson. Don’t propagate them. - delete unsafeLinguistInfo.as_is; - delete unsafeLinguistInfo.inflectional_category_linguistic; - delete unsafeLinguistInfo.inflectional_category_plain_english; - delete unsafeLinguistInfo.wordclass_emoji; - delete unsafeLinguistInfo.smushedAnalysis; -} - -function transliterateHeads( - importjson: Dictionary, - ndjson: NdjsonDatabase -) { - const statCounts = new Counter(); - - for (const entry of importjson) { - const origHead = entry.head!; - let proto = origHead; - - statCounts.increment("entries"); - - // Don’t bother doing the work of trying to find a proto version unless the - // headword contains an ambiguous character. Not so much to save time as to - // avoid a misleadingly large number of entries for which a unique ndjson - // entry could not be found. - if (/y/.test(origHead)) { - statCounts.increment("entries with y"); - - const matches = ndjson.getMatches(origHead); - const matchCount = matches?.length ?? 0; - statCounts.increment(`${matchCount} matches`); - - if (matches) { - const possibleProtos = matches.map((m) => m.lemma?.proto); - - if ( - possibleProtos[0] && - every(possibleProtos, (p) => p === possibleProtos[0]) - ) { - proto = possibleProtos[0]; - statCounts.increment(`all protos match`); - } else { - // the algorithm here was going to be, if all matches have `y`s but no - // `ý`s, or vice versa, transform `y`s in original head accordingly. - // But with only 11 word(form)s hitting this case, it’s not worth - // writing the code for that at the moment. - statCounts.increment(`don’t know what to do`); - } - } else { - statCounts.increment("no ndjson matches"); - } - } - - if ("linguistInfo" in entry) { - cleanupLinguistInfo(entry.linguistInfo ?? {}); - } - - entry.head = protoToWoods(proto); - - if (entry.head !== origHead) { - // we’ve adjusted the headword; let’s adjust related fields too - - if (entry instanceof DictionaryEntry) { - if (entry.analysis?.[1] === origHead) { - entry.analysis![1] = protoToWoods(proto); - } - - if (entry.linguistInfo == undefined) { - entry.linguistInfo = {}; - } - - entry.linguistInfo!.proto = proto; - - let [baseSlug, suffix] = entry.slug!.split("@", 2); - if (origHead === baseSlug) { - entry.slug = protoToWoods(proto) + (suffix ? `@${suffix}` : ""); - } - entry.slug = toMacrons(entry.slug!); - } - } - } - - // now that all lemmas are adjusted, adjust non-lemma analyses - for (const entry of importjson) { - if (!("formOf" in entry)) { - continue; - } - - if (entry.analysis) { - entry.analysis[1] = entry.formOf!.analysis![1]; - } - } - - console.log("stats on matching crkeng entries to ndjson proto:"); - console.log(statCounts); -} - -export function munge( - importjson: Dictionary, - ndjson: NdjsonDatabase -) { - removeMdOnlyEntries(importjson); - transliterateHeads(importjson, ndjson); - return importjson.assemble({ lemmatize: false }); -} - -async function main() { - const program = new Command(); - program - .description("Interim script to build a cwdeng dictionary from crkeng one") - .option( - "--input-importjson ", - "The production crkeng source dictionary to use, in importjson format", - `${CRK_DICTIONARY_DIR}/crkeng_dictionary.importjson` - ) - .option( - "--input-ndjson ", - "The DLX database to get proto-Cree forms from, in ndjson format", - `${CRK_DICTIONARY_DIR}/database.ndjson` - ) - .option( - "--output-importjson ", - "Where to write the generated importjson file", - `${CWD_DICTIONARY_DIR}/cwdeng_dictionary.importjson` - ); - - program.parse(); - - const options = program.opts(); - - const ndjson = new NdjsonDatabase(await readNdjsonFile(options.inputNdjson)); - - const importjson = Dictionary.fromJson( - await readFile(options.inputImportjson, "utf8") - ); - - const assembled = munge(importjson, ndjson); - await writeFile(options.outputImportjson, assembled); - console.log(`Wrote ${options.outputImportjson}`); -} - -execIfMain(main, module); diff --git a/munge/cwdeng/run.js b/munge/cwdeng/run.js deleted file mode 100755 index 0eaa5abaf..000000000 --- a/munge/cwdeng/run.js +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env node - -/* - * A launcher script for cwdize.ts that sets up run-time transpilation. - */ - -const { runHelper } = require("../shared/run-helper"); - -runHelper("cwdize.ts"); diff --git a/munge/cwdeng/util.ts b/munge/cwdeng/util.ts deleted file mode 100644 index 507931693..000000000 --- a/munge/cwdeng/util.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { readFile } from "fs/promises"; -import { NdjsonEntry } from "./cwdize"; - -export async function readNdjsonFile(path: string): Promise { - const ret = []; - const text = await readFile(path, "utf8"); - for (const piece of text.split("\n")) { - if (!piece) { - continue; - } - ret.push(JSON.parse(piece) as NdjsonEntry); - } - return ret; -} diff --git a/munge/package-lock.json b/munge/package-lock.json deleted file mode 100644 index 11cb5418b..000000000 --- a/munge/package-lock.json +++ /dev/null @@ -1,1239 +0,0 @@ -{ - "name": "munge", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": "munge", - "dependencies": { - "@types/json-stable-stringify": "^1.0.33", - "commander": "^8.0.0", - "execifmain": "^0.0.2", - "hfstol": "^0.0.3", - "json-stable-stringify": "^1.0.1", - "lodash": "^4.17.21", - "prettier": "^2.3.2", - "sucrase": "^3.19.0" - }, - "devDependencies": { - "@types/chai": "^4.2.19", - "@types/lodash": "^4.14.170", - "@types/mocha": "^8.2.2", - "@types/node": "^15.14.0", - "@types/prettier": "^2.3.1", - "chai": "^4.3.4", - "mocha": "~10.7", - "typescript": "^4.3.5" - } - }, - "node_modules/@types/bindings": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/@types/bindings/-/bindings-1.5.1.tgz", - "integrity": "sha512-8HzueDeoxGXdsJ0Ep7TOXHGN+woRTWa1bAds30r5we7PCC3P5zrSTRknePLn/KYAubgQv5t/1zkonnStHLCWOg==", - "dependencies": { - "@types/node": "*" - } - }, - "node_modules/@types/bindings/node_modules/@types/node": { - "version": "16.3.1", - "resolved": "https://registry.npmjs.org/@types/node/-/node-16.3.1.tgz", - "integrity": "sha512-N87VuQi7HEeRJkhzovao/JviiqKjDKMVKxKMfUvSKw+MbkbW8R0nA3fi/MQhhlxV2fQ+2ReM+/Nt4efdrJx3zA==" - }, - "node_modules/@types/chai": { - "version": "4.2.21", - "resolved": "https://registry.npmjs.org/@types/chai/-/chai-4.2.21.tgz", - "integrity": "sha512-yd+9qKmJxm496BOV9CMNaey8TWsikaZOwMRwPHQIjcOJM9oV+fi9ZMNw3JsVnbEEbo2gRTDnGEBv8pjyn67hNg==", - "dev": true - }, - "node_modules/@types/json-stable-stringify": { - "version": "1.0.33", - "resolved": "https://registry.npmjs.org/@types/json-stable-stringify/-/json-stable-stringify-1.0.33.tgz", - "integrity": "sha512-qEWiQff6q2tA5gcJGWwzplQcXdJtm+0oy6IHGHzlOf3eFAkGE/FIPXZK9ofWgNSHVp8AFFI33PJJshS0ei3Gvw==" - }, - "node_modules/@types/lodash": { - "version": "4.14.171", - "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.171.tgz", - "integrity": "sha512-7eQ2xYLLI/LsicL2nejW9Wyko3lcpN6O/z0ZLHrEQsg280zIdCv1t/0m6UtBjUHokCGBQ3gYTbHzDkZ1xOBwwg==", - "dev": true - }, - "node_modules/@types/mocha": { - "version": "8.2.3", - "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-8.2.3.tgz", - "integrity": "sha512-ekGvFhFgrc2zYQoX4JeZPmVzZxw6Dtllga7iGHzfbYIYkAMUx/sAFP2GdFpLff+vdHXu5fl7WX9AT+TtqYcsyw==", - "dev": true - }, - "node_modules/@types/node": { - "version": "15.14.2", - "resolved": "https://registry.npmjs.org/@types/node/-/node-15.14.2.tgz", - "integrity": "sha512-dvMUE/m2LbXPwlvVuzCyslTEtQ2ZwuuFClDrOQ6mp2CenCg971719PTILZ4I6bTP27xfFFc+o7x2TkLuun/MPw==", - "dev": true - }, - "node_modules/@types/prettier": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/@types/prettier/-/prettier-2.3.2.tgz", - "integrity": "sha512-eI5Yrz3Qv4KPUa/nSIAi0h+qX0XyewOliug5F2QAtuRg6Kjg6jfmxe1GIwoIRhZspD1A0RP8ANrPwvEXXtRFog==", - "dev": true - }, - "node_modules/ansi-colors": { - "version": "4.1.3", - "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.3.tgz", - "integrity": "sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==", - "dev": true, - "engines": { - "node": ">=6" - } - }, - "node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/any-promise": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz", - "integrity": "sha1-q8av7tzqUugJzcA3au0845Y10X8=" - }, - "node_modules/anymatch": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", - "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", - "dev": true, - "dependencies": { - "normalize-path": "^3.0.0", - "picomatch": "^2.0.4" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/argparse": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true - }, - "node_modules/assertion-error": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-1.1.0.tgz", - "integrity": "sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw==", - "dev": true, - "engines": { - "node": "*" - } - }, - "node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" - }, - "node_modules/binary-extensions": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", - "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", - "dev": true, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/bindings": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", - "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", - "dependencies": { - "file-uri-to-path": "1.0.0" - } - }, - "node_modules/brace-expansion": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", - "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", - "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, - "node_modules/braces": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", - "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", - "dev": true, - "dependencies": { - "fill-range": "^7.1.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/browser-stdout": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/browser-stdout/-/browser-stdout-1.3.1.tgz", - "integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==", - "dev": true - }, - "node_modules/camelcase": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.2.0.tgz", - "integrity": "sha512-c7wVvbw3f37nuobQNtgsgG9POC9qMbNuMQmTCqZv23b6MIz0fcYpBiOlv9gEN/hdLdnZTDQhg6e9Dq5M1vKvfg==", - "dev": true, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/chai": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/chai/-/chai-4.3.4.tgz", - "integrity": "sha512-yS5H68VYOCtN1cjfwumDSuzn/9c+yza4f3reKXlE5rUg7SFcCEy90gJvydNgOYtblyf4Zi6jIWRnXOgErta0KA==", - "dev": true, - "dependencies": { - "assertion-error": "^1.1.0", - "check-error": "^1.0.2", - "deep-eql": "^3.0.1", - "get-func-name": "^2.0.0", - "pathval": "^1.1.1", - "type-detect": "^4.0.5" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/chalk": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.1.tgz", - "integrity": "sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==", - "dev": true, - "dependencies": { - "ansi-styles": "^4.1.0", - "supports-color": "^7.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" - } - }, - "node_modules/chalk/node_modules/supports-color": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", - "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "dev": true, - "dependencies": { - "has-flag": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/check-error": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.2.tgz", - "integrity": "sha1-V00xLt2Iu13YkS6Sht1sCu1KrII=", - "dev": true, - "engines": { - "node": "*" - } - }, - "node_modules/chokidar": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", - "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", - "dev": true, - "dependencies": { - "anymatch": "~3.1.2", - "braces": "~3.0.2", - "glob-parent": "~5.1.2", - "is-binary-path": "~2.1.0", - "is-glob": "~4.0.1", - "normalize-path": "~3.0.0", - "readdirp": "~3.6.0" - }, - "engines": { - "node": ">= 8.10.0" - }, - "funding": { - "url": "https://paulmillr.com/funding/" - }, - "optionalDependencies": { - "fsevents": "~2.3.2" - } - }, - "node_modules/cliui": { - "version": "7.0.4", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", - "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", - "dev": true, - "dependencies": { - "string-width": "^4.2.0", - "strip-ansi": "^6.0.0", - "wrap-ansi": "^7.0.0" - } - }, - "node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true - }, - "node_modules/commander": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-8.0.0.tgz", - "integrity": "sha512-Xvf85aAtu6v22+E5hfVoLHqyul/jyxh91zvqk/ioJTQuJR7Z78n7H558vMPKanPSRgIEeZemT92I2g9Y8LPbSQ==", - "engines": { - "node": ">= 12" - } - }, - "node_modules/concat-map": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=" - }, - "node_modules/debug": { - "version": "4.3.6", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.6.tgz", - "integrity": "sha512-O/09Bd4Z1fBrU4VzkhFqVgpPzaGbw6Sm9FEkBT1A/YBXQFGuuSxa1dN2nxgxS34JmKXqYx8CZAwEVoJFImUXIg==", - "dependencies": { - "ms": "2.1.2" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/decamelize": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-4.0.0.tgz", - "integrity": "sha512-9iE1PgSik9HeIIw2JO94IidnE3eBoQrFJ3w7sFuzSX4DpmZ3v5sZpUiV5Swcf6mQEF+Y0ru8Neo+p+nyh2J+hQ==", - "dev": true, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/deep-eql": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-3.0.1.tgz", - "integrity": "sha512-+QeIQyN5ZuO+3Uk5DYh6/1eKO0m0YmJFGNmFHGACpf1ClL1nmlV/p4gNgbl2pJGxgXb4faqo6UE+M5ACEMyVcw==", - "dev": true, - "dependencies": { - "type-detect": "^4.0.0" - }, - "engines": { - "node": ">=0.12" - } - }, - "node_modules/diff": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/diff/-/diff-5.2.0.tgz", - "integrity": "sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==", - "dev": true, - "engines": { - "node": ">=0.3.1" - } - }, - "node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true - }, - "node_modules/escalade": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", - "dev": true, - "engines": { - "node": ">=6" - } - }, - "node_modules/escape-string-regexp": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", - "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", - "dev": true, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/execifmain": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/execifmain/-/execifmain-0.0.2.tgz", - "integrity": "sha512-Nly2EMHd/k2ua12ped72Wppy0YwHTPtqpCi3X/umq0y+9UafEVzLxRrx1TCeZiqzC3xs0/+eRzfqGH+7+yn8Xw==", - "dependencies": { - "debug": "^4.3.1", - "is-promise": "^4.0.0" - } - }, - "node_modules/file-uri-to-path": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", - "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==" - }, - "node_modules/fill-range": { - "version": "7.1.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", - "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", - "dev": true, - "dependencies": { - "to-regex-range": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/find-up": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", - "integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==", - "dev": true, - "dependencies": { - "locate-path": "^6.0.0", - "path-exists": "^4.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/flat": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", - "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==", - "dev": true, - "bin": { - "flat": "cli.js" - } - }, - "node_modules/fs.realpath": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=" - }, - "node_modules/fsevents": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", - "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", - "dev": true, - "hasInstallScript": true, - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, - "node_modules/get-caller-file": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", - "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", - "dev": true, - "engines": { - "node": "6.* || 8.* || >= 10.*" - } - }, - "node_modules/get-func-name": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/get-func-name/-/get-func-name-2.0.2.tgz", - "integrity": "sha512-8vXOvuE167CtIc3OyItco7N/dpRtBbYOsPsXCz7X/PMnlGjYjSGuZJgM1Y7mmew7BKf9BqvLX2tnOVy1BBUsxQ==", - "dev": true, - "engines": { - "node": "*" - } - }, - "node_modules/glob": { - "version": "7.1.6", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz", - "integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==", - "deprecated": "Glob versions prior to v9 are no longer supported", - "dependencies": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.0.4", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" - }, - "engines": { - "node": "*" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "dev": true, - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/has-flag": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", - "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/he": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", - "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", - "dev": true, - "bin": { - "he": "bin/he" - } - }, - "node_modules/hfstol": { - "version": "0.0.3", - "resolved": "https://registry.npmjs.org/hfstol/-/hfstol-0.0.3.tgz", - "integrity": "sha512-aA6f/UqjK/oAp0l0NofY/Baan2dMlVLtTlsAdZ8vPVFlsGg8V1UZpAhfI4lz/pNh5aKajIaz6KljJWMOefLyzw==", - "hasInstallScript": true, - "dependencies": { - "@types/bindings": "^1.5.0", - "bindings": "^1.5.0", - "node-addon-api": "^3.2.1" - } - }, - "node_modules/inflight": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", - "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", - "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.", - "dependencies": { - "once": "^1.3.0", - "wrappy": "1" - } - }, - "node_modules/inherits": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" - }, - "node_modules/is-binary-path": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", - "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", - "dev": true, - "dependencies": { - "binary-extensions": "^2.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", - "dev": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/is-glob": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", - "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", - "dev": true, - "dependencies": { - "is-extglob": "^2.1.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-number": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", - "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", - "dev": true, - "engines": { - "node": ">=0.12.0" - } - }, - "node_modules/is-plain-obj": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-2.1.0.tgz", - "integrity": "sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/is-promise": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz", - "integrity": "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==" - }, - "node_modules/is-unicode-supported": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz", - "integrity": "sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==", - "dev": true, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/js-yaml": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", - "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", - "dev": true, - "dependencies": { - "argparse": "^2.0.1" - }, - "bin": { - "js-yaml": "bin/js-yaml.js" - } - }, - "node_modules/json-stable-stringify": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/json-stable-stringify/-/json-stable-stringify-1.0.1.tgz", - "integrity": "sha1-mnWdOcXy/1A/1TAGRu1EX4jE+a8=", - "dependencies": { - "jsonify": "~0.0.0" - } - }, - "node_modules/jsonify": { - "version": "0.0.0", - "resolved": "https://registry.npmjs.org/jsonify/-/jsonify-0.0.0.tgz", - "integrity": "sha1-LHS27kHZPKUbe1qu6PUDYx0lKnM=", - "engines": { - "node": "*" - } - }, - "node_modules/lines-and-columns": { - "version": "1.1.6", - "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.1.6.tgz", - "integrity": "sha1-HADHQ7QzzQpOgHWPe2SldEDZ/wA=" - }, - "node_modules/locate-path": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", - "integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==", - "dev": true, - "dependencies": { - "p-locate": "^5.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/lodash": { - "version": "4.17.21", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", - "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" - }, - "node_modules/log-symbols": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-4.1.0.tgz", - "integrity": "sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg==", - "dev": true, - "dependencies": { - "chalk": "^4.1.0", - "is-unicode-supported": "^0.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", - "dependencies": { - "brace-expansion": "^1.1.7" - }, - "engines": { - "node": "*" - } - }, - "node_modules/mocha": { - "version": "10.7.3", - "resolved": "https://registry.npmjs.org/mocha/-/mocha-10.7.3.tgz", - "integrity": "sha512-uQWxAu44wwiACGqjbPYmjo7Lg8sFrS3dQe7PP2FQI+woptP4vZXSMcfMyFL/e1yFEeEpV4RtyTpZROOKmxis+A==", - "dev": true, - "dependencies": { - "ansi-colors": "^4.1.3", - "browser-stdout": "^1.3.1", - "chokidar": "^3.5.3", - "debug": "^4.3.5", - "diff": "^5.2.0", - "escape-string-regexp": "^4.0.0", - "find-up": "^5.0.0", - "glob": "^8.1.0", - "he": "^1.2.0", - "js-yaml": "^4.1.0", - "log-symbols": "^4.1.0", - "minimatch": "^5.1.6", - "ms": "^2.1.3", - "serialize-javascript": "^6.0.2", - "strip-json-comments": "^3.1.1", - "supports-color": "^8.1.1", - "workerpool": "^6.5.1", - "yargs": "^16.2.0", - "yargs-parser": "^20.2.9", - "yargs-unparser": "^2.0.0" - }, - "bin": { - "_mocha": "bin/_mocha", - "mocha": "bin/mocha.js" - }, - "engines": { - "node": ">= 14.0.0" - } - }, - "node_modules/mocha/node_modules/brace-expansion": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", - "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", - "dev": true, - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/mocha/node_modules/glob": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/glob/-/glob-8.1.0.tgz", - "integrity": "sha512-r8hpEjiQEYlF2QU0df3dS+nxxSIreXQS1qRhMJM0Q5NDdR386C7jb7Hwwod8Fgiuex+k0GFjgft18yvxm5XoCQ==", - "deprecated": "Glob versions prior to v9 are no longer supported", - "dev": true, - "dependencies": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^5.0.1", - "once": "^1.3.0" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/mocha/node_modules/minimatch": { - "version": "5.1.6", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", - "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", - "dev": true, - "dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/mocha/node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true - }, - "node_modules/mocha/node_modules/yargs": { - "version": "16.2.0", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", - "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", - "dev": true, - "dependencies": { - "cliui": "^7.0.2", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.0", - "y18n": "^5.0.5", - "yargs-parser": "^20.2.2" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" - }, - "node_modules/mz": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", - "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==", - "dependencies": { - "any-promise": "^1.0.0", - "object-assign": "^4.0.1", - "thenify-all": "^1.0.0" - } - }, - "node_modules/node-addon-api": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-3.2.1.tgz", - "integrity": "sha512-mmcei9JghVNDYydghQmeDX8KoAm0FAiYyIcUt/N4nhyAipB17pllZQDOJD2fotxABnt4Mdz+dKTO7eftLg4d0A==" - }, - "node_modules/node-modules-regexp": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/node-modules-regexp/-/node-modules-regexp-1.0.0.tgz", - "integrity": "sha1-jZ2+KJZKSsVxLpExZCEHxx6Q7EA=", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/normalize-path": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", - "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", - "dev": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/once": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", - "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", - "dependencies": { - "wrappy": "1" - } - }, - "node_modules/p-limit": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", - "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", - "dev": true, - "dependencies": { - "yocto-queue": "^0.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/p-locate": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz", - "integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==", - "dev": true, - "dependencies": { - "p-limit": "^3.0.2" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/path-exists": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", - "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/path-is-absolute": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", - "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/pathval": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/pathval/-/pathval-1.1.1.tgz", - "integrity": "sha512-Dp6zGqpTdETdR63lehJYPeIOqpiNBNtc7BpWSLrOje7UaIsE5aY92r/AunQA7rsXvet3lrJ3JnZX29UPTKXyKQ==", - "dev": true, - "engines": { - "node": "*" - } - }, - "node_modules/picomatch": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", - "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", - "dev": true, - "engines": { - "node": ">=8.6" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/pirates": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.1.tgz", - "integrity": "sha512-WuNqLTbMI3tmfef2TKxlQmAiLHKtFhlsCZnPIpuv2Ow0RDVO8lfy1Opf4NUzlMXLjPl+Men7AuVdX6TA+s+uGA==", - "dependencies": { - "node-modules-regexp": "^1.0.0" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/prettier": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/prettier/-/prettier-2.3.2.tgz", - "integrity": "sha512-lnJzDfJ66zkMy58OL5/NY5zp70S7Nz6KqcKkXYzn2tMVrNxvbqaBpg7H3qHaLxCJ5lNMsGuM8+ohS7cZrthdLQ==", - "bin": { - "prettier": "bin-prettier.js" - }, - "engines": { - "node": ">=10.13.0" - } - }, - "node_modules/randombytes": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", - "integrity": "sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==", - "dev": true, - "dependencies": { - "safe-buffer": "^5.1.0" - } - }, - "node_modules/readdirp": { - "version": "3.6.0", - "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", - "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", - "dev": true, - "dependencies": { - "picomatch": "^2.2.1" - }, - "engines": { - "node": ">=8.10.0" - } - }, - "node_modules/require-directory": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=", - "dev": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/safe-buffer": { - "version": "5.2.1", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", - "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ] - }, - "node_modules/serialize-javascript": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.2.tgz", - "integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==", - "dev": true, - "dependencies": { - "randombytes": "^2.1.0" - } - }, - "node_modules/string-width": { - "version": "4.2.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", - "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", - "dev": true, - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", - "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", - "dev": true, - "dependencies": { - "ansi-regex": "^5.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-json-comments": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", - "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", - "dev": true, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/sucrase": { - "version": "3.20.0", - "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.20.0.tgz", - "integrity": "sha512-Rsp+BX7DRuCleJvBAHN7gQ3ddk7U0rJev19XlIBF6dAq9vX4Tr5mHk4E7+ig/I7BM3DLYotCmm20lfBElT2XtQ==", - "dependencies": { - "commander": "^4.0.0", - "glob": "7.1.6", - "lines-and-columns": "^1.1.6", - "mz": "^2.7.0", - "pirates": "^4.0.1", - "ts-interface-checker": "^0.1.9" - }, - "bin": { - "sucrase": "bin/sucrase", - "sucrase-node": "bin/sucrase-node" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/sucrase/node_modules/commander": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", - "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==", - "engines": { - "node": ">= 6" - } - }, - "node_modules/supports-color": { - "version": "8.1.1", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", - "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", - "dev": true, - "dependencies": { - "has-flag": "^4.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/supports-color?sponsor=1" - } - }, - "node_modules/thenify": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", - "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==", - "dependencies": { - "any-promise": "^1.0.0" - } - }, - "node_modules/thenify-all": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz", - "integrity": "sha1-GhkY1ALY/D+Y+/I02wvMjMEOlyY=", - "dependencies": { - "thenify": ">= 3.1.0 < 4" - }, - "engines": { - "node": ">=0.8" - } - }, - "node_modules/to-regex-range": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", - "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", - "dev": true, - "dependencies": { - "is-number": "^7.0.0" - }, - "engines": { - "node": ">=8.0" - } - }, - "node_modules/ts-interface-checker": { - "version": "0.1.13", - "resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz", - "integrity": "sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==" - }, - "node_modules/type-detect": { - "version": "4.0.8", - "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz", - "integrity": "sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==", - "dev": true, - "engines": { - "node": ">=4" - } - }, - "node_modules/typescript": { - "version": "4.3.5", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.3.5.tgz", - "integrity": "sha512-DqQgihaQ9cUrskJo9kIyW/+g0Vxsk8cDtZ52a3NGh0YNTfpUSArXSohyUGnvbPazEPLu398C0UxmKSOrPumUzA==", - "dev": true, - "bin": { - "tsc": "bin/tsc", - "tsserver": "bin/tsserver" - }, - "engines": { - "node": ">=4.2.0" - } - }, - "node_modules/workerpool": { - "version": "6.5.1", - "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.5.1.tgz", - "integrity": "sha512-Fs4dNYcsdpYSAfVxhnl1L5zTksjvOJxtC5hzMNl+1t9B8hTJTdKDyZ5ju7ztgPy+ft9tBFXoOlDNiOT9WUXZlA==", - "dev": true - }, - "node_modules/wrap-ansi": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "dev": true, - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrappy": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=" - }, - "node_modules/y18n": { - "version": "5.0.8", - "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", - "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", - "dev": true, - "engines": { - "node": ">=10" - } - }, - "node_modules/yargs-parser": { - "version": "20.2.9", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", - "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==", - "dev": true, - "engines": { - "node": ">=10" - } - }, - "node_modules/yargs-unparser": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/yargs-unparser/-/yargs-unparser-2.0.0.tgz", - "integrity": "sha512-7pRTIA9Qc1caZ0bZ6RYRGbHJthJWuakf+WmHK0rVeLkNrrGhfoabBNdue6kdINI6r4if7ocq9aD/n7xwKOdzOA==", - "dev": true, - "dependencies": { - "camelcase": "^6.0.0", - "decamelize": "^4.0.0", - "flat": "^5.0.2", - "is-plain-obj": "^2.1.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/yocto-queue": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", - "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", - "dev": true, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - } - } -} diff --git a/munge/package.json b/munge/package.json deleted file mode 100644 index c12b26a54..000000000 --- a/munge/package.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "dependencies": { - "@types/json-stable-stringify": "^1.0.33", - "commander": "^8.0.0", - "execifmain": "^0.0.2", - "hfstol": "^0.0.3", - "json-stable-stringify": "^1.0.1", - "lodash": "^4.17.21", - "prettier": "^2.3.2", - "sucrase": "^3.19.0" - }, - "devDependencies": { - "@types/chai": "^4.2.19", - "@types/lodash": "^4.14.170", - "@types/mocha": "^8.2.2", - "@types/node": "^15.14.0", - "@types/prettier": "^2.3.1", - "chai": "^4.3.4", - "mocha": "~10.7", - "typescript": "^4.3.5" - }, - "scripts": { - "test:ci": "npm run test && tsc", - "test": "mocha -r sucrase/register/ts '**/*-test.ts'" - } -} diff --git a/munge/shared/dictionary.ts b/munge/shared/dictionary.ts deleted file mode 100644 index f37e92d3c..000000000 --- a/munge/shared/dictionary.ts +++ /dev/null @@ -1,381 +0,0 @@ -import assert from "assert"; -import { groupBy, minBy, remove, sortBy, union } from "lodash"; -import jsonStableStringify from "json-stable-stringify"; -import { DefaultMap, makePrettierJson, stringDistance, zip } from "./util"; -import { disambiguateSlugs } from "./slug-disambiguator"; - -export type Analysis = [string[], string, string[]]; -type DefinitionList = { - definition: string; - sources: string[]; -}[]; - -type DefaultLinguistInfo = never; - -export class DictionaryEntry { - head?: string; - analysis?: Analysis; - paradigm?: string; - senses?: DefinitionList; - slug?: string; - fstLemma?: string; - linguistInfo?: L; - - addDefinition(definition: string, sources: string[]) { - if (!definition.trim()) { - return; - } - - if (this.senses === undefined) { - this.senses = []; - } - for (const k of this.senses) { - if (k.definition === definition) { - k.sources = union(k.sources, sources); - return; - } - } - this.senses.push({ definition, sources: sources.slice() }); - } -} - -export class Wordform { - head?: string; - analysis?: Analysis; - senses?: DefinitionList; - formOf?: DictionaryEntry; -} - -type ExportableWordform = Required, "formOf">> & { - formOf: string; -}; - -type NonFunctionMembers = { - [key in keyof T]: T[key] extends Function ? never : T[key]; -}; - -/** - * A non-class version of DictionaryEntry, to represent the JSON-serialized form. - */ -interface ExportableDictionaryEntry - extends NonFunctionMembers> {} - -export type ImportJsonJsonEntry = - | ExportableDictionaryEntry - | ExportableWordform; - -export class Dictionary { - /** - * FST tags which distinguish the lexeme, e.g., +N and +V, as opposed to tags - * that distinguish the wordform within a lexeme, e.g., +Sg and +Pl. - */ - readonly _lexicalTags: Set | null; - _entries: (DictionaryEntry | Wordform)[]; - /* WARNING: only holds DictionaryEntry object, not Wordforms */ - _byText: Map[]>; - _bySlug: Map>; - - constructor(lexicalTags?: string[]) { - if (lexicalTags) { - this._lexicalTags = new Set(lexicalTags); - } else { - this._lexicalTags = null; - } - this._entries = []; - this._byText = new Map(); - this._bySlug = new Map(); - } - - /** - * Create a new Dictionary object from pre-existing importjson-format data. - */ - static fromJson(jsonText: string): Dictionary { - const dictionary = new Dictionary(); - - const data = JSON.parse(jsonText) as ImportJsonJsonEntry[]; - const forms = []; - for (const d of data) { - if ("formOf" in d) { - forms.push(d); - } else { - const entry = dictionary.getOrCreate({ text: d.head!, slug: d.slug }); - entry.analysis = d.analysis; - entry.paradigm = d.paradigm; - entry.senses = d.senses; - entry.slug = d.slug; - entry.linguistInfo = d.linguistInfo; - } - } - - for (const form of forms) { - const lemma = dictionary._bySlug.get(form.formOf); - assert(lemma); - const formObj = new Wordform(); - formObj.senses = form.senses; - formObj.head = form.head; - formObj.analysis = form.analysis; - formObj.formOf = lemma; - dictionary.addWordform(formObj); - } - - return dictionary; - } - - assignSlugs(keyfunc = (entry: DictionaryEntry) => "") { - function saferHeadWordForSlug(head: string) { - return head.replace(/[/\\ ]+/g, "_"); - } - - const wordsNeedingSlugs = groupBy( - this._entries.filter( - (e) => e instanceof DictionaryEntry && !e.slug - ) as DictionaryEntry[], - (e) => saferHeadWordForSlug(e.head!) - ); - - for (const [baseSlug, entries] of Object.entries(wordsNeedingSlugs)) { - const disambiguators = disambiguateSlugs(entries.map((e) => keyfunc(e))); - for (const [entry, disambiguator] of zip(entries, disambiguators)) { - let slug = `${baseSlug}${disambiguator}`; - if (this._bySlug.has(slug)) { - throw new Error(`Attempted to reuse slug ${slug}`); - } - entry.slug = slug; - this._bySlug.set(slug, entry); - } - } - } - - /** - * Group entries by FST lemma, elect one entry to be the lemma, and demote the - * rest to wordforms. - */ - determineLemmas() { - // Save locations for replacing with Wordform objects - const entryIndices = new Map, number>(); - for (let i = 0; i < this._entries.length; i++) { - const e = this._entries[i]; - if (e instanceof DictionaryEntry) { - entryIndices.set(e, i); - } - } - - // Group by lemma and lexical tags - const byFstLemmaAndLexicalTags = new DefaultMap< - string, - DictionaryEntry[] - >(() => Array()); - for (const e of this._entries) { - if (e instanceof DictionaryEntry) { - if (!e.analysis) { - continue; - } - const fstLemma = e.fstLemma ?? e.analysis[1]; - const lexicalTags = this._extractLexicalTags(e.analysis); - const key = JSON.stringify({ fstLemma, lexicalTags }); - byFstLemmaAndLexicalTags.getOrCreate(key).push(e); - } - } - - // replace non-lemmas with wordform referring to lemmas - for (const [key, entries] of byFstLemmaAndLexicalTags.entries()) { - const { fstLemma } = JSON.parse(key); - const lemmaEntry = minBy(entries, (e) => - stringDistance(fstLemma, e.head!) - ); - assert(lemmaEntry); - - for (const e of entries) { - if (e === lemmaEntry) { - continue; - } - const wordform = new Wordform(); - wordform.head = e.head; - wordform.analysis = e.analysis; - wordform.senses = e.senses; - wordform.formOf = lemmaEntry; - - const index = entryIndices.get(e); - assert(index); - entryIndices.delete(e); - this._entries[index] = wordform; - } - } - } - - /** - * Return the set of lexical tags, suitable for use as a lookup key. - */ - private _extractLexicalTags(analysis: Analysis) { - const tags = [...analysis[0], ...analysis[2]]; - const ret = []; - for (const t of tags) { - if (this.lexicalTags.has(t)) { - ret.push(t); - } - } - return [...new Set(ret)].sort(); - } - - getOrCreate({ text, slug }: { text: string; slug?: string }) { - assert(text); - - if (slug) { - const existing = this._bySlug.get(slug); - if (existing) { - assert(existing.head === text); - return existing; - } - } else { - const candidates = this._byText.get(text); - // Passing different slugs may have created multiple entries with the same - // text. In that case it’s an error to call getOrCreate and pass only text - // and not a slug. - assert(!candidates || candidates.length <= 1); - if (candidates?.length === 1) { - return candidates[0]; - } - } - - const entry = this.create(text); - if (slug) { - entry.slug = slug; - this._bySlug.set(slug, entry); - } - - return entry; - } - - /** - * Create a new entry. Useful for creating homographs. - */ - create(text: string) { - assert(text); - - const entry = new DictionaryEntry(); - - // This happens for a couple of entries in the Tsuut’ina “Vocabulary” - // spreadsheet input. - if (/^\p{Combining_Mark}/u.test(text.normalize())) { - console.log( - `Warning: ${JSON.stringify(text)} begins with a combining character` - ); - } - - entry.head = text; - let currentByTextList = this._byText.get(text); - if (currentByTextList) { - currentByTextList.push(entry); - } else { - this._byText.set(text, [entry]); - } - - this._entries.push(entry); - - return entry; - } - - addWordform(form: Wordform) { - assert(form instanceof Wordform); - this._entries.push(form); - } - - /** - * Remove an entry, and all linked wordforms, from the dictionary. - */ - remove(entry: DictionaryEntry | Wordform) { - remove(this._entries, (e) => e === entry); - - if (entry instanceof DictionaryEntry) { - remove(this._byText.get(entry.head!)!, (e) => e === entry); - if (entry.slug) { - this._bySlug.delete(entry.slug); - } - const formsToRemove = this._entries.filter( - (e) => e instanceof Wordform && e.formOf === entry - ); - for (const f of formsToRemove) { - this.remove(f); - } - } - } - - /** - * Assign slugs, determine lemmas, and return a prettified JSON string for the - * dictionary as a whole. - */ - assemble({ pretty = true, lemmatize = true } = {}) { - if (lemmatize) { - this.determineLemmas(); - } - this.assignSlugs(); - - let entriesToExport: ImportJsonJsonEntry[] = []; - for (const e of this._entries) { - if (!e.senses || e.senses.length === 0) { - console.log(`Warning: no definitions for ${JSON.stringify(e)}`); - e.senses = [{ definition: "?", sources: ["OS"] }]; - } - - if (e instanceof Wordform) { - const { head, analysis, senses } = e; - assert(head); - assert(analysis); - const formOf = e.formOf!.slug; - assert(formOf); - entriesToExport.push({ - head, - analysis, - senses: senses ?? [], - formOf, - }); - } else { - entriesToExport.push(e as ExportableDictionaryEntry); - } - } - - entriesToExport = sortBy(entriesToExport, entryKeyBySlugThenText); - - if (pretty) { - return makePrettierJson(entriesToExport); - } else { - return jsonStableStringify(entriesToExport, { space: 2 }); - } - } - - get lexicalTags() { - if (!this._lexicalTags) { - throw new Error( - "Attempted to use lexical tags on dictionary not configured with any" - ); - } - return this._lexicalTags; - } - - [Symbol.iterator]() { - return this._entries[Symbol.iterator](); - } -} - -// If you change how this sort works, you should change the matching -// entry_sort_key function written in Python as well. -function entryKeyBySlugThenText(entry: ImportJsonJsonEntry) { - let slug: string; - let form: string; - - if ("slug" in entry) { - assert(entry.slug); - slug = entry.slug; - form = ""; - } else if ("formOf" in entry) { - slug = entry.formOf; - form = entry.head; - } else { - assert(false); - } - - slug = slug!.normalize("NFD"); - form = form!.normalize("NFD"); - - return [slug, form]; -} diff --git a/munge/shared/run-helper.js b/munge/shared/run-helper.js deleted file mode 100644 index fb386ca2f..000000000 --- a/munge/shared/run-helper.js +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env node - -const { spawn } = require("child_process"); -const { dirname, join } = require("path"); - -/** - * Run targetScript, relative to the directory containing the ‘main’ node - * script, with run-time transpilation set up. - * - * Written in JS for windows compatibility; otherwise it would be a short - * shell script that did `exec node -r sucrase/ts/register foo.ts`. - */ -function runHelper(targetScript) { - const argv = process.argv.slice(); - const nodeExecutable = argv.shift(); - const thisScript = argv.shift(); - - const targetPath = join(dirname(thisScript), targetScript); - - let child; - - // There’s no exec()-style system call to replace the current process with - // a new one in Windows, so we have to spawn a new process and wait for it - // to exit. - child = spawn( - nodeExecutable, - ["-r", "sucrase/register/ts", targetPath, ...argv], - { - stdio: "inherit", - } - ); - child.on("error", function (e) { - throw e; - }); - child.on("exit", function (code, signal) { - if (code) { - process.exit(code); - } - if (signal) { - process.exit(1); - } - }); -} - -module.exports = { runHelper }; diff --git a/munge/shared/slug-disambiguator-test.ts b/munge/shared/slug-disambiguator-test.ts deleted file mode 100644 index 7e4380d07..000000000 --- a/munge/shared/slug-disambiguator-test.ts +++ /dev/null @@ -1,67 +0,0 @@ -import { expect } from "chai"; -import { disambiguateSlugs } from "./slug-disambiguator"; - -function stringify(strings: string[]) { - return strings.map((s) => JSON.stringify(s)).join(", "); -} - -describe("disambiguateSlugs", function () { - for (const [inputs, expectedOutputs] of [ - [["XYZ"], [""]], - [ - ["V", "N", "N"], - ["@v", "@n.1", "@n.2"], - ], - [ - ["PR", "VTI"], - ["@p", "@v"], - ], - [ - ["PR", "PrA"], - ["@pr", "@pra"], - ], - [ - ["PrA", "PrI"], - ["@pra", "@pri"], - ], - [ - ["NA", "VTI"], - ["@n", "@v"], - ], - [ - ["NA", "VAI", "VTI"], - ["@n", "@vai", "@vti"], - ], - [ - ["NA", "NI"], - ["@na", "@ni"], - ], - [ - ["NA", "NA"], - ["@na.1", "@na.2"], - ], - [ - ["NA-1", "NA-2"], - ["@na-1", "@na-2"], - ], - [ - ["VTI", "NA-1", "NA-2"], - ["@v", "@na-1", "@na-2"], - ], - // we may not have anything to disambiguate the item by - [ - ["", "", ""], - ["@1", "@2", "@3"], - ], - [ - ["NA-1", ""], - ["@n", "@1"], - ], - ] as [string[], string[]][]) { - it(`returns ${stringify(expectedOutputs)} for ${stringify( - inputs - )}`, function () { - expect(disambiguateSlugs(inputs)).to.eql(expectedOutputs); - }); - } -}); diff --git a/munge/shared/slug-disambiguator.ts b/munge/shared/slug-disambiguator.ts deleted file mode 100644 index 995cf7305..000000000 --- a/munge/shared/slug-disambiguator.ts +++ /dev/null @@ -1,138 +0,0 @@ -import assert from "assert"; -import { groupBy } from "lodash"; - -/** - * - * Return a unique list of short strings to disambiguate items. - * - * If there is only one input class, then no disambiguator is needed: - * - * > disambiguateSlugs(["NAI-1"]) - * [''] - * - * But if there are multiple input classes, e.g., for nêwokâtêw, then - * return a unique disambiguator for each input: - * - * > disambiguateSlugs(["VAI-1", "NA-2", "VII-2v"]) - * ['@vai', '@n', '@vii'] - * - * See the unit tests for more examples. - * """ - * - */ -export function disambiguateSlugs(inputs: string[]) { - inputs = inputs.map((i) => i.toLowerCase()); - assert(inputs.length >= 1); - if (inputs.length === 1) { - return [""]; - } - const disambiguator = new SlugDisambiguator(inputs); - disambiguator.disambiguate(); - return disambiguator.results().map((r) => `@${r.disambiguator}`); -} - -/** - * Non-exported class which handles actual disambiguation - */ -class SlugDisambiguator { - private _items: InputItem[]; - private _uniqueKeys: Set; - - constructor(inputs: string[]) { - this._items = []; - for (let i = 0; i < inputs.length; i++) { - this._items.push(new InputItem(i, inputs[i])); - } - this._uniqueKeys = new Set(); - } - - unassignedItems() { - return this._items.filter((x) => !x.assigned); - } - - isDone() { - return this.unassignedItems().length === 0; - } - - private usingKeyGroups(keyFunc: (s: string) => string) { - const groups = groupBy(this.unassignedItems(), (i) => keyFunc(i.value)); - for (const [key, items] of Object.entries(groups)) { - if (key && items.length === 1) { - this.assign(items[0], key); - } - } - } - - private usingEnumeration() { - for (const [ix, item] of this.unassignedItems().entries()) { - this.assign(item, item.value ? `${item.value}.${ix + 1}` : `${ix + 1}`); - } - } - - disambiguate() { - function generalWordClass(inflectionalCategory: string) { - if (!inflectionalCategory) { - return ""; - } - return inflectionalCategory[0]; - } - - function specificWordClass(inflectionalCategory: string) { - return inflectionalCategory.split("-")[0]; - } - - function inflectionalCategory(inflectionalCategory: string) { - return inflectionalCategory; - } - - for (const method of [ - () => this.usingKeyGroups(generalWordClass), - () => this.usingKeyGroups(specificWordClass), - () => this.usingKeyGroups(inflectionalCategory), - () => this.usingEnumeration(), - ]) { - method(); - if (this.isDone()) { - return; - } - } - throw new Error( - `Unable to disambiguate inputs ${JSON.stringify( - this._items.map((i) => i.value) - )}` - ); - } - - assign(input: InputItem, key: string) { - if (this._uniqueKeys.has(key)) { - throw new Error(`attempt to re-use key ${key}`); - } - this._uniqueKeys.add(key); - input.disambiguator = key; - } - - results() { - assert(this.isDone()); - return this._items; - } -} - -/** - * Wrapper class to hold input items with disambiguator values. Among other - * reasons to use a class instead of a plain array is that we may get duplicate - * inputs which need to be tracked separately. - */ -class InputItem { - index: number; - value: string; - disambiguator?: string; - - constructor(index: number, value: string) { - this.index = index; - this.value = value; - } - - get assigned() { - return !!this.disambiguator; - } -} diff --git a/munge/shared/util-test.ts b/munge/shared/util-test.ts deleted file mode 100644 index 75c545f9a..000000000 --- a/munge/shared/util-test.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { describe } from "mocha"; -import { expect } from "chai"; -import { stringDistance } from "./util"; - -describe("editDistance", function () { - for (const [a, b, dist] of [ - ["a", "a", 0], - ["ax", "a", 1], - ["â", "a", 0.2], - ["foo", "bar", 3], - ["foo", "fojjjo", 3], - // test twiddle - ["hello world", "hello wordl", 1], - ["Hello", "hello", 0.2], - ] as [string, string, number][]) { - it(`returns edit distance ${dist} for ${JSON.stringify( - a - )} vs ${JSON.stringify(b)}`, function () { - expect(stringDistance(a, b)).to.equal(dist); - }); - } -}); diff --git a/munge/shared/util.ts b/munge/shared/util.ts deleted file mode 100644 index f266710af..000000000 --- a/munge/shared/util.ts +++ /dev/null @@ -1,193 +0,0 @@ -import assert from "assert"; -import { readFile } from "fs/promises"; -import { resolve as resolvePath } from "path"; -import jsonStableStringify from "json-stable-stringify"; -import prettier from "prettier"; - -/** - * Run the input through the prettier tool. - * - * If the input is a string, it must be valid JSON; otherwise the input object - * will be converted to JSON. - */ -export function makePrettierJson(data: unknown) { - // Assume strings already contain JSON; otherwise, stringify - if (typeof data !== "string") { - data = jsonStableStringify(data, { space: 2 }); - } - assert(typeof data === "string"); - return prettier.format(data, { - parser: "json", - }); -} - -/** - * Return array of corresponding pairs of items from two input arrays. - * - * Concept borrowed from the python function of the same name. - * https://docs.python.org/3/library/functions.html#zip - * - * zip([k1, k2, k3], [v1, v2, v3]) ⇒ [[k1, v1], [k2, v2], [k3, v3]] - */ -export function zip(array1: T1[], array2: T2[]): [T1, T2][] { - assert(array1.length === array2.length); - const ret = Array(array1.length); - for (let i = 0; i < array1.length; i++) { - ret[i] = [array1[i], array2[i]]; - } - return ret; -} - -/** - * Load a TSV file containing a header row, and return an array consisting of - * one {$header: value, …} object for each row. - */ -export async function loadTsvFile(path: string) { - const contents = await readFile(path, "utf-8"); - const lines = contents.split("\n"); - const header = lines.shift()!.split("\t"); - const ret = []; - for (const line of lines) { - if (line) { - ret.push(Object.fromEntries(zip(header, line.split("\t")))); - } - } - return ret; -} - -type DefaultValueProvider = (key: K) => V; - -/** - * A subclass of Map that automatically sets a default value when get(key) is - * given a key not already present. - */ -export class DefaultMap extends Map { - // Using `#`syntax to hide function value from console.log() - readonly #defaultValueProvider: DefaultValueProvider; - - constructor(defaultValueProvider: DefaultValueProvider) { - super(); - this.#defaultValueProvider = defaultValueProvider; - } - - getOrCreate(key: K): V { - if (!super.has(key)) { - const v = this.#defaultValueProvider(key); - super.set(key, v); - return v; - } - return super.get(key)!; - } -} - -export class Counter extends DefaultMap { - constructor() { - super(() => 0); - } - - increment(key: K, amount = 1): void { - const oldValue = this.getOrCreate(key); - const newValue = oldValue + amount; - this.set(key, newValue); - } -} - -// finds non-combining character followed by combining character -const combiningRegExp = /(?\P{Mark})(?\p{Mark}+)/gu; - -/** - * Return a version of the string without diacritics or other ornamentation; - * useful for search indexing, fuzzy matching, edit distance computations, and - * so on. - */ -function toBaseCharacters(s: string) { - // remove combining characters - s = s.normalize("NFD").replace(combiningRegExp, "$1"); - s = [...s] - .map((c) => { - switch (c) { - // Characters without combining decompositions. You could partly - // automate building a table of these by looking for unicode characters - // named “LATIN (SMALL|CAPITAL) LETTER X WITH ___” - // - // But as long as you’re including all the characters actually occurring - // in your source data, you’ll be fine. - case "ł": - return "l"; - case "Ł": - return "L"; - case "ɫ": - return "l"; - case "Ɫ": - return "l"; - default: - return c; - } - }) - .join(""); - return s; -} - -/** - * Return a measure of how close two strings are to each another. - * - * Algorithm is similar to edit distance; see CLRS “Introduction to Algorithms” second edition, Problem 15-3. - */ -export function stringDistance(a: string, b: string) { - const m = a.length; - const n = b.length; - const dist = Array(m + 1); - for (let i = 0; i <= m; i++) { - dist[i] = Array(n + 1); - } - - for (let i = 0; i <= m; i++) { - for (let j = 0; j <= n; j++) { - if (i === 0) { - // Edit distance between empty string and string of length j is j - dist[i][j] = j; - } else if (j === 0) { - dist[i][j] = i; - } else { - let c = a.charAt(i - 1); - let d = b.charAt(j - 1); - - let thisDist; - - if (c == d) { - // exact match - thisDist = 0; - dist[i][j] = thisDist + dist[i - 1][j - 1]; - } else if ( - // close match - toBaseCharacters(c).toLowerCase() === - toBaseCharacters(d).toLowerCase() - ) { - thisDist = 0.2; - dist[i][j] = thisDist + dist[i - 1][j - 1]; - } else if ( - i >= 2 && - j >= 2 && - c === b.charAt(j - 2) && - d === a.charAt(i - 2) - ) { - // twiddle aka transposition - thisDist = 1; - dist[i][j] = thisDist + dist[i - 2][j - 2]; - } else { - // no match; take the lowest edit distance possible by skipping a char - // in one or both input strings - thisDist = 1; - dist[i][j] = - thisDist + - Math.min(dist[i - 1][j], dist[i][j - 1], dist[i - 1][j - 1]); - } - } - } - } - return dist[m][n]; -} - -export function resourceDir(languagePair: string) { - return resolvePath(__dirname, "..", "..", "src", languagePair, "resources"); -} diff --git a/munge/srseng/run.js b/munge/srseng/run.js deleted file mode 100755 index 0bd3400cd..000000000 --- a/munge/srseng/run.js +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env node - -/* - * A launcher script for toimportjson.ts that sets up run-time transpilation. - */ - -const { runHelper } = require("../shared/run-helper"); - -runHelper("toimportjson.ts"); diff --git a/munge/srseng/testdata/OS-Vocabulary sample.tsv b/munge/srseng/testdata/OS-Vocabulary sample.tsv deleted file mode 100644 index 2d66606f9..000000000 --- a/munge/srseng/testdata/OS-Vocabulary sample.tsv +++ /dev/null @@ -1,5 +0,0 @@ -Folio ID Original order on folio Part of speech Bruce - Tsuut'ina text Bruce - English text Bruce - Notes on entry Tsuut'ina headword English headword English reversal Topic Alternative Topic In lexical database? Editors - Questions for Bruce Editors - Other questions and comments Sapir - Tsuut'ina transcription Sapir - Tsuut'ina expansion Sapir - English transcription Sapir - English expansion Sapir - Notes on entries Sapir - Other notes on page Tsuut'ina item ungrammatical? Tsuut'ina item no longer in use? Not for school edition? OLD - Praat pitch measurements OLD - Musical tone notation OLD - Unparsed text -OS_I-37b os01941 dītł'á he/she/it will run Dì∙ⁱtɫ'áᶜ 3. [EMPTY] [he'll run] 1 1 1 1 -OS_I-37b os01943 dāstł'á youᵖˡ∙ will run Dāstɫ'āᶜ 2. [EMPTY] [ye'll run] 1 1 1 1 -OS_I-37b os01946 dàdāàtł'á we each and every one will run 1 1 1 1 -OS_I-37b os01947 dàdāstł'á youᵖᴵ∙ each and every one will run Dāstɫ'āᶜ 2. youᵖᴵ∙ will run 1 1 1 1 diff --git a/munge/srseng/toimportjson-test.ts b/munge/srseng/toimportjson-test.ts deleted file mode 100644 index 14f26716d..000000000 --- a/munge/srseng/toimportjson-test.ts +++ /dev/null @@ -1,61 +0,0 @@ -import { expect } from "chai"; -import { munge } from "./toimportjson"; -import { loadTsvFile } from "../shared/util"; -import { join as joinPath } from "path"; - -describe("srseng toimportjson", function () { - it("works on a basic input", async function () { - const sample = await loadTsvFile( - joinPath(__dirname, "testdata", "OS-Vocabulary sample.tsv") - ); - - const munged = munge(sample); - expect(JSON.parse(munged)).to.eql([ - { - head: "dītł'á", - senses: [ - { - definition: "he/she/it will run", - sources: ["OS"], - }, - ], - analysis: [[], "dītł'á", ["+V", "+I", "+Ipfv", "+SbjSg3"]], - paradigm: "VI", - slug: "dītł'á", - }, - { - head: "dàdāàtł'á", - analysis: [[], "dītł'á", ["+V", "+I", "+Ipfv", "+SbjPl1", "+Distr"]], - senses: [ - { - definition: "we each and every one will run", - sources: ["OS"], - }, - ], - formOf: "dītł'á", - }, - { - head: "dàdāstł'á", - analysis: [[], "dītł'á", ["+V", "+I", "+Ipfv", "+SbjPl2", "+Distr"]], - senses: [ - { - definition: "youᵖᴵ∙ each and every one will run", - sources: ["OS"], - }, - ], - formOf: "dītł'á", - }, - { - head: "dāstł'á", - analysis: [[], "dītł'á", ["+V", "+I", "+Ipfv", "+SbjPl2"]], - senses: [ - { - definition: "youᵖˡ∙ will run", - sources: ["OS"], - }, - ], - formOf: "dītł'á", - }, - ]); - }); -}); diff --git a/munge/srseng/toimportjson.ts b/munge/srseng/toimportjson.ts deleted file mode 100644 index 62079e862..000000000 --- a/munge/srseng/toimportjson.ts +++ /dev/null @@ -1,166 +0,0 @@ -import { join as joinPath } from "path"; -import { writeFile } from "fs/promises"; -import { difference, flatten, intersection, isEqual, min } from "lodash"; -import { execIfMain } from "execifmain"; -import { Command } from "commander"; -import { Transducer } from "hfstol"; -import { loadTsvFile, resourceDir } from "../shared/util"; -import { Analysis, Dictionary } from "../shared/dictionary"; - -const RESOURCE_DIR = resourceDir("srseng"); - -const DICTIONARY_DIR = joinPath(RESOURCE_DIR, "dictionary"); -const FST_DIR = joinPath(RESOURCE_DIR, "fst"); - -const analyzer = new Transducer(joinPath(FST_DIR, "analyser-gt-norm.hfstol")); - -function tagCount(analysis: Analysis) { - return analysis[0].length + analysis[2].length; -} - -/** - * Like Array.findIndex, except: - * - uses lodash isEqual, so can compare arrays and such - * - returns undefined if not found, or multiple matches exist - */ -function findEqualUniqueIndex(array: T[], target: T) { - let matchIndex = null; - for (let i = 0; i < array.length; i++) { - if (isEqual(array[i], target)) { - if (matchIndex !== null) { - // duplicate - return null; - } - matchIndex = i; - } - } - return matchIndex; -} - -/** - * If a single entry matches the tiebreaker rules, return it, otherwise return - * null. - */ -function doTieBreaking(analyses: Analysis[]): Analysis | null { - // Tsuut’ina-specific rules to break ties when there are multiple analyses - - const flattenedAnalyses = analyses.map(flatten); - const commonTags = intersection(...flattenedAnalyses); - const tags = flattenedAnalyses.map((e) => difference(e, commonTags)); - - // Current rule: for each list of tags below, in order, try to see if there is - // exactly one analysis that has all the same tags as the other analyses, plus - // the ones given here. If so, choose it. - // - // XXX: Andrew put these in to try to get *something* working. A linguist - // needs to replace them. - for (const tieBreaker of [ - ["+Ipfv"], - ["+DObjSg3"], - ["+IObjSg3"], - ["+SbjSg1", "+DObjSg3"], - ["+SbjSg3"], - ["+SbjSg1"], - ]) { - const index = findEqualUniqueIndex(tags, tieBreaker); - if (index != null) { - const ret = analyses[index]; - return ret; - } - } - - return null; -} - -async function main() { - const program = new Command(); - program - .option( - "--input-tsv ", - "The original source dictionary to use, in TSV format", - `${DICTIONARY_DIR}/Onespot-Sapir - Vocabulary list - OS-Vocabulary.tsv` - ) - .option( - "--output-file ", - "Where to write the generated importjson file", - `${DICTIONARY_DIR}/srseng_dictionary.importjson` - ); - - program.parse(); - - const options = program.opts(); - - const inputTsv = await loadTsvFile(options.inputTsv); - - const assembled = munge(inputTsv); - await writeFile(options.outputFile, assembled); -} - -export function munge(inputTsv: { [key: string]: string }[]) { - const dictionary = new Dictionary(["+V", "+T", "+I", "+D"]); - let previousHead = ""; - - for (const row of inputTsv) { - let head = row["Bruce - Tsuut'ina text"].normalize("NFC"); - if (!head && !previousHead) { - continue; - } - if (!head) { - head = previousHead; - } - - const definition = row["Bruce - English text"]; - - if (!definition) { - console.log(`Warning: no definition for row with head ${head}`); - } - - const entry = dictionary.getOrCreate({ text: head }); - entry.addDefinition(definition, ["OS"]); - - const analyses = analyzer.lookup_lemma_with_affixes(head); - - let analysis; - if (analyses.length > 1) { - // multiple analyses; start by taking minimum tag count - const minTagCount = min(analyses.map((e) => tagCount(e))); - const withMinTagCount = analyses.filter( - (e) => tagCount(e) === minTagCount - ); - if (withMinTagCount.length === 1) { - analysis = withMinTagCount[0]; - } else { - const tiebreaker = doTieBreaking(withMinTagCount); - if (tiebreaker) { - analysis = tiebreaker; - } else { - console.log( - `Multiple analyses for ${head}; ${JSON.stringify(analyses)}` - ); - } - } - } else if (analyses.length === 1) { - analysis = analyses[0]; - } - - if (analysis) { - let [_prefixTags, _lemma, suffixTags] = analysis; - entry.analysis = analyses[0]; - - if (suffixTags.includes("+V") && suffixTags.includes("+I")) { - entry.paradigm = "VI"; - } else if (suffixTags.includes("+V") && suffixTags.includes("+T")) { - entry.paradigm = "VT"; - } else if (suffixTags.includes("+V") && suffixTags.includes("+D")) { - entry.paradigm = "VD"; - } - // otherwise we don’t know what the paradigm is, so don’t set anything - // here. - } - - previousHead = head; - } - return dictionary.assemble(); -} - -execIfMain(main, module); diff --git a/munge/tsconfig.json b/munge/tsconfig.json deleted file mode 100644 index e67cff134..000000000 --- a/munge/tsconfig.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "compilerOptions": { - "moduleResolution": "Node", - "esModuleInterop": true, - "noImplicitAny": true, - "alwaysStrict": true, - "strict": true, - "noEmit": true, - "lib": ["ES2020"], - "target": "esnext", - "noUnusedLocals": true, - "typeRoots": ["./node_modules/@types"] - } -}