diff --git a/docs/user_guide/assets/licenses/frontend_licenses.txt b/docs/user_guide/assets/licenses/frontend_licenses.txt index 2782dbbba2..f0134c894a 100644 --- a/docs/user_guide/assets/licenses/frontend_licenses.txt +++ b/docs/user_guide/assets/licenses/frontend_licenses.txt @@ -42671,6 +42671,31 @@ This library is a fork of 'better-json-errors' by Kat Marchán, extended and distributed under the terms of the MIT license above. +levenshtein-search 0.1.2 +MIT +MIT License + +Copyright (c) 2018 Tal Einat + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + lines-and-columns 1.2.4 MIT The MIT License (MIT) diff --git a/docs/user_guide/docs/goals.es.md b/docs/user_guide/docs/goals.es.md index a417401426..fc2343e32f 100644 --- a/docs/user_guide/docs/goals.es.md +++ b/docs/user_guide/docs/goals.es.md @@ -27,7 +27,8 @@ There are icons at the top of each column to ![Review Entries column sort icon](../images/reviewEntriesColumnSort.png){width=20} sort the data. In a column with predominantly text content (Vernacular, Glosses, Note, or Flag), you can sort alphabetically or filter -with a text search. +with a text search. By default, the text search is a fuzzy match: it is not case sensitive and it allows for one or two +typos. If you want exact text matches, use quotes around your filter. In the Number of Senses column or Pronunciations column, you can sort or filter by the number of senses or recordings that entries have. In the Pronunciations column, you can also filter by speaker name. diff --git a/docs/user_guide/docs/goals.md b/docs/user_guide/docs/goals.md index fc08a36fe5..b7ff8e3051 100644 --- a/docs/user_guide/docs/goals.md +++ b/docs/user_guide/docs/goals.md @@ -26,7 +26,8 @@ There are icons at the top of each column to ![Review Entries column sort icon](images/reviewEntriesColumnSort.png){width=20} sort the data. In a column with predominantly text content (Vernacular, Glosses, Note, or Flag), you can sort alphabetically or filter -with a text search. +with a text search. By default, the text search is a fuzzy match: it is not case sensitive and it allows for one or two +typos. If you want exact text matches, use quotes around your filter. In the Number of Senses column or Pronunciations column, you can sort or filter by the number of senses or recordings that entries have. In the Pronunciations column, you can also filter by speaker name. diff --git a/docs/user_guide/docs/goals.zh.md b/docs/user_guide/docs/goals.zh.md index a9a17ddcd4..b5cb930de0 100644 --- a/docs/user_guide/docs/goals.zh.md +++ b/docs/user_guide/docs/goals.zh.md @@ -26,7 +26,8 @@ There are icons at the top of each column to ![Review Entries column sort icon](../images/reviewEntriesColumnSort.png){width=20} sort the data. In a column with predominantly text content (Vernacular, Glosses, Note, or Flag), you can sort alphabetically or filter -with a text search. +with a text search. By default, the text search is a fuzzy match: it is not case sensitive and it allows for one or two +typos. If you want exact text matches, use quotes around your filter. In the Number of Senses column or Pronunciations column, you can sort or filter by the number of senses or recordings that entries have. In the Pronunciations column, you can also filter by speaker name. diff --git a/package-lock.json b/package-lock.json index 467c489958..e9845f39a9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -31,6 +31,7 @@ "i18next-browser-languagedetector": "^8.0.0", "i18next-http-backend": "^2.6.0", "js-base64": "^3.7.7", + "levenshtein-search": "^0.1.2", "make-dir": "^4.0.0", "material-react-table": "^2.9.2", "motion": "^10.16.2", @@ -17255,6 +17256,12 @@ "node": ">=6" } }, + "node_modules/levenshtein-search": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/levenshtein-search/-/levenshtein-search-0.1.2.tgz", + "integrity": "sha512-MEwjuzHgVaibXrmH1Kh0maHxLPtkWy+tKaWXa4o3eASKE4rX+vKBnh+ektEIoDm8s4HwOk1cVxlNAdSABi+YlA==", + "license": "MIT" + }, "node_modules/levn": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", diff --git a/package.json b/package.json index 8936b2d427..4d25803aa7 100644 --- a/package.json +++ b/package.json @@ -59,6 +59,7 @@ "i18next-browser-languagedetector": "^8.0.0", "i18next-http-backend": "^2.6.0", "js-base64": "^3.7.7", + "levenshtein-search": "^0.1.2", "make-dir": "^4.0.0", "material-react-table": "^2.9.2", "motion": "^10.16.2", diff --git a/src/goals/ReviewEntries/ReviewEntriesTable/filterFn.ts b/src/goals/ReviewEntries/ReviewEntriesTable/filterFn.ts index 42f7f7ae26..b400d51fb5 100644 --- a/src/goals/ReviewEntries/ReviewEntriesTable/filterFn.ts +++ b/src/goals/ReviewEntries/ReviewEntriesTable/filterFn.ts @@ -10,18 +10,58 @@ import { } from "api/models"; import { type Hash } from "types/hash"; +// eslint-disable-next-line @typescript-eslint/no-var-requires +const { fuzzySearch } = require("levenshtein-search"); + +/** Checks if string starts and ends with quote marks. + * For simplicity, allows mismatched quote types. */ +export function isQuoted(filter: string): boolean { + return /^["'\p{Pi}].*["'\p{Pf}]$/u.test(filter); +} + +/** Number of typos allowed, depending on filter-length. */ +function levDist(len: number): number { + return len < 3 ? 0 : len < 6 ? 1 : 2; +} + +/** Checks if value contains a substring that fuzzy-matches the filter. */ +export function fuzzyContains(value: string, filter: string): boolean { + filter = filter.toLowerCase(); + value = value.toLowerCase(); + // `fuzzySearch(...)` returns a generator; + // `.next()` on a generator always returns an object with boolean property `done` + return !fuzzySearch(filter, value, levDist(filter.length)).next().done; +} + +/** Check if string matches filter. + * If filter quoted, exact match. Otherwise, fuzzy match. */ +export function matchesFilter(value: string, filter: string): boolean { + filter = filter.trim(); + return isQuoted(filter) + ? value.includes(filter.substring(1, filter.length - 1).trim()) + : fuzzyContains(value, filter); +} + /* Custom `filterFn` functions for `MaterialReactTable` columns. * (Can always assume that `filterValue` will be truthy.) */ -/** Requires the accessor return type to be `Dictionary[]`. */ +/** Requires the accessor return type to be `string`. */ +export const filterFnString: MRT_FilterFn = ( + row, + id, + filterValue: string +) => { + return matchesFilter(row.getValue(id), filterValue); +}; + +/** Requires the accessor return type to be `Definition[]`. */ export const filterFnDefinitions: MRT_FilterFn = ( row, id, filterValue: string ) => { const definitions = row.getValue(id); - const filter = filterValue.trim().toLowerCase(); - return definitions.some((d) => d.text.toLowerCase().includes(filter)); + return definitions.some((d) => matchesFilter(d.text, filterValue)); }; /** Requires the accessor return type to be `Gloss[]`. */ @@ -31,8 +71,7 @@ export const filterFnGlosses: MRT_FilterFn = ( filterValue: string ) => { const glosses = row.getValue(id); - const filter = filterValue.trim().toLowerCase(); - return glosses.some((g) => g.def.toLowerCase().includes(filter)); + return glosses.some((g) => matchesFilter(g.def, filterValue)); }; /** Requires the accessor return type to be `SemanticDomain[]`. */ @@ -79,10 +118,15 @@ export const filterFnPronunciations = /* Match either number of pronunciations or a speaker name. * (Whitespace will match all audio, even without a speaker.) */ const audio = row.getValue(id); - const filter = filterValue.trim().toLocaleLowerCase(); + const filter = filterValue.trim(); return ( + (audio.length && !filter) || audio.length === parseInt(filter) || - audio.some((p) => !filter || speakers[p.speakerId]?.includes(filter)) + audio.some( + (p) => + p.speakerId in speakers && + matchesFilter(speakers[p.speakerId], filter) + ) ); }; @@ -97,6 +141,5 @@ export const filterFnFlag: MRT_FilterFn = ( // A filter has been typed and the word isn't flagged return false; } - const filter = filterValue.trim().toLowerCase(); - return flag.text.toLowerCase().includes(filter); + return matchesFilter(flag.text, filterValue); }; diff --git a/src/goals/ReviewEntries/ReviewEntriesTable/index.tsx b/src/goals/ReviewEntries/ReviewEntriesTable/index.tsx index 5bfd9b55a5..c4ffde3060 100644 --- a/src/goals/ReviewEntries/ReviewEntriesTable/index.tsx +++ b/src/goals/ReviewEntries/ReviewEntriesTable/index.tsx @@ -221,6 +221,7 @@ export default function ReviewEntriesTable(props: { Cell: ({ row }: CellProps) => , enableColumnOrdering: false, enableHiding: false, + filterFn: ff.filterFnString, header: t("reviewEntries.columns.vernacular"), id: ColumnId.Vernacular, size: BaselineColumnSize - 40, @@ -327,6 +328,7 @@ export default function ReviewEntriesTable(props: { // Note column columnHelper.accessor((w) => w.note.text || undefined, { Cell: ({ row }: CellProps) => , + filterFn: ff.filterFnString, header: t("reviewEntries.columns.note"), id: ColumnId.Note, size: BaselineColumnSize - 40, diff --git a/src/goals/ReviewEntries/ReviewEntriesTable/tests/filterFn.test.ts b/src/goals/ReviewEntries/ReviewEntriesTable/tests/filterFn.test.ts index 28731cc322..0e982664be 100644 --- a/src/goals/ReviewEntries/ReviewEntriesTable/tests/filterFn.test.ts +++ b/src/goals/ReviewEntries/ReviewEntriesTable/tests/filterFn.test.ts @@ -7,22 +7,136 @@ const mockId = "id"; const mockRow = { getValue: mockGetValue }; describe("filterFn", () => { + describe("isQuoted", () => { + const quotedStrings = [ + "'Single quotes'", + '"Double quotes"', + "“Angled quotes”", + "‹Single-bracket quotes›", + "«Double-bracket quotes»", + ]; + test("With quotes", () => { + quotedStrings.forEach((s) => expect(ff.isQuoted(s)).toBeTruthy()); + }); + + const unquotedStrings = [ + "", + "hi", + '"', + "'Single-quote start", + "“Angled-quote start", + "Angle-quote end”", + ]; + test("Without quotes", () => { + unquotedStrings.forEach((s) => expect(ff.isQuoted(s)).toBeFalsy()); + }); + }); + + describe("fuzzyContains", () => { + const testString = "I am a string with many possible substrings."; + + test("short: no typos allowed", () => { + ["i", "am", "a s"].forEach((s) => + expect(ff.fuzzyContains(testString, s)).toBeTruthy() + ); + ["@", "aq"].forEach((s) => + expect(ff.fuzzyContains(testString, s)).toBeFalsy() + ); + }); + + test("medium: 1 typo allowed", () => { + ["i b", "ama", "strim"].forEach((s) => + expect(ff.fuzzyContains(testString, s)).toBeTruthy() + ); + ["i'm", "astrr"].forEach((s) => + expect(ff.fuzzyContains(testString, s)).toBeFalsy() + ); + }); + + test("long: 2 typos allowed", () => { + ["i'm a string", "with man88"].forEach((s) => + expect(ff.fuzzyContains(testString, s)).toBeTruthy() + ); + ["i'm a ztring", "with man888"].forEach((s) => + expect(ff.fuzzyContains(testString, s)).toBeFalsy() + ); + }); + }); + + describe("matchesFilter", () => { + const value = "Hello world!"; + const filterWithTypo = "H3llo"; + const filterWrongCase = "HELLO"; + const filterExact = "Hello"; + + it("unquoted: trims whitespace, fuzzy match", () => { + expect(ff.matchesFilter(value, "goodbye")).toBeFalsy(); + expect(ff.matchesFilter(value, ` ${filterWithTypo}`)).toBeTruthy(); + expect(ff.matchesFilter(value, `${filterWrongCase}\t`)).toBeTruthy(); + expect(ff.matchesFilter(value, `\t${filterExact} `)).toBeTruthy(); + }); + + it("quoted: trims whitespace, exact match", () => { + expect(ff.matchesFilter(value, `"${filterWithTypo}"`)).toBeFalsy(); + expect(ff.matchesFilter(value, `"${filterWrongCase}"`)).toBeFalsy(); + expect(ff.matchesFilter(value, ` "\t${filterExact} "\n`)).toBeTruthy(); + }); + }); + + describe("filterFnString", () => { + const filterFn = ff.filterFnString as any; + beforeEach(() => { + mockGetValue.mockReturnValue("Hello world!"); + }); + + it("unquoted: trims whitespace, fuzzy match", () => { + expect(filterFn(mockRow, mockId, "goodbye")).toBeFalsy(); + expect(filterFn(mockRow, mockId, " H3LLO")).toBeTruthy(); + }); + + it("quoted: trims whitespace, exact match", () => { + expect(filterFn(mockRow, mockId, '"H3llo"')).toBeFalsy(); + expect(filterFn(mockRow, mockId, '"HELLO"')).toBeFalsy(); + expect(filterFn(mockRow, mockId, '" Hello"\n')).toBeTruthy(); + }); + }); + describe("filterFnDefinitions", () => { const filterFn = ff.filterFnDefinitions as any; - it("trims whitespace and isn't case sensitive", () => { + beforeEach(() => { mockGetValue.mockReturnValue([ newDefinition("hello"), newDefinition("WORLD"), ]); - expect(filterFn(mockRow, mockId, " WoRlD\t")).toBeTruthy(); + }); + + it("unquoted: trims whitespace, fuzzy match", () => { + expect(filterFn(mockRow, mockId, "earth")).toBeFalsy(); + expect(filterFn(mockRow, mockId, " wrld\t")).toBeTruthy(); + }); + + it("quoted: trims whitespace, exact match", () => { + expect(filterFn(mockRow, mockId, '"h3llo"')).toBeFalsy(); + expect(filterFn(mockRow, mockId, '"HELLO"')).toBeFalsy(); + expect(filterFn(mockRow, mockId, '" hello"\n')).toBeTruthy(); }); }); describe("filterFnGlosses", () => { const filterFn = ff.filterFnGlosses as any; - it("trims whitespace and isn't case sensitive", () => { + beforeEach(() => { mockGetValue.mockReturnValue([newGloss("hello"), newGloss("WORLD")]); - expect(filterFn(mockRow, mockId, " WoRlD\t")).toBeTruthy(); + }); + + it("unquoted: trims whitespace, fuzzy match", () => { + expect(filterFn(mockRow, mockId, "earth")).toBeFalsy(); + expect(filterFn(mockRow, mockId, " wrld\t")).toBeTruthy(); + }); + + it("quoted: trims whitespace, exact match", () => { + expect(filterFn(mockRow, mockId, '"h3llo"')).toBeFalsy(); + expect(filterFn(mockRow, mockId, '"HELLO"')).toBeFalsy(); + expect(filterFn(mockRow, mockId, '" hello"\n')).toBeTruthy(); }); }); @@ -73,31 +187,61 @@ describe("filterFn", () => { // filterFnPronunciations returns a filter function when given a speaker dictionary const filterFn = (ff.filterFnPronunciations as any)(speakers); - it("matches number of pronunciations", () => { + it("numeric: matches number of pronunciations", () => { mockGetValue.mockReturnValue([newPronunciation(), newPronunciation()]); expect(filterFn(mockRow, mockId, " 2")).toBeTruthy(); expect(filterFn(mockRow, mockId, "2.0")).toBeTruthy(); expect(filterFn(mockRow, mockId, "1")).toBeFalsy(); }); - it("matches speaker name", () => { + it("whitespace: matches any audio", () => { + mockGetValue.mockReturnValueOnce([]); + expect(filterFn(mockRow, mockId, " ")).toBeFalsy(); + mockGetValue.mockReturnValueOnce([newPronunciation()]); + expect(filterFn(mockRow, mockId, " ")).toBeTruthy(); + }); + + it("unquoted: fuzzy-matches speaker name", () => { mockGetValue.mockReturnValue([newPronunciation("filename", speakerId)]); expect(filterFn(mockRow, mockId, "2")).toBeTruthy(); expect(filterFn(mockRow, mockId, " NAME\t\t")).toBeTruthy(); + expect(filterFn(mockRow, mockId, "numb3r")).toBeTruthy(); expect(filterFn(mockRow, mockId, "other person")).toBeFalsy(); }); + + it("quoted: exact-matches speaker name", () => { + mockGetValue.mockReturnValue([newPronunciation("filename", speakerId)]); + expect(filterFn(mockRow, mockId, "'2'")).toBeTruthy(); + expect(filterFn(mockRow, mockId, "'NAME'")).toBeFalsy(); + expect(filterFn(mockRow, mockId, " '\tname ' \t")).toBeTruthy(); + }); }); describe("filterFnFlag", () => { const filterFn = ff.filterFnFlag as any; - it("trims whitespace and isn't case sensitive", () => { - mockGetValue.mockReturnValue(newFlag("hello, WORLD")); - expect(filterFn(mockRow, mockId, " WoRlD\t")).toBeTruthy(); + beforeEach(() => { + mockGetValue.mockReturnValue(newFlag("Hello world!")); + }); + + it("unquoted: trims whitespace, fuzzy match", () => { + expect(filterFn(mockRow, mockId, "goodbye")).toBeFalsy(); + expect(filterFn(mockRow, mockId, " H3LLO")).toBeTruthy(); + }); + + it("quoted: trims whitespace, exact match", () => { + expect(filterFn(mockRow, mockId, '"H3llo"')).toBeFalsy(); + expect(filterFn(mockRow, mockId, '"HELLO"')).toBeFalsy(); + expect(filterFn(mockRow, mockId, ' "\tHello "\n')).toBeTruthy(); }); it("doesn't match if flag not active", () => { - mockGetValue.mockReturnValue({ active: false, text: "hi" }); - expect(filterFn(mockRow, mockId, " ")).toBeFalsy(); + const text = "hi"; + + mockGetValue.mockReturnValueOnce({ active: true, text }); + expect(filterFn(mockRow, mockId, text)).toBeTruthy(); + + mockGetValue.mockReturnValueOnce({ active: false, text }); + expect(filterFn(mockRow, mockId, text)).toBeFalsy(); }); }); });