From 75f9974d44db367e9a3e2579fd24825cc8d1d942 Mon Sep 17 00:00:00 2001 From: tohidemyname Date: Wed, 5 Jun 2024 08:20:29 +0800 Subject: [PATCH 1/2] FuzzyQuery produces a wrong result when prefix is equal to the term length https://github.com/apache/lucenenet/issues/941 --- src/Lucene.Net.Tests/Search/TestFuzzyQuery.cs | 44 ++++++++++++++++++- src/Lucene.Net/Search/FuzzyQuery.cs | 4 +- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/src/Lucene.Net.Tests/Search/TestFuzzyQuery.cs b/src/Lucene.Net.Tests/Search/TestFuzzyQuery.cs index f63aa4b3b6..580554a9da 100644 --- a/src/Lucene.Net.Tests/Search/TestFuzzyQuery.cs +++ b/src/Lucene.Net.Tests/Search/TestFuzzyQuery.cs @@ -195,8 +195,48 @@ public virtual void TestFuzziness() reader.Dispose(); directory.Dispose(); } - [Test] + public void TestPrefixLengthEqualStringLength() + { + Directory directory = NewDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(Random, directory); + AddDoc("b*a", writer); + AddDoc("b*ab", writer); + AddDoc("b*abc", writer); + AddDoc("b*abcd", writer); + String multibyte = "아프리카코끼리속"; + AddDoc(multibyte, writer); + IndexReader reader = writer.GetReader(); + IndexSearcher searcher = NewSearcher(reader); + writer.Dispose(); + + int maxEdits = 0; + int prefixLength = 3; + FuzzyQuery query = new FuzzyQuery(new Term("field", "b*a"), maxEdits, prefixLength); + ScoreDoc[] hits = searcher.Search(query, 1000).ScoreDocs; + assertEquals(1, hits.Length); + + maxEdits = 1; + query = new FuzzyQuery(new Term("field", "b*a"), maxEdits, prefixLength); + hits = searcher.Search(query, 1000).ScoreDocs; + assertEquals(2, hits.Length); + + maxEdits = 2; + query = new FuzzyQuery(new Term("field", "b*a"), maxEdits, prefixLength); + hits = searcher.Search(query, 1000).ScoreDocs; + assertEquals(3, hits.Length); + + maxEdits = 1; + prefixLength = multibyte.Length - 1; + query = new FuzzyQuery(new Term("field", multibyte.Substring(0, prefixLength)), maxEdits, prefixLength); + hits = searcher.Search(query, 1000).ScoreDocs; + assertEquals(1, hits.Length); + + reader.DoClose(); + directory.Dispose(); + } + + [Test] public virtual void Test2() { Directory directory = NewDirectory(); @@ -384,4 +424,4 @@ private void AddDoc(string text, RandomIndexWriter writer) writer.AddDocument(doc); } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net/Search/FuzzyQuery.cs b/src/Lucene.Net/Search/FuzzyQuery.cs index 6fafa67f68..60df568aa7 100644 --- a/src/Lucene.Net/Search/FuzzyQuery.cs +++ b/src/Lucene.Net/Search/FuzzyQuery.cs @@ -148,7 +148,7 @@ public FuzzyQuery(Term term) protected override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts) { - if (maxEdits == 0 || prefixLength >= term.Text.Length) // can only match if it's exact + if (maxEdits == 0 ) // can only match if it's exact { return new SingleTermsEnum(terms.GetEnumerator(), term.Bytes); } @@ -262,4 +262,4 @@ public static int SingleToEdits(float minimumSimilarity, int termLen) } } } -} \ No newline at end of file +} From 8ca132270106fb108ee38732ca46063edc2c22ee Mon Sep 17 00:00:00 2001 From: Paul Irwin Date: Mon, 28 Oct 2024 20:58:59 -0600 Subject: [PATCH 2/2] Add LUCENENET-specific backport comment, fix test code style --- src/Lucene.Net.Tests/Search/TestFuzzyQuery.cs | 22 ++++++++++--------- src/Lucene.Net/Search/FuzzyQuery.cs | 3 ++- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/Lucene.Net.Tests/Search/TestFuzzyQuery.cs b/src/Lucene.Net.Tests/Search/TestFuzzyQuery.cs index 580554a9da..6f8a8c81aa 100644 --- a/src/Lucene.Net.Tests/Search/TestFuzzyQuery.cs +++ b/src/Lucene.Net.Tests/Search/TestFuzzyQuery.cs @@ -195,6 +195,8 @@ public virtual void TestFuzziness() reader.Dispose(); directory.Dispose(); } + + // LUCENENET-specific: backported fix from Lucene 9.0.0 (lucene@45611d0, LUCENE-9365) [Test] public void TestPrefixLengthEqualStringLength() { @@ -204,7 +206,7 @@ public void TestPrefixLengthEqualStringLength() AddDoc("b*ab", writer); AddDoc("b*abc", writer); AddDoc("b*abcd", writer); - String multibyte = "아프리카코끼리속"; + const string multibyte = "아프리카코끼리속"; // LUCENENET-specific: made const AddDoc(multibyte, writer); IndexReader reader = writer.GetReader(); IndexSearcher searcher = NewSearcher(reader); @@ -226,17 +228,17 @@ public void TestPrefixLengthEqualStringLength() hits = searcher.Search(query, 1000).ScoreDocs; assertEquals(3, hits.Length); - maxEdits = 1; - prefixLength = multibyte.Length - 1; - query = new FuzzyQuery(new Term("field", multibyte.Substring(0, prefixLength)), maxEdits, prefixLength); - hits = searcher.Search(query, 1000).ScoreDocs; - assertEquals(1, hits.Length); + maxEdits = 1; + prefixLength = multibyte.Length - 1; + query = new FuzzyQuery(new Term("field", multibyte.Substring(0, prefixLength)), maxEdits, prefixLength); + hits = searcher.Search(query, 1000).ScoreDocs; + assertEquals(1, hits.Length); - reader.DoClose(); - directory.Dispose(); - } + reader.Dispose(); + directory.Dispose(); + } - [Test] + [Test] public virtual void Test2() { Directory directory = NewDirectory(); diff --git a/src/Lucene.Net/Search/FuzzyQuery.cs b/src/Lucene.Net/Search/FuzzyQuery.cs index 60df568aa7..a9f8cdf574 100644 --- a/src/Lucene.Net/Search/FuzzyQuery.cs +++ b/src/Lucene.Net/Search/FuzzyQuery.cs @@ -148,7 +148,8 @@ public FuzzyQuery(Term term) protected override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts) { - if (maxEdits == 0 ) // can only match if it's exact + // LUCENENET-specific: backported fix from Lucene 9.0.0 (lucene@45611d0, LUCENE-9365) + if (maxEdits == 0) // can only match if it's exact { return new SingleTermsEnum(terms.GetEnumerator(), term.Bytes); }