diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs index f64c7eab3d..7a253a0bd7 100644 --- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs +++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs @@ -35,10 +35,11 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache /// @lucene.experimental /// // LUCENENET NOTE: The serialization features here are strictly for testing purposes, - // therefore it doesn't make any difference what type of serialization is used. - // To make things simpler, we are using BinaryReader and BinaryWriter since + // therefore it doesn't make any difference what type of serialization is used. + // To make things simpler, we are using BinaryReader and BinaryWriter since // BinaryFormatter is not implemented in .NET Standard 1.x. - internal class CharBlockArray : IAppendable, ICharSequence + internal class CharBlockArray : IAppendable, ICharSequence, + ISpanAppendable /* LUCENENET specific */ { private const long serialVersionUID = 1L; @@ -65,8 +66,6 @@ public object Clone() return clone; } - - // LUCENENET specific public void Serialize(Stream writer) { @@ -235,7 +234,6 @@ public virtual CharBlockArray Append(char[]? value, int startIndex, int length) if (startIndex > value.Length - length) throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(length)} <= {nameof(Length)}."); - int offset = startIndex; int remain = length; while (remain > 0) @@ -310,7 +308,6 @@ public virtual CharBlockArray Append(string? value, int startIndex, int length) if (startIndex > value.Length - length) throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. 
/// <summary>
/// Appends the characters in <paramref name="value"/> to the end of this instance.
/// The characters are copied in chunks, each chunk limited to the free space left
/// in the current block; a new block is requested through <c>AddBlock()</c>
/// whenever the current block is full.
/// </summary>
/// <param name="value">The characters to append. An empty span leaves this instance unchanged.</param>
/// <returns>This instance, so that calls can be chained.</returns>
// LUCENENET specific
public virtual CharBlockArray Append(ReadOnlySpan<char> value)
{
    int offset = 0;
    int remain = value.Length;
    while (remain > 0)
    {
        // The current block has no room left; ask for another one before copying.
        // NOTE(review): assumes AddBlock() replaces this.current with an empty block - confirm.
        if (this.current.length == this.blockSize)
        {
            AddBlock();
        }

        // Copy as much as fits into the current block, but never more than what is left to copy.
        int remainingInBlock = this.blockSize - this.current.length;
        int toCopy = remain < remainingInBlock ? remain : remainingInBlock;

        value.Slice(offset, toCopy).CopyTo(this.current.chars.AsSpan(this.current.length));
        offset += toCopy;
        remain -= toCopy;
        this.current.length += toCopy;
    }

    // The total length grows by the full span length once, after all chunks are copied.
    this.length += value.Length;
    return this;
}

#region ISpanAppendable Members

// Explicit interface implementation: forwards to the public overload above.
ISpanAppendable ISpanAppendable.Append(ReadOnlySpan<char> value) => Append(value);

#endregion
/// <summary>
/// Checks that appending <see cref="ReadOnlySpan{T}"/> of <see cref="char"/> values to a
/// <see cref="CharBlockArray"/> produces the expected text, for whole spans and for slices,
/// both short and long.
/// </summary>
[Test]
[LuceneNetSpecific]
public virtual void TestSpanAppendableInterface()
{
    const string tenDigits = "0123456789";
    const string fiftyDigits = "01234567890123456789012345678901234567890123456789";

    CharBlockArray buffer = new CharBlockArray();

    // A whole span.
    buffer.Append("12345678".AsSpan());
    Assert.AreEqual("12345678", buffer.ToString());

    // A slice: start 3, length 2, i.e. the characters "34".
    buffer.Append(tenDigits.AsSpan(3, 5 - 3));
    Assert.AreEqual("1234567834", buffer.ToString());

    // A long span, appended to a fresh instance.
    // NOTE(review): whether this crosses a block boundary depends on the default block size,
    // which is not visible here - confirm that the multi-block copy path is actually exercised.
    buffer = new CharBlockArray();
    buffer.Append(fiftyDigits.AsSpan());
    Assert.AreEqual(fiftyDigits, buffer.ToString());

    // A long slice: everything from index 3 to the end (47 characters), appended after the 50 above.
    buffer.Append(fiftyDigits.AsSpan(3, 50 - 3));
    Assert.AreEqual("0123456789012345678901234567890123456789012345678934567890123456789012345678901234567890123456789", buffer.ToString());
}
/// <summary>
/// Checks that appending <see cref="ReadOnlySpan{T}"/> of <see cref="char"/> values to a
/// <see cref="CharTermAttribute"/> produces the expected term text, for whole spans and for
/// slices, both short and long.
/// </summary>
[Test]
[LuceneNetSpecific]
public virtual void TestSpanAppendableInterface()
{
    const string tenDigits = "0123456789";
    const string fiftyDigits = "01234567890123456789012345678901234567890123456789";

    CharTermAttribute term = new CharTermAttribute();

    // A whole span.
    term.Append("12345678".AsSpan());
    Assert.AreEqual("12345678", term.ToString());

    // A slice: start 3, length 2, i.e. the characters "34".
    term.Append(tenDigits.AsSpan(3, 5 - 3));
    Assert.AreEqual("1234567834", term.ToString());

    // A long span, appended after clearing the term.
    // NOTE(review): presumably long enough to force the term buffer to grow - confirm.
    term.SetEmpty().Append(fiftyDigits.AsSpan());
    Assert.AreEqual(fiftyDigits, term.ToString());

    // A long slice: everything from index 3 to the end (47 characters), appended after the 50 above.
    term.Append(fiftyDigits.AsSpan(3, 50 - 3));
    Assert.AreEqual("0123456789012345678901234567890123456789012345678934567890123456789012345678901234567890123456789", term.ToString());
}
/// Note: to grow the size of the array, - /// use first. - /// NOTE: This is exactly the same operation as calling the setter, the primary + /// use first. + /// NOTE: This is exactly the same operation as calling the setter, the primary /// difference is that this method returns a reference to the current object so it can be chained. /// /// obj.SetLength(30).Append("hey you"); /// /// /// the truncated length - ICharTermAttribute SetLength(int length); + ICharTermAttribute SetLength(int length); /// /// Sets the length of the termBuffer to zero. @@ -197,8 +197,8 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable /// /// The sequence of characters to append. /// - /// LUCENENET specific method, added because the .NET data type - /// doesn't implement . + /// LUCENENET specific method, added because the .NET data type + /// doesn't implement . /// new ICharTermAttribute Append(string value); @@ -228,8 +228,8 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable /// + is greater than the length of . /// /// - /// LUCENENET specific method, added because the .NET data type - /// doesn't implement . + /// LUCENENET specific method, added because the .NET data type + /// doesn't implement . /// new ICharTermAttribute Append(string value, int startIndex, int count); // LUCENENET TODO: API - change to startIndex/length to match .NET @@ -270,7 +270,7 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable /// + is greater than the length of . /// /// - /// LUCENENET specific method, added because the .NET data type + /// LUCENENET specific method, added because the .NET data type /// doesn't implement . 
/// <summary>
/// Appends the characters in <paramref name="value"/> to the end of the term.
/// The term buffer is obtained from <c>InternalResizeBuffer</c> with the combined
/// length, the characters are copied in after the existing term content, and
/// <c>Length</c> is increased by the number of characters appended.
/// </summary>
/// <param name="value">The characters to append. An empty span leaves the term unchanged.</param>
/// <returns>This instance, so that calls can be chained.</returns>
// LUCENENET specific
public CharTermAttribute Append(ReadOnlySpan<char> value)
{
    // Nothing to copy: skip the buffer request entirely.
    if (value.IsEmpty)
    {
        return this;
    }

    // Request a buffer sized for the existing term plus the new characters,
    // then copy the new characters in directly after the existing ones.
    char[] buffer = InternalResizeBuffer(termLength + value.Length);
    value.CopyTo(buffer.AsSpan(termLength));
    Length += value.Length;

    return this;
}

#region ISpanAppendable Members

// Explicit interface implementation: forwards to the public overload above.
ISpanAppendable ISpanAppendable.Append(ReadOnlySpan<char> value) => Append(value);

#endregion