Skip to content

Commit

Permalink
Implement ISpanAppendable in CharBlockArray and CharTermAttributeImpl (#1028)
Browse files Browse the repository at this point in the history

* Implement ISpanAppendable in CharBlockArray and CharTermAttributeImpl

* Add tests for ReadOnlySpan overloads of CharBlockArray.Append and CharTermAttributeImpl.Append
  • Loading branch information
paulirwin authored Nov 19, 2024
1 parent 2d4d332 commit 0e54d66
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 25 deletions.
44 changes: 36 additions & 8 deletions src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
/// @lucene.experimental
/// </summary>
// LUCENENET NOTE: The serialization features here are strictly for testing purposes,
// therefore it doesn't make any difference what type of serialization is used.
// To make things simpler, we are using BinaryReader and BinaryWriter since
// therefore it doesn't make any difference what type of serialization is used.
// To make things simpler, we are using BinaryReader and BinaryWriter since
// BinaryFormatter is not implemented in .NET Standard 1.x.
internal class CharBlockArray : IAppendable, ICharSequence
internal class CharBlockArray : IAppendable, ICharSequence,
ISpanAppendable /* LUCENENET specific */
{
private const long serialVersionUID = 1L;

Expand All @@ -65,8 +66,6 @@ public object Clone()
return clone;
}



// LUCENENET specific
public void Serialize(Stream writer)
{
Expand Down Expand Up @@ -235,7 +234,6 @@ public virtual CharBlockArray Append(char[]? value, int startIndex, int length)
if (startIndex > value.Length - length)
throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(length)} <= {nameof(Length)}.");


int offset = startIndex;
int remain = length;
while (remain > 0)
Expand Down Expand Up @@ -310,7 +308,6 @@ public virtual CharBlockArray Append(string? value, int startIndex, int length)
if (startIndex > value.Length - length)
throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(length)} <= {nameof(Length)}.");


int offset = startIndex;
int remain = length;
while (remain > 0)
Expand Down Expand Up @@ -409,6 +406,32 @@ public virtual CharBlockArray Append(StringBuilder? value, int startIndex, int l
return this;
}

/// <summary>
/// Appends every character of <paramref name="value"/> to this instance,
/// spreading the data across blocks as needed.
/// </summary>
/// <param name="value">The characters to append. An empty span leaves the instance unchanged.</param>
/// <returns>This instance, so calls can be chained.</returns>
public virtual CharBlockArray Append(ReadOnlySpan<char> value)
{
    // Portion of the input that has not been copied yet.
    ReadOnlySpan<char> pending = value;

    while (!pending.IsEmpty)
    {
        // Once the current block's length has reached blockSize, request another
        // block before copying any further.
        if (this.current.length == this.blockSize)
        {
            AddBlock();
        }

        // Copy no more than what still fits into the current block.
        int chunk = Math.Min(pending.Length, this.blockSize - this.current.length);
        pending.Slice(0, chunk).CopyTo(this.current.chars.AsSpan(this.current.length));

        this.current.length += chunk;
        pending = pending.Slice(chunk);
    }

    // The overall length grows by the full size of the input.
    this.length += value.Length;
    return this;
}

#nullable restore

#region IAppendable Members
Expand All @@ -431,6 +454,11 @@ public virtual CharBlockArray Append(StringBuilder? value, int startIndex, int l

// Explicit interface implementation: forwards to the Append overload declared on this class.
IAppendable IAppendable.Append(ICharSequence value, int startIndex, int count)
{
    return Append(value, startIndex, count);
}

#endregion

#region ISpanAppendable Members

// Explicit interface implementation: forwards to the Append(ReadOnlySpan<char>) overload declared on this class.
ISpanAppendable ISpanAppendable.Append(ReadOnlySpan<char> value)
{
    return Append(value);
}

#endregion

Expand Down Expand Up @@ -612,4 +640,4 @@ internal bool Equals(int startIndex, int length, ReadOnlySpan<char> other)
return true;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ public virtual void TestArray()
// CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
// .onUnmappableCharacter(CodingErrorAction.REPLACE)
// .onMalformedInput(CodingErrorAction.REPLACE);
//
// Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
// new EncoderReplacementFallback("?"),
//
// Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
// new EncoderReplacementFallback("?"),
// new DecoderReplacementFallback("?"));

for (int i = 0; i < n; i++)
Expand Down Expand Up @@ -287,6 +287,30 @@ public virtual void TestAppendableInterfaceWithLongSequences()
Assert.AreEqual("4567890123456" + longTestString, t.ToString());
}

// Exercises CharBlockArray.Append(ReadOnlySpan<char>) with whole spans and slices,
// using both short and 50-character inputs.
[Test]
[LuceneNetSpecific]
public virtual void TestSpanAppendableInterface()
{
    const string fiftyDigits = "01234567890123456789012345678901234567890123456789";

    CharBlockArray target = new CharBlockArray();

    // A complete short span.
    target.Append("12345678".AsSpan());
    Assert.AreEqual("12345678", target.ToString());

    // A slice of a short span: start index 3, length (5 - 3), i.e. "34".
    target.Append("0123456789".AsSpan(3, 5 - 3));
    Assert.AreEqual("1234567834", target.ToString());

    // A complete long span, appended to a fresh instance.
    target = new CharBlockArray();
    target.Append(fiftyDigits.AsSpan());
    Assert.AreEqual(fiftyDigits, target.ToString());

    // A slice of a long span (everything from index 3 onward), appended to the existing content.
    target.Append(fiftyDigits.AsSpan(3, 50 - 3));
    Assert.AreEqual("0123456789012345678901234567890123456789012345678934567890123456789012345678901234567890123456789", target.ToString());
}

private sealed class CharSequenceAnonymousClass : ICharSequence
{
private string longTestString;
Expand Down Expand Up @@ -319,4 +343,4 @@ public override string ToString()
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using J2N.IO;
using J2N.Text;
using Lucene.Net.Attributes;
using NUnit.Framework;
using System;
using System.Collections.Generic;
Expand Down Expand Up @@ -328,6 +329,29 @@ public virtual void TestAppendableInterfaceWithLongSequences()
Assert.AreEqual("4567890123456" + longTestString, t.ToString());
}

// Exercises CharTermAttribute.Append(ReadOnlySpan<char>) with whole spans and slices,
// using both short and 50-character inputs.
[Test]
[LuceneNetSpecific]
public virtual void TestSpanAppendableInterface()
{
    const string fiftyDigits = "01234567890123456789012345678901234567890123456789";

    CharTermAttribute attribute = new CharTermAttribute();

    // A complete short span.
    attribute.Append("12345678".AsSpan());
    Assert.AreEqual("12345678", attribute.ToString());

    // A slice of a short span: start index 3, length (5 - 3), i.e. "34".
    attribute.Append("0123456789".AsSpan(3, 5 - 3));
    Assert.AreEqual("1234567834", attribute.ToString());

    // A complete long span, appended after SetEmpty().
    attribute.SetEmpty().Append(fiftyDigits.AsSpan());
    Assert.AreEqual(fiftyDigits, attribute.ToString());

    // A slice of a long span (everything from index 3 onward), appended to the existing content.
    attribute.Append(fiftyDigits.AsSpan(3, 50 - 3));
    Assert.AreEqual("0123456789012345678901234567890123456789012345678934567890123456789012345678901234567890123456789", attribute.ToString());
}

private sealed class CharSequenceAnonymousClass : ICharSequence
{
private readonly string longTestString;
Expand Down
18 changes: 9 additions & 9 deletions src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// you can then directly alter. If the array is too
/// small for your token, use <see cref="ResizeBuffer(int)"/>
/// to increase it. After
/// altering the buffer be sure to call <see cref="SetLength(int)"/>
/// altering the buffer be sure to call <see cref="SetLength(int)"/>
/// to record the number of valid
/// characters that were placed into the termBuffer.
/// <para>
Expand Down Expand Up @@ -76,15 +76,15 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// the termBuffer array. Use this to truncate the termBuffer
/// or to synchronize with external manipulation of the termBuffer.
/// Note: to grow the size of the array,
/// use <see cref="ResizeBuffer(int)"/> first.
/// NOTE: This is exactly the same operation as calling the <see cref="Length"/> setter, the primary
/// use <see cref="ResizeBuffer(int)"/> first.
/// NOTE: This is exactly the same operation as calling the <see cref="Length"/> setter, the primary
/// difference is that this method returns a reference to the current object so it can be chained.
/// <code>
/// obj.SetLength(30).Append("hey you");
/// </code>
/// </summary>
/// <param name="length"> the truncated length </param>
ICharTermAttribute SetLength(int length);
ICharTermAttribute SetLength(int length);

/// <summary>
/// Sets the length of the termBuffer to zero.
Expand Down Expand Up @@ -197,8 +197,8 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// </summary>
/// <param name="value">The sequence of characters to append.</param>
/// <remarks>
/// LUCENENET specific method, added because the .NET <see cref="string"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// LUCENENET specific method, added because the .NET <see cref="string"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// </remarks>
new ICharTermAttribute Append(string value);

Expand Down Expand Up @@ -228,8 +228,8 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// <paramref name="startIndex"/> + <paramref name="count"/> is greater than the length of <paramref name="value"/>.
/// </exception>
/// <remarks>
/// LUCENENET specific method, added because the .NET <see cref="string"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// LUCENENET specific method, added because the .NET <see cref="string"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// </remarks>
new ICharTermAttribute Append(string value, int startIndex, int count); // LUCENENET TODO: API - change to startIndex/length to match .NET

Expand Down Expand Up @@ -270,7 +270,7 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// <paramref name="startIndex"/> + <paramref name="count"/> is greater than the length of <paramref name="value"/>.
/// </exception>
/// <remarks>
/// LUCENENET specific method, added because the .NET <see cref="StringBuilder"/> data type
/// LUCENENET specific method, added because the .NET <see cref="StringBuilder"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// </remarks>
new ICharTermAttribute Append(StringBuilder value, int startIndex, int count);
Expand Down
24 changes: 20 additions & 4 deletions src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ namespace Lucene.Net.Analysis.TokenAttributes

/// <summary>
/// Default implementation of <see cref="ICharTermAttribute"/>. </summary>
public class CharTermAttribute : Attribute, ICharTermAttribute, ITermToBytesRefAttribute, IAppendable // LUCENENET specific: Not implementing ICloneable per Microsoft's recommendation
public class CharTermAttribute : Attribute, ICharTermAttribute, ITermToBytesRefAttribute, IAppendable, // LUCENENET specific: Not implementing ICloneable per Microsoft's recommendation
ISpanAppendable /* LUCENENET specific */
{
private const int MIN_BUFFER_SIZE = 10;

Expand Down Expand Up @@ -85,7 +86,7 @@ public char[] ResizeBuffer(int newSize)
{
// Not big enough; create a new array with slight
// over allocation and preserve content

// LUCENENET: Resize rather than copy
Array.Resize(ref termBuffer, ArrayUtil.Oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR));
}
Expand Down Expand Up @@ -196,7 +197,6 @@ public ICharSequence Subsequence(int startIndex, int length)

// *** Appendable interface ***


public CharTermAttribute Append(string value, int startIndex, int charCount)
{
// LUCENENET: Changed semantics to be the same as the StringBuilder in .NET
Expand Down Expand Up @@ -358,6 +358,17 @@ public CharTermAttribute Append(ICharSequence value, int startIndex, int charCou
return this;
}

/// <summary>
/// Appends the characters of <paramref name="value"/> to the end of the term buffer.
/// </summary>
/// <param name="value">The characters to append. An empty span leaves the term unchanged.</param>
/// <returns>This instance, so calls can be chained.</returns>
public CharTermAttribute Append(ReadOnlySpan<char> value)
{
    if (!value.IsEmpty)
    {
        int appendedCount = value.Length;

        // Obtain a buffer sized for the existing term plus the new characters,
        // then copy the new characters in directly after the current term.
        char[] buffer = InternalResizeBuffer(termLength + appendedCount);
        value.CopyTo(buffer.AsSpan(termLength));

        Length += appendedCount;
    }

    return this;
}

private char[] InternalResizeBuffer(int length)
{
if (termBuffer.Length < length)
Expand Down Expand Up @@ -524,7 +535,12 @@ public override void CopyTo(IAttribute target) // LUCENENET specific - intention

// Explicit interface implementation: forwards to the Append overload declared on this class.
IAppendable IAppendable.Append(ICharSequence value, int startIndex, int count)
{
    return Append(value, startIndex, count);
}

#endregion

#region ISpanAppendable Members

// Explicit interface implementation: forwards to the Append(ReadOnlySpan<char>) overload declared on this class.
ISpanAppendable ISpanAppendable.Append(ReadOnlySpan<char> value)
{
    return Append(value);
}

#endregion
}
}
}

0 comments on commit 0e54d66

Please sign in to comment.