Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ISpanAppendable in CharBlockArray and CharTermAttributeImpl #1028

Merged
merged 4 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 36 additions & 8 deletions src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
/// @lucene.experimental
/// </summary>
// LUCENENET NOTE: The serialization features here are strictly for testing purposes,
// therefore it doesn't make any difference what type of serialization is used.
// To make things simpler, we are using BinaryReader and BinaryWriter since
// therefore it doesn't make any difference what type of serialization is used.
// To make things simpler, we are using BinaryReader and BinaryWriter since
// BinaryFormatter is not implemented in .NET Standard 1.x.
internal class CharBlockArray : IAppendable, ICharSequence
internal class CharBlockArray : IAppendable, ICharSequence,
ISpanAppendable /* LUCENENET specific */
{
private const long serialVersionUID = 1L;

Expand All @@ -65,8 +66,6 @@ public object Clone()
return clone;
}



// LUCENENET specific
public void Serialize(Stream writer)
{
Expand Down Expand Up @@ -235,7 +234,6 @@ public virtual CharBlockArray Append(char[]? value, int startIndex, int length)
if (startIndex > value.Length - length)
throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(length)} <= {nameof(Length)}.");


int offset = startIndex;
int remain = length;
while (remain > 0)
Expand Down Expand Up @@ -310,7 +308,6 @@ public virtual CharBlockArray Append(string? value, int startIndex, int length)
if (startIndex > value.Length - length)
throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(length)} <= {nameof(Length)}.");


int offset = startIndex;
int remain = length;
while (remain > 0)
Expand Down Expand Up @@ -409,6 +406,32 @@ public virtual CharBlockArray Append(StringBuilder? value, int startIndex, int l
return this;
}

/// <summary>
/// Appends every character of <paramref name="value"/> to this instance,
/// copying it in chunks that fit the free space of the current block and
/// calling <c>AddBlock()</c> whenever the current block is already full.
/// </summary>
/// <param name="value">The characters to append. An empty span leaves the content unchanged.</param>
/// <returns>This instance, so that calls can be chained.</returns>
public virtual CharBlockArray Append(ReadOnlySpan<char> value)
{
    // Unconsumed tail of the input; shrinks from the front after every chunk.
    ReadOnlySpan<char> pending = value;

    while (!pending.IsEmpty)
    {
        // No room left in the current block: request another one before copying.
        if (this.current.length == this.blockSize)
        {
            AddBlock();
        }

        // Copy as much as fits: either everything that is left or the free space, whichever is smaller.
        int freeInBlock = this.blockSize - this.current.length;
        int chunk = pending.Length < freeInBlock ? pending.Length : freeInBlock;

        pending.Slice(0, chunk).CopyTo(this.current.chars.AsSpan(this.current.length));
        this.current.length += chunk;
        pending = pending.Slice(chunk);
    }

    // The total length grows by the full size of the input, regardless of how it was chunked.
    this.length += value.Length;
    return this;
}

#nullable restore

#region IAppendable Members
Expand All @@ -431,6 +454,11 @@ public virtual CharBlockArray Append(StringBuilder? value, int startIndex, int l

IAppendable IAppendable.Append(ICharSequence value, int startIndex, int count) => Append(value, startIndex, count);

#endregion

#region ISpanAppendable Members

/// <summary>
/// Explicit <see cref="ISpanAppendable"/> implementation; forwards to the public
/// <c>Append(ReadOnlySpan&lt;char&gt;)</c> overload and returns its result.
/// </summary>
ISpanAppendable ISpanAppendable.Append(ReadOnlySpan<char> value)
{
    return Append(value);
}

#endregion

Expand Down Expand Up @@ -612,4 +640,4 @@ internal bool Equals(int startIndex, int length, ReadOnlySpan<char> other)
return true;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ public virtual void TestArray()
// CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
// .onUnmappableCharacter(CodingErrorAction.REPLACE)
// .onMalformedInput(CodingErrorAction.REPLACE);
//
// Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
// new EncoderReplacementFallback("?"),
//
// Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
// new EncoderReplacementFallback("?"),
// new DecoderReplacementFallback("?"));

for (int i = 0; i < n; i++)
Expand Down Expand Up @@ -287,6 +287,30 @@ public virtual void TestAppendableInterfaceWithLongSequences()
Assert.AreEqual("4567890123456" + longTestString, t.ToString());
}

/// <summary>
/// Exercises <c>CharBlockArray.Append(ReadOnlySpan&lt;char&gt;)</c> with a short span,
/// a short slice, a 50-character span and a 47-character slice, checking the
/// accumulated text through <c>ToString()</c> after each append.
/// </summary>
[Test]
[LuceneNetSpecific]
public virtual void TestSpanAppendableInterface()
{
    const string fiftyDigits = "01234567890123456789012345678901234567890123456789";

    CharBlockArray t = new CharBlockArray();

    // A whole (short) span.
    t.Append("12345678".AsSpan());
    Assert.AreEqual("12345678", t.ToString());

    // A two-character slice ("34") is added after the existing content.
    t.Append("0123456789".AsSpan(3, 5 - 3));
    Assert.AreEqual("1234567834", t.ToString());

    // A long span appended to a fresh instance.
    t = new CharBlockArray();
    t.Append(fiftyDigits.AsSpan());
    Assert.AreEqual(fiftyDigits, t.ToString());

    // A long slice (everything from index 3 onwards) appended after the long span.
    t.Append(fiftyDigits.AsSpan(3, 50 - 3));
    Assert.AreEqual("0123456789012345678901234567890123456789012345678934567890123456789012345678901234567890123456789", t.ToString());
}

private sealed class CharSequenceAnonymousClass : ICharSequence
{
private string longTestString;
Expand Down Expand Up @@ -319,4 +343,4 @@ public override string ToString()
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using J2N.IO;
using J2N.Text;
using Lucene.Net.Attributes;
using NUnit.Framework;
using System;
using System.Collections.Generic;
Expand Down Expand Up @@ -328,6 +329,29 @@ public virtual void TestAppendableInterfaceWithLongSequences()
Assert.AreEqual("4567890123456" + longTestString, t.ToString());
}

/// <summary>
/// Exercises <c>CharTermAttribute.Append(ReadOnlySpan&lt;char&gt;)</c> with a short span,
/// a short slice, a 50-character span (after <c>SetEmpty()</c>) and a 47-character slice,
/// checking the accumulated text through <c>ToString()</c> after each append.
/// </summary>
[Test]
[LuceneNetSpecific]
public virtual void TestSpanAppendableInterface()
{
    CharTermAttribute t = new CharTermAttribute();

    // Test with a span
    t.Append("12345678".AsSpan());
    Assert.AreEqual("12345678", t.ToString());

    // test with a span slice ("34" is added after the existing content)
    t.Append("0123456789".AsSpan(3, 5 - 3));
    Assert.AreEqual("1234567834", t.ToString());

    // test with a long span, starting again from an emptied attribute
    t.SetEmpty().Append("01234567890123456789012345678901234567890123456789".AsSpan());
    Assert.AreEqual("01234567890123456789012345678901234567890123456789", t.ToString());

    // test with a long span slice (everything from index 3 onwards)
    t.Append("01234567890123456789012345678901234567890123456789".AsSpan(3, 50 - 3));
    Assert.AreEqual("0123456789012345678901234567890123456789012345678934567890123456789012345678901234567890123456789", t.ToString());
}

private sealed class CharSequenceAnonymousClass : ICharSequence
{
private readonly string longTestString;
Expand Down
18 changes: 9 additions & 9 deletions src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// you can then directly alter. If the array is too
/// small for your token, use <see cref="ResizeBuffer(int)"/>
/// to increase it. After
/// altering the buffer be sure to call <see cref="SetLength(int)"/>
/// altering the buffer be sure to call <see cref="SetLength(int)"/>
/// to record the number of valid
/// characters that were placed into the termBuffer.
/// <para>
Expand Down Expand Up @@ -76,15 +76,15 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// the termBuffer array. Use this to truncate the termBuffer
/// or to synchronize with external manipulation of the termBuffer.
/// Note: to grow the size of the array,
/// use <see cref="ResizeBuffer(int)"/> first.
/// NOTE: This is exactly the same operation as calling the <see cref="Length"/> setter, the primary
/// use <see cref="ResizeBuffer(int)"/> first.
/// NOTE: This is exactly the same operation as calling the <see cref="Length"/> setter, the primary
/// difference is that this method returns a reference to the current object so it can be chained.
/// <code>
/// obj.SetLength(30).Append("hey you");
/// </code>
/// </summary>
/// <param name="length"> the truncated length </param>
ICharTermAttribute SetLength(int length);
ICharTermAttribute SetLength(int length);
paulirwin marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// Sets the length of the termBuffer to zero.
Expand Down Expand Up @@ -197,8 +197,8 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// </summary>
/// <param name="value">The sequence of characters to append.</param>
/// <remarks>
/// LUCENENET specific method, added because the .NET <see cref="string"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// LUCENENET specific method, added because the .NET <see cref="string"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// </remarks>
new ICharTermAttribute Append(string value);

Expand Down Expand Up @@ -228,8 +228,8 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// <paramref name="startIndex"/> + <paramref name="count"/> is greater than the length of <paramref name="value"/>.
/// </exception>
/// <remarks>
/// LUCENENET specific method, added because the .NET <see cref="string"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// LUCENENET specific method, added because the .NET <see cref="string"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// </remarks>
new ICharTermAttribute Append(string value, int startIndex, int count); // LUCENENET TODO: API - change to startIndex/length to match .NET

Expand Down Expand Up @@ -270,7 +270,7 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// <paramref name="startIndex"/> + <paramref name="count"/> is greater than the length of <paramref name="value"/>.
/// </exception>
/// <remarks>
/// LUCENENET specific method, added because the .NET <see cref="StringBuilder"/> data type
/// LUCENENET specific method, added because the .NET <see cref="StringBuilder"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// </remarks>
new ICharTermAttribute Append(StringBuilder value, int startIndex, int count);
Expand Down
24 changes: 20 additions & 4 deletions src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ namespace Lucene.Net.Analysis.TokenAttributes

/// <summary>
/// Default implementation of <see cref="ICharTermAttribute"/>. </summary>
public class CharTermAttribute : Attribute, ICharTermAttribute, ITermToBytesRefAttribute, IAppendable // LUCENENET specific: Not implementing ICloneable per Microsoft's recommendation
public class CharTermAttribute : Attribute, ICharTermAttribute, ITermToBytesRefAttribute, IAppendable, // LUCENENET specific: Not implementing ICloneable per Microsoft's recommendation
ISpanAppendable /* LUCENENET specific */
{
private const int MIN_BUFFER_SIZE = 10;

Expand Down Expand Up @@ -85,7 +86,7 @@ public char[] ResizeBuffer(int newSize)
{
// Not big enough; create a new array with slight
// over allocation and preserve content

// LUCENENET: Resize rather than copy
Array.Resize(ref termBuffer, ArrayUtil.Oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR));
}
Expand Down Expand Up @@ -196,7 +197,6 @@ public ICharSequence Subsequence(int startIndex, int length)

// *** Appendable interface ***


public CharTermAttribute Append(string value, int startIndex, int charCount)
{
// LUCENENET: Changed semantics to be the same as the StringBuilder in .NET
Expand Down Expand Up @@ -358,6 +358,17 @@ public CharTermAttribute Append(ICharSequence value, int startIndex, int charCou
return this;
}

/// <summary>
/// Appends the characters of <paramref name="value"/> after the current term content.
/// </summary>
/// <param name="value">The characters to append. An empty span is a no-op.</param>
/// <returns>This instance, so that calls can be chained.</returns>
public CharTermAttribute Append(ReadOnlySpan<char> value)
{
    int count = value.Length;
    if (count != 0)
    {
        // Obtain a buffer with room for the existing term plus the new characters,
        // then copy the new characters in right after the existing ones.
        char[] buffer = InternalResizeBuffer(termLength + count);
        value.CopyTo(buffer.AsSpan(termLength));

        // Record the new logical length through the Length property.
        Length += count;
    }

    return this;
}

private char[] InternalResizeBuffer(int length)
{
if (termBuffer.Length < length)
Expand Down Expand Up @@ -524,7 +535,12 @@ public override void CopyTo(IAttribute target) // LUCENENET specific - intention

IAppendable IAppendable.Append(ICharSequence value, int startIndex, int count) => Append(value, startIndex, count);

#endregion

#region ISpanAppendable Members

/// <summary>
/// Explicit <see cref="ISpanAppendable"/> implementation; forwards to the public
/// <c>Append(ReadOnlySpan&lt;char&gt;)</c> overload and returns its result.
/// </summary>
ISpanAppendable ISpanAppendable.Append(ReadOnlySpan<char> value)
{
    return Append(value);
}

#endregion
}
}
}
Loading