Skip to content

Commit

Permalink
composite buffer refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
Jack Dermody committed Aug 12, 2024
1 parent 385fa71 commit 28b84b5
Show file tree
Hide file tree
Showing 28 changed files with 492 additions and 382 deletions.
78 changes: 53 additions & 25 deletions BrightData.UnitTests/BufferTests.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using BrightData.Types;
using BrightData.UnitTests.Helper;
Expand Down Expand Up @@ -47,7 +48,7 @@ public async Task<uint> ReadAsync(Memory<byte> data1, uint offset)
return (uint)ret;
}
}
public class InMemoryStreamProvider : IProvideDataBlocks
public class InMemoryStreamProvider : IProvideByteBlocks
{
readonly Dictionary<Guid, TempData> _data = [];

Expand Down Expand Up @@ -89,20 +90,47 @@ public override string ToString()
}
}

struct TestStruct
[Fact]
public void TestIncrementingBlockSizes()
{
public int Property { get; set; }
var uintBuffer = _streamProvider.CreateCompositeBuffer<uint>(blockSize: 2, maxBlockSize: 32, maxInMemoryBlocks:128);
for (var i = 0U; i < 1024; i++)
uintBuffer.Append(i);
var blockSizes = uintBuffer.BlockSizes;
blockSizes.Sum(x => x).Should().Be(1024);
blockSizes[0].Should().Be(2);
blockSizes[1].Should().Be(4);
blockSizes[2].Should().Be(8);
blockSizes[3].Should().Be(16);
blockSizes[4].Should().Be(32);
blockSizes[5].Should().Be(32);
}

/// <summary>
/// Checks the value reported by <c>DistinctItems</c> as unique values are appended:
/// 8 after eight unique values, 32 after thirty-two, and null once a thirty-third is added.
/// </summary>
[Fact]
public void TestDistinctCount()
{
    // NOTE(review): other call sites pass a "numDistinct" value as the final argument,
    // so the last 32 is presumably the distinct-item limit - confirm against the factory signature.
    var buffer = _streamProvider.CreateCompositeBuffer<uint>(32, 32, 32, 32);

    // Appends each value in [start, end) to the buffer, one item at a time.
    void AppendRange(uint start, uint end)
    {
        for (uint value = start; value < end; ++value)
            buffer.Append(value);
    }

    // first eight unique values
    AppendRange(0U, 8U);
    buffer.DistinctItems.Should().Be(8);

    // top up to thirty-two unique values in total
    AppendRange(8U, 32U);
    buffer.DistinctItems.Should().Be(32);

    // one more unique value: the test expects the distinct count to become unavailable
    buffer.Append(32U);
    buffer.DistinctItems.Should().BeNull();
}

[Fact]
public void VectorBuffer()
{
var data = new[] {
new TestClass(new byte[] { 1, 2, 3 }),
new TestClass(new byte[] { 4, 5, 6 }),
new TestClass(new byte[] { 7, 8, 9 })
new TestClass([1, 2, 3]),
new TestClass([4, 5, 6]),
new TestClass([7, 8, 9])
};
var vectorBuffer = _streamProvider.CreateCompositeBuffer<TestClass>(x => new(x), 2, 0);
var vectorBuffer = _streamProvider.CreateCompositeBuffer<TestClass>(x => new(x), blockSize: 2, maxBlockSize: 2, maxInMemoryBlocks:0);
vectorBuffer.Append(data);
var index = 0;
vectorBuffer.ForEachBlock(x => {
Expand All @@ -121,7 +149,7 @@ public async Task StringBuffer()
"this is a final test",
"this is a test",
};
var stringBuffer = _streamProvider.CreateCompositeBuffer(2, 0, 128);
var stringBuffer = _streamProvider.CreateCompositeBuffer(blockSize: 2, maxBlockSize: 2, maxInMemoryBlocks:0, 128);
stringBuffer.Append(data);
var index = 0;
for (uint i = 0, len = (uint)stringBuffer.BlockSizes.Length; i < len; i++) {
Expand All @@ -142,7 +170,7 @@ public async Task StringBuffer()
[Fact]
public async Task IntBuffer()
{
var intBuffer = _streamProvider.CreateCompositeBuffer<int>(2, 0);
var intBuffer = _streamProvider.CreateCompositeBuffer<int>(blockSize: 2, maxBlockSize: 2, maxInMemoryBlocks:0);
intBuffer.Append(1);
intBuffer.Append(new ReadOnlySpan<int>([2, 3]));
var index = 0;
Expand Down Expand Up @@ -175,10 +203,10 @@ await intBuffer.ForEachBlock(block => {
/// <summary>
/// Buffer size configurations to test
/// </summary>
public static readonly (int numItems, int bufferSize, int inMemoryReadSize, int numDistinct)[] Configurations = [
(32768, 1024, 256, 4),
(32768, 32768, 1024, 1024),
(32768, 128, 32768, 32768)
public static readonly (int numItems, int bufferSize, int maxBufferSize, int inMemoryReadSize, int numDistinct)[] Configurations = [
(32768, 1024, 32768, 256, 4),
(32768, 32768, 32768, 1024, 1024),
(32768, 128, 1024, 32768, 32768)
];

[Fact]
Expand Down Expand Up @@ -208,25 +236,25 @@ public Task WeightedIndexListBuffer2()
[Fact]
public async Task StringBuffer2()
{
foreach (var (numItems, bufferSize, inMemoryReadSize, numDistinct) in Configurations)
await StringBufferReadWriteTest((uint)numItems, bufferSize, (uint)inMemoryReadSize, (ushort)numDistinct, i => i.ToString());
foreach (var (numItems, bufferSize, maxBufferSize, inMemoryReadSize, numDistinct) in Configurations)
await StringBufferReadWriteTest((uint)numItems, bufferSize, maxBufferSize, (uint)inMemoryReadSize, (ushort)numDistinct, i => i.ToString());
}

async Task ObjectTests<T>(Func<uint, T> indexTranslator, CreateFromReadOnlyByteSpan<T> createItem) where T : IHaveDataAsReadOnlyByteSpan
{
foreach (var (numItems, bufferSize, inMemoryReadSize, numDistinct) in Configurations)
await ObjectBufferReadWriteTest((uint)numItems, bufferSize, (uint)inMemoryReadSize, (ushort)numDistinct, indexTranslator, createItem);
foreach (var (numItems, bufferSize, maxBufferSize, inMemoryReadSize, numDistinct) in Configurations)
await ObjectBufferReadWriteTest((uint)numItems, bufferSize, maxBufferSize, (uint)inMemoryReadSize, (ushort)numDistinct, indexTranslator, createItem);
}

async Task StructTests<T>(Func<uint, T> indexTranslator) where T : unmanaged
{
foreach (var (numItems, bufferSize, inMemoryReadSize, numDistinct) in Configurations)
await StructBufferReadWriteTest((uint)numItems, bufferSize, (uint)inMemoryReadSize, (ushort)numDistinct, indexTranslator);
foreach (var (numItems, bufferSize, maxBufferSize, inMemoryReadSize, numDistinct) in Configurations)
await StructBufferReadWriteTest((uint)numItems, bufferSize, maxBufferSize, (uint)inMemoryReadSize, (ushort)numDistinct, indexTranslator);
}

async Task StringBufferReadWriteTest(uint numItems, int bufferSize, uint inMemorySize, ushort numDistinct, Func<uint, string> indexTranslator)
async Task StringBufferReadWriteTest(uint numItems, int bufferSize, int maxBufferSize, uint inMemorySize, ushort numDistinct, Func<uint, string> indexTranslator)
{
var buffer = _streamProvider.CreateCompositeBuffer(bufferSize, inMemorySize, numDistinct);
var buffer = _streamProvider.CreateCompositeBuffer(bufferSize, maxBufferSize, inMemorySize, numDistinct);
for (uint i = 0; i < numItems; i++)
buffer.Append(indexTranslator(i));

Expand All @@ -240,10 +268,10 @@ async Task StringBufferReadWriteTest(uint numItems, int bufferSize, uint inMemor
item.Should().Be(indexTranslator(index++));
}

async Task ObjectBufferReadWriteTest<T>(uint numItems, int bufferSize, uint inMemorySize, ushort numDistinct, Func<uint, T> indexTranslator, CreateFromReadOnlyByteSpan<T> createItem)
async Task ObjectBufferReadWriteTest<T>(uint numItems, int bufferSize, int maxBufferSize, uint inMemorySize, ushort numDistinct, Func<uint, T> indexTranslator, CreateFromReadOnlyByteSpan<T> createItem)
where T : IHaveDataAsReadOnlyByteSpan
{
var buffer = _streamProvider.CreateCompositeBuffer(createItem, bufferSize, inMemorySize, numDistinct);
var buffer = _streamProvider.CreateCompositeBuffer(createItem, bufferSize, maxBufferSize, inMemorySize, numDistinct);
for (uint i = 0; i < numItems; i++)
buffer.Append(indexTranslator(i));

Expand All @@ -259,9 +287,9 @@ async Task ObjectBufferReadWriteTest<T>(uint numItems, int bufferSize, uint inMe
}
}

async Task StructBufferReadWriteTest<T>(uint numItems, int bufferSize, uint inMemorySize, ushort numDistinct, Func<uint, T> indexTranslator) where T : unmanaged
async Task StructBufferReadWriteTest<T>(uint numItems, int bufferSize, int maxBufferSize, uint inMemorySize, ushort numDistinct, Func<uint, T> indexTranslator) where T : unmanaged
{
var buffer = _streamProvider.CreateCompositeBuffer<T>(bufferSize, inMemorySize, numDistinct);
var buffer = _streamProvider.CreateCompositeBuffer<T>(bufferSize, maxBufferSize, inMemorySize, numDistinct);
for (uint i = 0; i < numItems; i++)
buffer.Append(indexTranslator(i));

Expand Down
3 changes: 1 addition & 2 deletions BrightData/BrightData.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12669,13 +12669,12 @@
<param name="distanceMetric">Distance metric</param>
<returns></returns>
</member>
<member name="M:BrightData.LinearAlgebra.VectorIndexing.VectorSet`1.Create(System.Func{BrightData.IReadOnlyVectorStore{`0},BrightData.ISupportKnnSearch{`0}},System.UInt32,BrightData.DistanceMetric,BrightData.VectorStorageType,System.Nullable{System.UInt32})">
<member name="M:BrightData.LinearAlgebra.VectorIndexing.VectorSet`1.CreateKnnSearch(System.UInt32,System.Func{BrightData.IReadOnlyVectorStore{`0},BrightData.ISupportKnnSearch{`0}},BrightData.VectorStorageType,System.Nullable{System.UInt32})">
<summary>
Creates a vector set that uses a KNN search provider
</summary>
<param name="creator"></param>
<param name="vectorSize"></param>
<param name="distanceMetric"></param>
<param name="storageType"></param>
<param name="capacity"></param>
<returns></returns>
Expand Down
2 changes: 1 addition & 1 deletion BrightData/BrightDataContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public LinearAlgebraProvider<float> LinearAlgebraProvider
/// Creates a new temp stream provider
/// </summary>
/// <returns></returns>
public IProvideDataBlocks CreateTempDataBlockProvider() => new TempFileProvider(Get<string>(Consts.BaseTempPath));
public IProvideByteBlocks CreateTempDataBlockProvider() => new TempFileProvider(Get<string>(Consts.BaseTempPath));

/// <summary>
/// Returns a typed property from the context
Expand Down
Loading

0 comments on commit 28b84b5

Please sign in to comment.