Skip to content

Commit

Permalink
Improve performance of discrete linear intersections (e.g. large DFs)
Browse files Browse the repository at this point in the history
Benchmark                                                       Mode  Cnt    Score    Error  Units
BenchmarkSortedRangeSet.linearDiscreteIntersectDiscreteOnLarge  avgt   10    0,149 ±  0,001  ms/op
BenchmarkSortedRangeSet.linearIntersectDiscreteOnLarge          avgt   10  106,509 ±  1,009  ms/op
  • Loading branch information
sopel39 committed May 8, 2024
1 parent 1763a14 commit d381d01
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.block.DictionaryBlock;
import io.trino.spi.block.LazyBlock;
import io.trino.spi.block.RunLengthEncodedBlock;
import io.trino.spi.block.ValueBlock;
import io.trino.spi.connector.ConnectorSession;
Expand Down Expand Up @@ -552,14 +553,77 @@ public SortedRangeSet intersect(ValueSet other)
int thatRangeCount = that.getRangeCount();

if (max(thisRangeCount, thatRangeCount) * 0.02 < min(thisRangeCount, thatRangeCount)) {
return linearSearchIntersect(that);
if (discreteSetMarker == DISCRETE && that.discreteSetMarker == DISCRETE) {
return linearDiscreteSetIntersect(that);
}
else {
return linearSearchIntersect(that);
}
}
else {
// Binary search is better than linear search for sets with large size difference
return binarySearchIntersect(that);
}
}

/**
 * Intersects this set with {@code that} using a single lockstep pass over both sorted range lists.
 * <p>
 * Only the value stored at position {@code 2 * rangeIndex} of each range is compared, so this is
 * meant for sets whose ranges are single values (the caller in {@code intersect} only dispatches
 * here when both sets are marked {@code DISCRETE}). Consecutive matching values are copied from
 * this set in one bulk {@code copyBlock} call per run instead of one range at a time.
 *
 * @param that the other set to intersect with
 * @return the intersection; marked {@code DISCRETE} when it contains at least one value and
 *         {@code NON_DISCRETE} when it is empty
 */
// visible for testing
SortedRangeSet linearDiscreteSetIntersect(SortedRangeSet that)
{
    int thisRangeCount = this.getRangeCount();
    int thatRangeCount = that.getRangeCount();

    // Every matched value consumes one range from each input (both indexes advance on a match),
    // so the result can never hold more ranges than the smaller input.
    int maxResultRangeCount = Math.min(thisRangeCount, thatRangeCount);
    boolean[] inclusive = new boolean[2 * maxResultRangeCount];
    BlockBuilder blockBuilder = type.createBlockBuilder(null, 2 * maxResultRangeCount);
    int resultRangeIndex = 0;

    int thisNextRangeIndex = 0;
    int thatNextRangeIndex = 0;

    // Index (in this set) of the first range of the current run of matches, or -1 when no run is open
    int currentIntersectionStart = -1;

    while (thisNextRangeIndex < thisRangeCount && thatNextRangeIndex < thatRangeCount) {
        int compare = compareValues(
                comparisonOperator,
                sortedRanges,
                2 * thisNextRangeIndex,
                that.sortedRanges,
                2 * thatNextRangeIndex);
        if (compare == 0) {
            // Value present in both sets: open a run if needed and advance both sides
            if (currentIntersectionStart == -1) {
                currentIntersectionStart = thisNextRangeIndex;
            }
            thisNextRangeIndex++;
            thatNextRangeIndex++;
        }
        else {
            // Mismatch ends the current run; flush it to the output before moving on
            if (currentIntersectionStart != -1) {
                int size = thisNextRangeIndex - currentIntersectionStart;
                copyBlock(this, currentIntersectionStart * 2, blockBuilder, inclusive, resultRangeIndex * 2, size);
                resultRangeIndex += size;
                currentIntersectionStart = -1;
            }
            // Advance whichever side currently holds the smaller value
            if (compare < 0) {
                thisNextRangeIndex++;
            }
            else {
                thatNextRangeIndex++;
            }
        }
    }

    // Flush a run that was still open when one of the inputs was exhausted
    if (currentIntersectionStart != -1) {
        int size = thisNextRangeIndex - currentIntersectionStart;
        copyBlock(this, currentIntersectionStart * 2, blockBuilder, inclusive, resultRangeIndex * 2, size);
        resultRangeIndex += size;
    }

    // Trim the inclusive flags to the number of ranges actually produced
    if (resultRangeIndex * 2 < inclusive.length) {
        inclusive = Arrays.copyOf(inclusive, resultRangeIndex * 2);
    }

    return new SortedRangeSet(type, inclusive, blockBuilder.build(), resultRangeIndex > 0 ? DISCRETE : NON_DISCRETE);
}

// visible for testing
SortedRangeSet linearSearchIntersect(SortedRangeSet that)
{
Expand Down Expand Up @@ -672,25 +736,10 @@ SortedRangeSet binarySearchIntersect(SortedRangeSet that)
probeIndex++;
}
else {
Block block = probeRangeSet.getSortedRanges();
if (block instanceof DictionaryBlock || block instanceof ValueBlock) {
int size = intersectionEndIndex - probeIndex - 1;
int offset = probeIndex * 2;
if (block instanceof DictionaryBlock) {
copyDictionaryBlock(blockBuilder, inclusive, probeRangeSet, offset, resultIndex, size);
}
else {
copyValueBlock(blockBuilder, inclusive, probeRangeSet, offset, resultIndex, size);
}
probeIndex += size;
resultIndex += size;
}
else {
// RLE
writeRange(type, blockBuilder, inclusive, resultIndex, probeRange);
resultIndex++;
probeIndex++;
}
int size = intersectionEndIndex - probeIndex - 1;
copyBlock(probeRangeSet, probeIndex * 2, blockBuilder, inclusive, resultIndex * 2, size);
probeIndex += size;
resultIndex += size;
}
}
}
Expand All @@ -717,22 +766,37 @@ private DiscreteSetMarker intersectIsDiscreteSet(SortedRangeSet that, boolean no
return UNKNOWN;
}

private static void copyValueBlock(BlockBuilder blockBuilder, boolean[] inclusive, SortedRangeSet source, int sourceOffset, int destinationOffset, int size)
/**
 * Copies {@code size} ranges from {@code source} into the destination builder and inclusive-flag
 * array, delegating to the copy routine that matches the concrete type of the source's sorted
 * ranges block.
 * <p>
 * {@code sourceOffset} and {@code destinationOffset} are expressed in block positions, while
 * {@code size} is a number of ranges (each helper copies {@code size * 2} positions).
 *
 * @throws IllegalArgumentException if the source ranges are held in a {@code LazyBlock}
 */
private static void copyBlock(SortedRangeSet source, int sourceOffset, BlockBuilder destination, boolean[] destinationInclusive, int destinationOffset, int size)
{
    switch (source.getSortedRanges()) {
        case ValueBlock values -> copyValueBlock(source, values, sourceOffset, destination, destinationInclusive, destinationOffset, size);
        case DictionaryBlock dictionary -> copyDictionaryBlock(source, dictionary, sourceOffset, destination, destinationInclusive, destinationOffset, size);
        case RunLengthEncodedBlock runLengthEncoded -> copyRleBlock(source, runLengthEncoded, sourceOffset, destination, destinationInclusive, destinationOffset, size);
        case LazyBlock unexpected -> throw new IllegalArgumentException("Did not expect LazyBlock");
    }
}

private static void copyValueBlock(SortedRangeSet source, ValueBlock sourceBlock, int sourceOffset, BlockBuilder destination, boolean[] destinationInclusive, int destinationOffset, int size)
{
ValueBlock valueBlock = (ValueBlock) source.getSortedRanges();
System.arraycopy(source.getInclusive(), sourceOffset, inclusive, destinationOffset * 2, size * 2);
blockBuilder.appendRange(valueBlock.getUnderlyingValueBlock(), sourceOffset, size * 2);
System.arraycopy(source.getInclusive(), sourceOffset, destinationInclusive, destinationOffset, size * 2);
destination.appendRange(sourceBlock, sourceOffset, size * 2);
}

private static void copyDictionaryBlock(BlockBuilder blockBuilder, boolean[] inclusive, SortedRangeSet source, int sourceOffset, int destinationOffset, int size)
private static void copyDictionaryBlock(SortedRangeSet source, DictionaryBlock sourceBlock, int sourceOffset, BlockBuilder destination, boolean[] destinationInclusive, int destinationOffset, int size)
{
DictionaryBlock dictionaryBlock = (DictionaryBlock) source.getSortedRanges();
int[] positions = new int[size * 2];
for (int position = 0; position < size * 2; position++) {
positions[position] = dictionaryBlock.getUnderlyingValuePosition(position + sourceOffset);
positions[position] = sourceBlock.getUnderlyingValuePosition(position + sourceOffset);
}
System.arraycopy(source.getInclusive(), sourceOffset, inclusive, destinationOffset * 2, positions.length);
blockBuilder.appendPositions(dictionaryBlock.getUnderlyingValueBlock(), positions, 0, positions.length);
System.arraycopy(source.getInclusive(), sourceOffset, destinationInclusive, destinationOffset, positions.length);
destination.appendPositions(sourceBlock.getUnderlyingValueBlock(), positions, 0, positions.length);
}

/**
 * Copies {@code size} ranges out of a run-length encoded source block.
 * <p>
 * The inclusive flags are copied from {@code sourceOffset}; the values themselves are appended by
 * repeating the block's single underlying value, so {@code sourceOffset} does not affect which
 * value is written.
 */
private static void copyRleBlock(SortedRangeSet source, RunLengthEncodedBlock sourceBlock, int sourceOffset, BlockBuilder destination, boolean[] destinationInclusive, int destinationOffset, int size)
{
    // Two block positions per range
    int positionCount = size * 2;
    System.arraycopy(source.getInclusive(), sourceOffset, destinationInclusive, destinationOffset, positionCount);
    destination.appendRepeated(sourceBlock.getValue(), 0, positionCount);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,12 @@ public List<ValueSet> linearIntersectDiscreteOnLarge(Data data)
return benchmarkIntersectionSingle(data.largeDiscreteSortedRangeSet, SortedRangeSet::linearSearchIntersect);
}

/**
 * Measures {@code SortedRangeSet.linearDiscreteSetIntersect} on the large discrete fixture.
 */
@Benchmark
public List<ValueSet> linearDiscreteIntersectDiscreteOnLarge(Data data)
{
    var largeDiscreteSet = data.largeDiscreteSortedRangeSet;
    return benchmarkIntersectionSingle(largeDiscreteSet, SortedRangeSet::linearDiscreteSetIntersect);
}

@Benchmark
public List<ValueSet> binaryIntersectRangeOnLarge(Data data)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,25 @@ public void testIntersect()
IntStream.rangeClosed(30, 49).mapToObj(l -> Range.range(VARCHAR, slices.get(l * 5), l % 2 == 1, slices.get((l + 1) * 5 - 1), l % 2 == 0))).toList()));
}

@Test
public void testLinearDiscreteSetIntersect()
{
    // Two separate runs of matching values with non-matching values in between and at the end
    SortedRangeSet left = SortedRangeSet.of(BIGINT, 1L, 2L, 10L, 11L, 20L, 21L);
    SortedRangeSet right = SortedRangeSet.of(BIGINT, 1L, 2L, 20L, 21L, 30L);
    SortedRangeSet intersection = left.linearDiscreteSetIntersect(right);
    assertThat(intersection).isEqualTo(SortedRangeSet.of(BIGINT, 1L, 2L, 20L, 21L));
    assertThat(intersection.isDiscreteSet()).isTrue();

    // A single run of matches followed by a mismatching last value on both sides
    left = SortedRangeSet.of(BIGINT, 1L, 2L, 10L);
    right = SortedRangeSet.of(BIGINT, 1L, 2L, 11L);
    intersection = left.linearDiscreteSetIntersect(right);
    assertThat(intersection).isEqualTo(SortedRangeSet.of(BIGINT, 1L, 2L));
    assertThat(intersection.isDiscreteSet()).isTrue();

    // No common values: the result is the empty set, which is not reported as discrete
    left = SortedRangeSet.of(BIGINT, 1L, 2L, 10L);
    right = SortedRangeSet.of(BIGINT, 42L);
    intersection = left.linearDiscreteSetIntersect(right);
    assertThat(intersection).isEqualTo(SortedRangeSet.none(BIGINT));
    assertThat(intersection.isDiscreteSet()).isFalse();
}

private void assertIntersect(SortedRangeSet first, SortedRangeSet second, SortedRangeSet result)
{
assertThat(first.intersect(second)).isEqualTo(result);
Expand Down

0 comments on commit d381d01

Please sign in to comment.