-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Brought back `DynamicVeSqlExpressionEvaluationSpec` to reproduce the Parquet error case. Found several other test failures, documented them in #505. - Created `ColumnarBatchToVeColBatch` & updated `SparkToVectorEnginePlan` to use it. The default mode uses `InternalRow`s as the transformation mechanism, whereas col-to-col implementation is to be implemented separately, as an optimization. - For the col-to-col implementation we add a unit test case to validate it, as part of the optimization implementation.
- Loading branch information
Showing
6 changed files
with
177 additions
and
47 deletions.
There are no files selected for viewing
51 changes: 51 additions & 0 deletions
51
src/main/scala/com/nec/cache/ColumnarBatchToVeColBatch.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
package com.nec.cache | ||
|
||
import com.nec.arrow.ArrowEncodingSettings | ||
import com.nec.ve.{VeColBatch, VeProcess} | ||
import com.nec.ve.VeProcess.OriginalCallingContext | ||
import com.nec.ve.colvector.VeColBatch.VeColVectorSource | ||
import org.apache.arrow.memory.BufferAllocator | ||
import org.apache.arrow.vector.types.pojo.Schema | ||
import org.apache.spark.sql.vectorized.ColumnarBatch | ||
|
||
// Converters from an iterator of Spark `ColumnarBatch`es to an iterator of `VeColBatch`es. | ||
// Two entry points with identical signatures: a column-based one (stub) and a row-based one. | ||
object ColumnarBatchToVeColBatch { | ||
// Column-to-column conversion path. | ||
// Not implemented yet: the body is `???`, so calling this method throws `scala.NotImplementedError`. | ||
def toVeColBatchesViaCols( | ||
columnarBatches: Iterator[ColumnarBatch], | ||
arrowSchema: Schema, | ||
completeInSpark: Boolean | ||
)(implicit | ||
bufferAllocator: BufferAllocator, | ||
arrowEncodingSettings: ArrowEncodingSettings, | ||
originalCallingContext: OriginalCallingContext, | ||
veProcess: VeProcess, | ||
veColVectorSource: VeColVectorSource | ||
): Iterator[VeColBatch] = { | ||
??? | ||
} | ||
|
||
// Row-based conversion path: each input batch is exposed as a row iterator, the rows are passed to | ||
// `SparkInternalRowsToArrowColumnarBatches` together with `arrowSchema` and `completeInSpark`, and every | ||
// batch it yields is converted with `VeColBatch.fromArrowColumnarBatch`. | ||
// Built on `Iterator.flatMap`, so input batches are only pulled as the returned iterator is consumed. | ||
def toVeColBatchesViaRows( | ||
columnarBatches: Iterator[ColumnarBatch], | ||
arrowSchema: Schema, | ||
completeInSpark: Boolean | ||
)(implicit | ||
bufferAllocator: BufferAllocator, | ||
arrowEncodingSettings: ArrowEncodingSettings, | ||
originalCallingContext: OriginalCallingContext, | ||
veProcess: VeProcess, | ||
veColVectorSource: VeColVectorSource | ||
): Iterator[VeColBatch] = { | ||
columnarBatches.flatMap { columnarBatch => | ||
// `asScala` below turns the Java row iterator into a Scala `Iterator`. | ||
import scala.collection.JavaConverters._ | ||
SparkInternalRowsToArrowColumnarBatches | ||
.apply( | ||
rowIterator = columnarBatch.rowIterator().asScala, | ||
arrowSchema = arrowSchema, | ||
completeInSpark = completeInSpark | ||
) | ||
// NOTE: this lambda parameter shadows the outer `columnarBatch`; here it is a batch | ||
// produced by `SparkInternalRowsToArrowColumnarBatches`, not the original input batch. | ||
.map { columnarBatch => | ||
/* cleaning up the [[columnarBatch]] is not necessary as the underlying one does it */ | ||
VeColBatch.fromArrowColumnarBatch(columnarBatch) | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
69 changes: 69 additions & 0 deletions
69
src/test/scala/com/nec/ve/ColumnarBatchToVeColBatchTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
package com.nec.ve | ||
|
||
import com.nec.arrow.{ArrowEncodingSettings, WithTestAllocator} | ||
import com.nec.cache.ColumnarBatchToVeColBatch | ||
import com.nec.spark.SparkAdditions | ||
import com.nec.ve.VeProcess.OriginalCallingContext | ||
import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema} | ||
import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector | ||
import org.apache.spark.sql.types.IntegerType | ||
import org.apache.spark.sql.vectorized.ColumnarBatch | ||
import org.scalatest.Ignore | ||
import org.scalatest.freespec.AnyFreeSpec | ||
|
||
// Companion object of the test class; it currently declares no members. | ||
object ColumnarBatchToVeColBatchTest {} | ||
|
||
// Checks that the column-based conversion yields the same column contents as the row-based one. | ||
// NOTE(review): presumably ignored because `toVeColBatchesViaCols` is not implemented yet - confirm. | ||
/** This is a test-case that is currently not passing */ | ||
@Ignore | ||
final class ColumnarBatchToVeColBatchTest | ||
extends AnyFreeSpec | ||
with SparkAdditions | ||
with WithVeProcess { | ||
import OriginalCallingContext.Automatic._ | ||
|
||
"It works" in { | ||
WithTestAllocator { implicit alloc => | ||
// NOTE(review): the second argument (3) looks like a per-batch row limit, which would explain | ||
// the expected 3 + 2 split asserted below - confirm against `ArrowEncodingSettings`. | ||
implicit val arrowEncodingSettings: ArrowEncodingSettings = | ||
ArrowEncodingSettings("UTC", 3, 10) | ||
|
||
import collection.JavaConverters._ | ||
// Arrow schema with a single field named "test": non-nullable, signed, 32 bits wide (8 * 4). | ||
val schema = new Schema( | ||
List( | ||
new Field( | ||
"test", | ||
new FieldType(false, new ArrowType.Int(8 * 4, true), null), | ||
List.empty.asJava | ||
) | ||
).asJava | ||
) | ||
// Input: one on-heap integer column holding the five values 1, 34, 9, 2, 3. | ||
val col1 = new OnHeapColumnVector(5, IntegerType) | ||
col1.putInt(0, 1) | ||
col1.putInt(1, 34) | ||
col1.putInt(2, 9) | ||
col1.putInt(3, 2) | ||
col1.putInt(4, 3) | ||
val onHeapColB = new ColumnarBatch(Array(col1), 5) | ||
val columnarBatches: List[ColumnarBatch] = onHeapColB :: Nil | ||
// Reference result: the row-based path, with each output column rendered as a string. | ||
val expectedCols: List[String] = ColumnarBatchToVeColBatch | ||
.toVeColBatchesViaRows( | ||
columnarBatches = columnarBatches.iterator, | ||
arrowSchema = schema, | ||
completeInSpark = false | ||
) | ||
.flatMap(_.cols.map(_.toArrowVector().toString)) | ||
.toList | ||
|
||
// The five input values are expected to come back as two batches of 3 and 2 elements. | ||
assert(expectedCols == List("[1, 34, 9]", "[2, 3]")) | ||
|
||
// Result under test: the column-based path, fed the same input and rendered the same way. | ||
val gotCols: List[String] = ColumnarBatchToVeColBatch | ||
.toVeColBatchesViaCols( | ||
columnarBatches = columnarBatches.iterator, | ||
arrowSchema = schema, | ||
completeInSpark = false | ||
) | ||
.flatMap(_.cols.map(_.toArrowVector().toString)) | ||
.toList | ||
assert(gotCols == expectedCols) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters