diff --git a/mosaic-core/pom.xml b/mosaic-core/pom.xml
new file mode 100644
index 000000000..306930672
--- /dev/null
+++ b/mosaic-core/pom.xml
@@ -0,0 +1,193 @@
+ com.databricks.labs
+ mosaic
+ 0.4.1
+ 4.0.0
+ mosaic-core
+ mosaic-core
+ jar
+ org.scala-lang
+ scala-library
+ junit
+ junit
+ test
+ org.scalatest
+ scalatest_${scala.compat.version}
+ org.scalamock
+ scalamock_${scala.compat.version}
+ org.apache.spark
+ spark-sql_${scala.compat.version}
+ org.apache.spark
+ spark-catalyst_${scala.compat.version}
+ test-jar
+ org.apache.spark
+ spark-core_${scala.compat.version}
+ test-jar
+ src/main/scala
+ src/test/scala
+ org.apache.maven.plugins
+ maven-assembly-plugin
+ 3.6.0
+ jar-with-dependencies
+ assemble-all
+ package
+ single
+ net.alchim31.maven
+ scala-maven-plugin
+ 4.7.1
+ compile
+ testCompile
+ -dependencyfile
+ ${project.build.directory}/.scala_dependencies
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ 3.1.0
+ true
+ org.scalatest
+ scalatest-maven-plugin
+ 2.0.0
+ ${project.build.directory}/test-reports
+ test
+ test
+ org.scalastyle
+ scalastyle-maven-plugin
+ 1.0.0
+ true
+ true
+ false
+ false
+ ${basedir}/src/main/scala
+ ${basedir}/src/test/scala
+ scalastyle-config.xml
+ ${basedir}/target/scalastyle-output.xml
+ ${encoding}
+ ${encoding}
+ check
+ org.scoverage
+ scoverage-maven-plugin
+ 1.4.11
+ scoverage-report
+ package
+ check
+ report-only
+ 0
+ true
+ ${scala.version}
+ skipTests=false
+ org.apache.maven.plugins
+ maven-resources-plugin
+ 3.0.2
+ copy-files-on-build
+ package
+ copy-resources
+ ${basedir}/python/mosaic/lib
+ ${basedir}/target/
+ *.jar
+ false
diff --git a/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.GeometryAPIRegister b/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.GeometryAPIRegister
new file mode 100644
index 000000000..bbf455b40
--- /dev/null
+++ b/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.GeometryAPIRegister
@@ -0,0 +1 @@
\ No newline at end of file
diff --git a/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.IndexSystemRegister b/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.IndexSystemRegister
new file mode 100644
index 000000000..3a06aba56
--- /dev/null
+++ b/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.IndexSystemRegister
@@ -0,0 +1,3 @@
\ No newline at end of file
diff --git a/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.RasterAPIRegister b/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.RasterAPIRegister
new file mode 100644
index 000000000..2585377bc
--- /dev/null
+++ b/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.RasterAPIRegister
@@ -0,0 +1 @@
\ No newline at end of file
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/GenericServiceFactory.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/GenericServiceFactory.scala
new file mode 100644
index 000000000..bf65a6577
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/GenericServiceFactory.scala
@@ -0,0 +1,69 @@
+package com.databricks.labs.mosaic.core
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import com.databricks.labs.mosaic.core.raster.RasterAPI
+import com.databricks.labs.mosaic.core.util.ResourceUtils
+import scala.util.Try
+/**
+ * Generic service factory for loading implementations of [[com.databricks.labs.mosaic.core.geometry.api.GeometryAPI]],
+ * [[com.databricks.labs.mosaic.core.index.IndexSystem]] and [[com.databricks.labs.mosaic.core.raster.RasterAPI]].
+ * This class implements the interaction with the META-INF/services directory.
+ * All the implementations are provided via the META-INF/services directory and are loaded at runtime.
+ */
+abstract class GenericServiceFactory[T](registerName: String) {
+ private def fetchClasses: Seq[Class[_]] = {
+     // Each line of the register resource is used as a class name to load.
+     val loadAttempts = ResourceUtils
+         .readResourceLines(s"/META-INF/services/$registerName")
+         .map(className => Try(Class.forName(className)))
+     // Names that fail to load are dropped silently; only successfully loaded classes are returned.
+     loadAttempts.collect { case scala.util.Success(clazz) => clazz }.toSeq
+ }
+ /**
+  * Instantiates the first registered class that exposes a public constructor matching the
+  * runtime classes of `params`.
+  *
+  * Candidates are tried lazily in registration order, so only the class that is actually
+  * returned (and any earlier class whose construction failed) has its constructor invoked.
+  *
+  * NOTE(review): `name` is only used in the error message; it does not take part in selecting
+  * the implementation. Confirm whether selection by name is expected by callers.
+  *
+  * @param name   Name of the requested service (reported in the error message).
+  * @param params Constructor arguments; their runtime classes must match the constructor
+  *               parameter types exactly.
+  * @return The first successfully constructed instance, cast to `T`.
+  * @throws IllegalArgumentException if no registered class could be instantiated.
+  */
+ def getService(name: String, params: Array[Object] = Array.empty): T = {
+     val paramTypes = params.map(_.getClass)
+     fetchClasses.iterator
+         .map(clazz => Try(clazz.getConstructor(paramTypes: _*).newInstance(params: _*).asInstanceOf[T]))
+         .collectFirst { case scala.util.Success(service) => service }
+         .getOrElse(
+             throw new IllegalArgumentException(s"Unable to find service with name $name")
+         )
+ }
+/**
+ * This object contains the actual factory instances for [[com.databricks.labs.mosaic.core.geometry.api.GeometryAPI]],
+ * [[com.databricks.labs.mosaic.core.index.IndexSystem]] and [[com.databricks.labs.mosaic.core.raster.RasterAPI]].
+ */
+object GenericServiceFactory {
+ object GeometryAPIFactory
+     extends GenericServiceFactory[GeometryAPI](registerName = "com.databricks.labs.mosaic.GeometryAPIRegister") {
+     /** Returns a [[GeometryAPI]] by delegating to `getService` with the same arguments. */
+     def getGeometryAPI(name: String, params: Array[Object] = Array.empty): GeometryAPI =
+         getService(name = name, params = params)
+ }
+ object IndexSystemFactory
+     extends GenericServiceFactory[IndexSystem](registerName = "com.databricks.labs.mosaic.IndexSystemRegister") {
+     /** Returns an [[IndexSystem]] by delegating to `getService` with the same arguments. */
+     def getIndexSystem(name: String, params: Array[Object] = Array.empty): IndexSystem =
+         getService(name = name, params = params)
+ }
+ object RasterAPIFactory
+     extends GenericServiceFactory[RasterAPI](registerName = "com.databricks.labs.mosaic.RasterAPIRegister") {
+     /** Returns a [[RasterAPI]] by delegating to `getService` with the same arguments. */
+     def getRasterAPI(name: String, params: Array[Object] = Array.empty): RasterAPI =
+         getService(name = name, params = params)
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/Mosaic.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/Mosaic.scala
new file mode 100644
index 000000000..41e7d25ab
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/Mosaic.scala
@@ -0,0 +1,286 @@
+package com.databricks.labs.mosaic.core
+import com.databricks.labs.mosaic.core.geometry._
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import com.databricks.labs.mosaic.core.types.GeometryTypeEnum._
+import com.databricks.labs.mosaic.core.types._
+import scala.annotation.tailrec
+/**
+ * Single abstracted logic for mosaic fill via [[IndexSystem]]. [[IndexSystem]]
+ * is in charge of implementing the individual steps of the logic.
+ */
+object Mosaic {
+ /**
+ * This method is used to fill a geometry with a given resolution.
+ *
+ * @param geometry The geometry to fill with chips.
+ * @param resolution The resolution to fill the geometry with.
+ * @param keepCoreGeom Whether or not to keep the core geometry.
+ * @param indexSystem The index system to use for filling the geometry.
+ * @param geometryAPI The geometry API to use for manipulating the geometry.
+ * @return A sequence of [[MosaicChip]]s.
+ */
+ def getChips(
+     geometry: MosaicGeometry,
+     resolution: Int,
+     keepCoreGeom: Boolean,
+     indexSystem: IndexSystem,
+     geometryAPI: GeometryAPI
+ ): Seq[MosaicChip] = {
+     // Dispatch on the geometry type: points and lines have dedicated routines,
+     // every other type goes through the generic mosaic fill.
+     val geometryType = GeometryTypeEnum.fromString(geometry.getGeometryType)
+     geometryType match {
+         case POINT =>
+             pointChip(geometry, resolution, keepCoreGeom, indexSystem)
+         case MULTIPOINT =>
+             multiPointChips(geometry, resolution, keepCoreGeom, indexSystem)
+         case LINESTRING | MULTILINESTRING =>
+             lineFill(geometry, resolution, indexSystem, geometryAPI)
+         case _ =>
+             mosaicFill(geometry, resolution, keepCoreGeom, indexSystem, geometryAPI)
+     }
+ }
+ /**
+ * This method is used to fill a geometry with a given resolution.
+ * This method is designed to be used with a [[MosaicMultiPoint]].
+ *
+ * @param geometry The geometry to fill with chips.
+ * @param resolution The resolution to fill the geometry with.
+ * @param keepCoreGeom Whether or not to keep the core geometry.
+ * @param indexSystem The index system to use for filling the geometry.
+ * @return A sequence of [[MosaicChip]]s.
+ */
+ def multiPointChips(
+     geometry: MosaicGeometry,
+     resolution: Int,
+     keepCoreGeom: Boolean,
+     indexSystem: IndexSystem
+ ): Seq[MosaicChip] = {
+     // Each member point is chipped independently and the results are concatenated.
+     geometry
+         .asInstanceOf[MosaicMultiPoint]
+         .asSeq
+         .flatMap(member => pointChip(member, resolution, keepCoreGeom, indexSystem))
+ }
+ /**
+ * This method is used to fill a geometry with a given resolution.
+ * This method is designed to be used with a [[MosaicPoint]].
+ *
+ * @param geometry The geometry to fill with chips.
+ * @param resolution The resolution to fill the geometry with.
+ * @param keepCoreGeom Whether or not to keep the core geometry.
+ * @param indexSystem The index system to use for filling the geometry.
+ * @return A sequence of [[MosaicChip]]s.
+ */
+ def pointChip(
+     geometry: MosaicGeometry,
+     resolution: Int,
+     keepCoreGeom: Boolean,
+     indexSystem: IndexSystem
+ ): Seq[MosaicChip] = {
+     val point = geometry.asInstanceOf[MosaicPoint]
+     // The chip carries the point itself only when keepCoreGeom is set; otherwise its geometry is null.
+     val retainedGeom = if (keepCoreGeom) point else null
+     val cellId = indexSystem.pointToIndex(point.getX, point.getY, resolution)
+     // A point always yields exactly one non-core chip.
+     val pointAsChip = MosaicChip(isCore = false, Left(cellId), retainedGeom)
+     Seq(pointAsChip.formatCellId(indexSystem))
+ }
+ /**
+ * This method is used to fill a geometry with a given resolution.
+ * This method is designed to be used with a [[MosaicPolygon]], [[MosaicMultiPolygon]] and [[MosaicGeometryCollection]].
+ *
+ * @param geometry The geometry to fill with chips.
+ * @param resolution The resolution to fill the geometry with.
+ * @param keepCoreGeom Whether or not to keep the core geometry.
+ * @param indexSystem The index system to use for filling the geometry.
+ * @param geometryAPI The geometry API to use for manipulating the geometry.
+ * @return A sequence of [[MosaicChip]]s.
+ */
+ def mosaicFill(
+     geometry: MosaicGeometry,
+     resolution: Int,
+     keepCoreGeom: Boolean,
+     indexSystem: IndexSystem,
+     geometryAPI: GeometryAPI
+ ): Seq[MosaicChip] = {
+     val radius = indexSystem.getBufferRadius(geometry, resolution, geometryAPI)
+     // The carved (core) region uses the radius unmodified.
+     val carvedGeometry = geometry.buffer(-radius)
+     // The border region is buffered by the radius plus 1% so that the union of the carved and
+     // border geometries does not have holes inside the original geometry areas. When carving
+     // leaves nothing, the whole geometry is buffered instead of only its boundary.
+     val borderSource = if (carvedGeometry.isEmpty) geometry else geometry.boundary
+     val borderGeometry = borderSource.buffer(radius * 1.01).simplify(0.01 * radius)
+     val coreIndices = indexSystem.polyfill(carvedGeometry, resolution, Some(geometryAPI))
+     // Cells already classified as core are removed from the border set.
+     val borderIndices = indexSystem.polyfill(borderGeometry, resolution, Some(geometryAPI)).diff(coreIndices)
+     val coreChips = indexSystem.getCoreChips(coreIndices, keepCoreGeom, geometryAPI)
+     val borderChips = indexSystem.getBorderChips(geometry, borderIndices, keepCoreGeom, geometryAPI)
+     coreChips ++ borderChips
+ }
+ /**
+ * This method is used to decompose a [[MosaicLineString]] or [[MosaicMultiLineString]] into a sequence of [[MosaicChip]]s.
+ *
+ * @param geometry The line to decompose.
+ * @param resolution The resolution to decompose the line with.
+ * @param indexSystem The index system to use for decomposing the line.
+ * @param geometryAPI The geometry API to use for manipulating the geometry.
+ * @return A sequence of [[MosaicChip]]s.
+ */
+ def lineFill(geometry: MosaicGeometry, resolution: Int, indexSystem: IndexSystem, geometryAPI: GeometryAPI): Seq[MosaicChip] = {
+     GeometryTypeEnum.fromString(geometry.getGeometryType) match {
+         case LINESTRING =>
+             lineDecompose(geometry.asInstanceOf[MosaicLineString], resolution, indexSystem, geometryAPI)
+         case MULTILINESTRING =>
+             // Decompose every member line on its own and concatenate the resulting chips.
+             val multiLine = geometry.asInstanceOf[MosaicMultiLineString]
+             multiLine.flatten.flatMap(line => lineDecompose(line.asInstanceOf[MosaicLineString], resolution, indexSystem, geometryAPI))
+         // Any other geometry type is rejected.
+         case gt => throw new Error(s"$gt not supported for line fill/decompose operation.")
+     }
+ }
+ /**
+ * @param geometry
+ * Geometry to get k ring cells for.
+ * @param resolution
+ * Resolution of the cells to get.
+ * @param k
+ * The k value passed to the index system's kRing for each border cell.
+ * @param indexSystem
+ * Index system to use.
+ * @param geometryAPI
+ * Geometry API to use.
+ * @return
+ * A set of k ring cells for the geometry.
+ */
+ //noinspection DuplicatedCode
+ def geometryKRing(geometry: MosaicGeometry, resolution: Int, k: Int, indexSystem: IndexSystem, geometryAPI: GeometryAPI): Set[Long] = {
+     val (coreCells, borderCells) = getCellSets(geometry, resolution, indexSystem, geometryAPI)
+     // Only border cells are expanded; core cells are included as they are.
+     val expandedBorder = borderCells.flatMap(cell => indexSystem.kRing(cell, k))
+     coreCells ++ expandedBorder
+ }
+ /**
+ * @param geometry
+ * Geometry to get k loop around
+ * @param resolution
+ * Resolution of the cells
+ * @param k
+ * The k value of the loop; cells within the (k - 1) ring are excluded
+ * @param indexSystem
+ * Index system to use
+ * @param geometryAPI
+ * Geometry API to use
+ * @return
+ * Set of cells that form a k loop around geometry
+ */
+ //noinspection DuplicatedCode
+ def geometryKLoop(geometry: MosaicGeometry, resolution: Int, k: Int, indexSystem: IndexSystem, geometryAPI: GeometryAPI): Set[Long] = {
+     // This would be much more efficient if we could use the
+     // pre-computed tessellation of the geometry for repeated calls.
+     val (coreCells, borderCells) = getCellSets(geometry, resolution, indexSystem, geometryAPI)
+     // Everything covered by the geometry plus the (k - 1) ring around its border cells.
+     val innerRing = coreCells ++ borderCells.flatMap(cell => indexSystem.kRing(cell, k - 1))
+     // Candidate loop cells around each border cell, minus anything already inside the inner ring.
+     val outerLoop = borderCells.flatMap(cell => indexSystem.kLoop(cell, k))
+     outerLoop -- innerRing
+ }
+ /**
+ * This method is used to decompose a [[MosaicLineString]] into a sequence of [[MosaicChip]]s.
+ * Note that this method only works on [[MosaicLineString]]s.
+ * For [[MosaicMultiLineString]]s, use [[lineFill]].
+ *
+ * @param line The line to decompose.
+ * @param resolution The resolution to decompose the line with.
+ * @param indexSystem The index system to use for decomposing the line.
+ * @param geometryAPI The geometry API to use for manipulating the geometry.
+ * @return A sequence of [[MosaicChip]]s.
+ */
+ private def lineDecompose(
+ line: MosaicLineString,
+ resolution: Int,
+ indexSystem: IndexSystem,
+ geometryAPI: GeometryAPI
+ ): Seq[MosaicChip] = {
+ // Traversal is seeded with the cell containing the first element of the first shell of the line.
+ // Both `.head` calls throw if the corresponding sequence is empty.
+ val start = line.getShells.head.asSeq.head
+ val startIndex = indexSystem.pointToIndex(start.getX, start.getY, resolution)
+ // Processes the queued cells one pass at a time: every cell in `queue` is intersected with the line,
+ // intersecting cells produce a chip and enqueue their not-yet-seen 1-ring neighbours for the next pass.
+ // Recursion stops once a pass produces an empty queue.
+ @tailrec
+ def traverseLine(
+ line: MosaicLineString,
+ queue: Seq[Long],
+ traversed: Set[Long],
+ chips: Seq[MosaicChip]
+ ): Seq[MosaicChip] = {
+ // All cells of the current pass count as traversed before any of them is processed.
+ val newTraversed = traversed ++ queue
+ // The accumulator is (cells queued for the next pass, chips collected so far).
+ val (newQueue, newChips) = queue.foldLeft(
+ (Seq.empty[Long], chips)
+ )((accumulator: (Seq[Long], Seq[MosaicChip]), current: Long) => {
+ val indexGeom = indexSystem.indexToGeometry(current, geometryAPI)
+ val lineSegment = line.intersection(indexGeom)
+ if (!lineSegment.isEmpty) {
+ // Line chips are never core chips; the chip geometry is the part of the line inside the cell.
+ val chip = MosaicChip(isCore = false, Left(current), lineSegment)
+ val kRing = indexSystem.kRing(current, 1)
+ // Ignore already processed chips and those which are already in the
+ // queue to be processed
+ val toQueue = kRing.filterNot((newTraversed ++ accumulator._1).contains)
+ (accumulator._1 ++ toQueue, accumulator._2 ++ Seq(chip))
+ } else if (newTraversed.size == 1) {
+ // The line segment intersection was empty, but we only intersected the first point
+ // with a single cell.
+ // We need to run an intersection with a first ring because the starting point might be laying
+ // exactly on the cell boundary.
+ val kRing = indexSystem.kRing(current, 1)
+ val toQueue = kRing.filterNot(newTraversed.contains)
+ // Note: the next-pass queue is replaced by these neighbours rather than appended to.
+ (toQueue, accumulator._2)
+ } else {
+ // No intersection and not the seed cell: the cell contributes nothing.
+ accumulator
+ }
+ })
+ if (newQueue.isEmpty) {
+ newChips
+ } else {
+ traverseLine(line, newQueue, newTraversed, newChips)
+ }
+ }
+ val result = traverseLine(line, Seq(startIndex), Set.empty[Long], Seq.empty[MosaicChip])
+ result
+ }
+ /**
+ * Returns core cells and border cells as sets of Longs. The
+ * implementation currently depends on [[getChips()]] method.
+ *
+ * @param geometry
+ * Geometry to fill with cells.
+ * @param resolution
+ * Resolution of the cells.
+ * @param indexSystem
+ * Index system to use.
+ * @param geometryAPI
+ * Geometry API to use.
+ * @return
+ * Tuple of core cells and border cells.
+ */
+ private def getCellSets(
+     geometry: MosaicGeometry,
+     resolution: Int,
+     indexSystem: IndexSystem,
+     geometryAPI: GeometryAPI
+ ): (Set[Long], Set[Long]) = {
+     // Chip geometries are not needed here, only the cell ids, hence keepCoreGeom = false.
+     val chips = Mosaic.getChips(geometry, resolution, keepCoreGeom = false, indexSystem, geometryAPI)
+     val coreCells = chips.filter(_.isCore).map(_.cellIdAsLong(indexSystem)).toSet
+     val borderCells = chips.filterNot(_.isCore).map(_.cellIdAsLong(indexSystem)).toSet
+     (coreCells, borderCells)
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/MosaicException.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/MosaicException.scala
new file mode 100644
index 000000000..f16a87bcc
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/MosaicException.scala
@@ -0,0 +1,12 @@
+package com.databricks.labs.mosaic.core
+object MosaicException {
+ /**
+  * Builds the exception reported when an expression receives a geometry encoding it cannot handle.
+  *
+  * @param supportedGeometryEncodings The encodings the expression accepts; listed comma-separated in the message.
+  * @param suppliedGeometryEncoding   The encoding that was actually supplied.
+  * @return An [[Exception]] carrying the descriptive message (it is returned, not thrown).
+  */
+ def GeometryEncodingNotSupported(supportedGeometryEncodings: Seq[String], suppliedGeometryEncoding: String): Exception =
+     new Exception(
+         // The trailing space keeps the two sentences from running together.
+         s"This expression only supports geometries encoded as ${supportedGeometryEncodings.mkString(",")}. " +
+             s"$suppliedGeometryEncoding was supplied as input."
+     )
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/ConvertToCodeGen.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/ConvertToCodeGen.scala
new file mode 100644
index 000000000..02988c428
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/ConvertToCodeGen.scala
@@ -0,0 +1,132 @@
+package com.databricks.labs.mosaic.core.codegen.format
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.types._
+import org.apache.spark.sql.catalyst.expressions.codegen._
+import org.apache.spark.sql.types._
+/**
+ * This class is used to generate CodeGen for converting between different geometry formats.
+ */
+object ConvertToCodeGen {
+ /**
+ * This method generates code to construct a geometry from the input format.
+ * Then the method generates code to write the geometry to the output format.
+ * There is currently no support for conversion without constructing a geometry.
+ *
+ * @param ctx CodegenContext used for code generation.
+ * @param ev ExprCode that will store the reference to the output.
+ * @param eval Reference to the input.
+ * @param inputDataType DataType of the input.
+ * @param outputDataTypeName Name of the output DataType.
+ * @param geometryAPI GeometryAPI used to manipulate the geometry.
+ * @return Code to construct the geometry from the input format and write the geometry to the output format.
+ */
+ def fromEval(
+     ctx: CodegenContext,
+     ev: ExprCode,
+     eval: String,
+     inputDataType: DataType,
+     outputDataTypeName: String,
+     geometryAPI: GeometryAPI
+ ): String = {
+     val conversionNeeded = inputDataType.simpleString != outputDataTypeName
+     if (conversionNeeded) {
+         // Read the input into a geometry, write that geometry in the target format and assign the
+         // written value to the output; the whole snippet is wrapped by the geometry API's try-wrapper.
+         val (readCode, geometryRef) = readGeometryCode(ctx, eval, inputDataType, geometryAPI)
+         val (writeCode, outputRef) = writeGeometryCode(ctx, geometryRef, outputDataTypeName, geometryAPI)
+         geometryAPI.codeGenTryWrap(
+             s"""
+ |$readCode
+ |$writeCode
+ |${ev.value} = $outputRef;
+ |""".stripMargin)
+     } else {
+         // Input and output formats are the same: the input reference is assigned straight to the output.
+         s"""
+ |${ev.value} = $eval;
+ |""".stripMargin
+     }
+ }
+ /**
+ * This method executes the actual code generation.
+ * We need this nesting to allow for testing through scalamock.
+ *
+ * @param ctx CodegenContext used for code generation.
+ * @param ev ExprCode that will store the reference to the output.
+ * @param nullSafeCodeGen Code to generate the output.
+ * @param inputDataType DataType of the input.
+ * @param outputDataTypeName Name of the output DataType.
+ * @param geometryAPI GeometryAPI used to manipulate the geometry.
+ * @return Code to construct the geometry from the input format and write the geometry to the output format.
+ */
+ def doCodeGen(
+     ctx: CodegenContext,
+     ev: ExprCode,
+     nullSafeCodeGen: (CodegenContext, ExprCode, String => String) => ExprCode,
+     inputDataType: DataType,
+     outputDataTypeName: String,
+     geometryAPI: GeometryAPI
+ ): ExprCode = {
+     // The conversion snippet is produced on demand from the input reference handed over by nullSafeCodeGen.
+     val conversionFor: String => String =
+         inputRef => fromEval(ctx, ev, inputRef, inputDataType, outputDataTypeName, geometryAPI)
+     nullSafeCodeGen(ctx, ev, conversionFor)
+ }
+ /**
+ * This method generates code to read the geometry from the input format.
+ *
+ * @param ctx CodegenContext used for code generation.
+ * @param eval Reference to the input.
+ * @param inputDataType DataType of the input.
+ * @param geometryAPI GeometryAPI used to manipulate the geometry.
+ * @return Code to construct the geometry from the input format.
+ */
+ def readGeometryCode(ctx: CodegenContext, eval: String, inputDataType: DataType, geometryAPI: GeometryAPI): (String, String) = {
+     val ioCodeGen = geometryAPI.ioCodeGen
+     // Select the reader that corresponds to the input data type, then apply it.
+     val reader: (CodegenContext, String, GeometryAPI) => (String, String) = inputDataType match {
+         case BinaryType => ioCodeGen.fromWKB
+         case StringType => ioCodeGen.fromWKT
+         case HexType => ioCodeGen.fromHex
+         case GeoJSONType => ioCodeGen.fromGeoJSON
+         case _ => throw new Error(s"Geometry API unsupported: ${inputDataType.typeName}.")
+     }
+     reader(ctx, eval, geometryAPI)
+ }
+ /**
+ * This method generates code to write the geometry to the output format.
+ *
+ * @param ctx CodegenContext used for code generation.
+ * @param eval Reference to the input.
+ * @param outputDataType DataType of the output.
+ * @param geometryAPI GeometryAPI used to manipulate the geometry.
+ * @return Code to write the geometry to the output format.
+ */
+ def writeGeometryCode(ctx: CodegenContext, eval: String, outputDataType: DataType, geometryAPI: GeometryAPI): (String, String) =
+     // Resolve the data type to its default format name and delegate to the name-based overload.
+     writeGeometryCode(ctx, eval, GeometryFormat.getDefaultFormat(outputDataType), geometryAPI)
+ /**
+ * This method generates code to write the geometry to the output format.
+ *
+ * @param ctx CodegenContext used for code generation.
+ * @param eval Reference to the input.
+ * @param outputDataFormatName Name of the output format.
+ * @param geometryAPI GeometryAPI used to manipulate the geometry.
+ * @return Code to write the geometry to the output format.
+ */
+ def writeGeometryCode(ctx: CodegenContext, eval: String, outputDataFormatName: String, geometryAPI: GeometryAPI): (String, String) = {
+     val ioCodeGen = geometryAPI.ioCodeGen
+     // Select the writer that corresponds to the format name, then apply it.
+     val writer: (CodegenContext, String, GeometryAPI) => (String, String) = outputDataFormatName match {
+         case "WKB" => ioCodeGen.toWKB
+         case "WKT" => ioCodeGen.toWKT
+         case "HEX" => ioCodeGen.toHEX
+         case "GEOJSON" => ioCodeGen.toGeoJSON
+         case _ => throw new Error(s"Data type unsupported: $outputDataFormatName.")
+     }
+     writer(ctx, eval, geometryAPI)
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryFormat.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryFormat.scala
new file mode 100644
index 000000000..2437daf90
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryFormat.scala
@@ -0,0 +1,24 @@
+package com.databricks.labs.mosaic.core.codegen.format
+import com.databricks.labs.mosaic.core.types._
+import org.apache.spark.sql.types._
+/** Utility object for handling default formats for data types */
+object GeometryFormat {
+ /**
+ * Get the default format for a given data type.
+ *
+ * @param outputDataType The data type to get the default format for.
+ * @return The default format for the given data type.
+ */
+ def getDefaultFormat(outputDataType: DataType): String =
+     outputDataType match {
+         // Binary columns hold WKB, plain strings hold WKT.
+         case BinaryType => "WKB"
+         case StringType => "WKT"
+         // Mosaic-specific types map to their own formats.
+         case HexType => "HEX"
+         case GeoJSONType => "GEOJSON"
+         case _ => throw new Error(s"Unsupported data type ${outputDataType.typeName}.")
+     }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryIOCodeGen.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryIOCodeGen.scala
new file mode 100644
index 000000000..54324e591
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryIOCodeGen.scala
@@ -0,0 +1,29 @@
+package com.databricks.labs.mosaic.core.codegen.format
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
+/**
+ * GeometryIOCodeGen is a trait that defines the interface for generating code for the various geometry formats.
+ * To support a new format toFormat and fromFormat methods need to be added to this trait.
+ * This is the IO CodeGen contract for all Geometry implementations.
+ */
+trait GeometryIOCodeGen {
+ // Every method takes the codegen context, a reference to the input (`eval`) and the geometry API.
+ // ConvertToCodeGen destructures each result as a (generated code, reference to the produced value) pair.
+ /** Code to read a geometry from WKT input (ConvertToCodeGen uses it for StringType input). */
+ def fromWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
+ /** Code to read a geometry from WKB input (ConvertToCodeGen uses it for BinaryType input). */
+ def fromWKB(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
+ /** Code to read a geometry from GeoJSON input (ConvertToCodeGen uses it for GeoJSONType input). */
+ def fromGeoJSON(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
+ /** Code to read a geometry from hex input (ConvertToCodeGen uses it for HexType input). */
+ def fromHex(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
+ /** Code to write the referenced geometry as WKT (selected by the "WKT" format name). */
+ def toWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
+ /** Code to write the referenced geometry as WKB (selected by the "WKB" format name). */
+ def toWKB(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
+ /** Code to write the referenced geometry as GeoJSON (selected by the "GEOJSON" format name). */
+ def toGeoJSON(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
+ /** Code to write the referenced geometry as hex (selected by the "HEX" format name). */
+ def toHEX(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/crs/CRSBounds.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/crs/CRSBounds.scala
new file mode 100644
index 000000000..b91688b2b
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/crs/CRSBounds.scala
@@ -0,0 +1,47 @@
+package com.databricks.labs.mosaic.core.crs
+import com.databricks.labs.mosaic.core.geometry.MosaicPoint
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+/**
+ * CRSBounds captures lower left and upper right extreme points for a given
+ * CRS. Extreme points are provided as MosaicPoints. The CRSBounds instances
+ * are constructed via geometry API.
+ *
+ * @param lowerLeft
+ * Lower left extreme point (xmin, ymin).
+ * @param upperRight
+ * Upper right extreme point (xmax, ymax).
+ */
+case class CRSBounds(lowerLeft: MosaicPoint, upperRight: MosaicPoint) {
+ /** X coordinate of the lower left point. */
+ def getLowerX: Double = lowerLeft.getX
+ /** Y coordinate of the lower left point. */
+ def getLowerY: Double = lowerLeft.getY
+ /** X coordinate of the upper right point. */
+ def getUpperX: Double = upperRight.getX
+ /** Y coordinate of the upper right point. */
+ def getUpperY: Double = upperRight.getY
+object CRSBounds {
+ /**
+ * Construct CRSBounds instance for given extreme coordinate values.
+ * Construction is bound for the selected geometry API at runtime.
+ *
+ * @param geometryAPI
+ * Geometry API attached to Mosaic Context.
+ * @param x1
+ * Minimum x coordinate value.
+ * @param y1
+ * Minimum y coordinate value.
+ * @param x2
+ * Maximum x coordinate value.
+ * @param y2
+ * Maximum y coordinate value.
+ * @return
+ */
+ def apply(geometryAPI: GeometryAPI, x1: Double, y1: Double, x2: Double, y2: Double): CRSBounds = {
+     // Both corner points are built through the supplied geometry API.
+     val lowerLeft = geometryAPI.fromCoords(Seq(x1, y1))
+     val upperRight = geometryAPI.fromCoords(Seq(x2, y2))
+     CRSBounds(lowerLeft, upperRight)
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/crs/CRSBoundsProvider.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/crs/CRSBoundsProvider.scala
new file mode 100644
index 000000000..e78983c82
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/crs/CRSBoundsProvider.scala
@@ -0,0 +1,87 @@
+package com.databricks.labs.mosaic.core.crs
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import java.io.InputStream
+/**
+ * CRSBoundsProvider provides APIs to get bounds extreme points based on CRS
+ * dataset name (ie. EPSG) and CRS id (ie. 4326). The lookup is not exhaustive
+ * and it is generated based on a resource file. The resource file is sourced
+ * from spatialreference.org. Not all CRSs available at spatialreference.org have
+ * bounds specified. Those are skipped in the resource file.
+ *
+ * @see
+ * https://spatialreference.org/
+ * @param lookup
+ * A map of (crs_dataset, id) -> (bounds, reprojected_bounds) pairs.
+ */
+case class CRSBoundsProvider(private val lookup: Map[(String, Int), (CRSBounds, CRSBounds)]) {
+ /**
+ * Returns bounds for provided CRS dataset and ID pair. Bounds are provided
+ * as (longitude, latitude) points. Only lower left and upper right points
+ * are supplied since they correspond to xmin, ymin and xmax and ymax
+ * extremes.
+ *
+ * @param dataset
+ * CRS dataset, e.g. EPSG.
+ * @param id
+ * CRS id within the CRS dataset, e.g. 4326.
+ * @return
+ * an instance of [[CRSBounds]] corresponding to supplied (crs_dataset,
+ * id) pair.
+ */
+ def bounds(dataset: String, id: Int): CRSBounds = {
+     val key = (dataset, id)
+     // Unknown CRS keys are rejected up front with a descriptive message.
+     require(lookup.contains(key), s"Requested CRS does not have boundaries defined: ${(dataset, id)}")
+     // The first element of the stored pair holds the bounds.
+     val (crsBounds, _) = lookup(key)
+     crsBounds
+ }
+ /**
+ * Returns reprojected bounds for provided CRS dataset and ID pair. Bounds
+ * are provided as (longitude, latitude) points equivalents. Only lower
+ * left and upper right points are supplied since they correspond to xmin,
+ * ymin and xmax and ymax extremes.
+ *
+ * @param dataset
+ * CRS dataset, e.g. EPSG.
+ * @param id
+ * CRS id within the CRS dataset, e.g. 27700.
+ * @return
+ * an instance of [[CRSBounds]] corresponding to supplied (crs_dataset,
+ * id) pair.
+ */
+ def reprojectedBounds(dataset: String, id: Int): CRSBounds = {
+     val key = (dataset, id)
+     // Unknown CRS keys are rejected up front with a descriptive message.
+     require(lookup.contains(key), s"Requested CRS does not have boundaries defined: ${(dataset, id)}")
+     // The second element of the stored pair holds the reprojected bounds.
+     val (_, reprojected) = lookup(key)
+     reprojected
+ }
+object CRSBoundsProvider {
+ /**
+ * Creates an instance of [[CRSBoundsProvider]] based on a resource file
+ * containing the bounds' lower left and upper right extreme points. The
+ * lookup contains longitude and latitude bounds and reprojected equivalent
+ * values. The bounds values have been sourced from spatialreference.org.
+ *
+ * @see
+ * https://spatialreference.org/
+ */
+ def apply(geometryAPI: GeometryAPI): CRSBoundsProvider = {
+     val stream: InputStream = getClass.getResourceAsStream("/CRSBounds.csv")
+     // getResourceAsStream returns null when the resource is absent; fail with a clear message.
+     require(stream != null, "Resource /CRSBounds.csv was not found on the classpath.")
+     val source = scala.io.Source.fromInputStream(stream)
+     // Materialise all lines before closing; the first line is dropped (presumably a header row).
+     // Closing the source also closes the underlying stream.
+     val lines: List[String] =
+         try source.getLines.toList.drop(1)
+         finally source.close()
+     val lookupItems = lines
+         .map(line => {
+             // Column 0 is "<dataset>:<id>", columns 1-4 are the bounds and columns 5-8 the reprojected bounds.
+             val lineItems = line.split(",")
+             val nameItems = lineItems(0).split(":")
+             val (crsDataset, id) = (nameItems(0), nameItems(1).toInt)
+             val (x1, y1, x2, y2) = (lineItems(1).toDouble, lineItems(2).toDouble, lineItems(3).toDouble, lineItems(4).toDouble)
+             val (x3, y3, x4, y4) = (lineItems(5).toDouble, lineItems(6).toDouble, lineItems(7).toDouble, lineItems(8).toDouble)
+             (crsDataset, id) -> (CRSBounds(geometryAPI, x1, y1, x2, y2), CRSBounds(geometryAPI, x3, y3, x4, y4))
+         })
+     val lookup = lookupItems.toMap
+     CRSBoundsProvider(lookup)
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/GenericExpressionFactory.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/GenericExpressionFactory.scala
new file mode 100644
index 000000000..0f7479cc8
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/GenericExpressionFactory.scala
@@ -0,0 +1,94 @@
+package com.databricks.labs.mosaic.core.expressions
+import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
+import org.apache.spark.sql.catalyst.expressions.Expression
+import scala.reflect.ClassTag
+import scala.util.Try
+ * GenericExpressionFactory is a factory that creates a function builder for a
+ * given expression. It handles the boilerplate code needed to create a
+ * function builder for a given expression. The functions reflect the types and
+ * identify the correct constructors to be used.
+ */
+object GenericExpressionFactory {
+ /**
+  * Generic implementation of makeCopy, shared so that expressions do not
+  * have to repeat the same code.
+  * @param toCopy
+  *   The expression being copied; its tags are copied onto the new instance.
+  * @param newArgs
+  *   The new arguments for the expression. Only the first nChildren entries
+  *   are used, and each of them is cast to Expression.
+  * @param nChildren
+  *   The number of children expressions the expression has in the logical
+  *   tree.
+  * @param expressionConfig
+  *   Additional arguments for the expression (expressionConfigs).
+  * @tparam T
+  *   The type of the expression.
+  * @return
+  *   A copy of the expression.
+  */
+ def makeCopyImpl[T <: Expression: ClassTag](
+     toCopy: Expression,
+     newArgs: Array[AnyRef],
+     nChildren: Int,
+     expressionConfig: MosaicExpressionConfig
+ ): Expression = {
+     val childExpressions = newArgs.take(nChildren).map(_.asInstanceOf[Expression])
+     val copied = construct[T](childExpressions, expressionConfig)
+     copied.copyTagsFrom(toCopy)
+     copied
+ }
+ /**
+  * Constructs an expression with the given arguments. It identifies the
+  * correct constructor to be used by trying the public constructors of T in
+  * the order returned by reflection and returning the first instance that
+  * can be created. Constructors after the first successful one are not
+  * invoked, so no throwaway instances are created.
+  * @param args
+  *   The arguments for the expression.
+  * @param expressionConfig
+  *   Additional arguments for the expression (expressionConfigs). It is
+  *   appended after args when matching constructor parameters.
+  * @tparam T
+  *   The type of the expression.
+  * @return
+  *   An instance of the expression.
+  * @throws NoSuchElementException
+  *   If none of the constructors can be invoked with the supplied arguments.
+  */
+ def construct[T <: Expression: ClassTag](args: Array[_ <: Expression], expressionConfig: MosaicExpressionConfig): Expression = {
+     val clazz = implicitly[ClassTag[T]].runtimeClass
+     val allArgs = args ++ Seq(expressionConfig)
+     clazz.getConstructors.iterator
+         .map(constructor =>
+             Try {
+                 val argClasses = constructor.getParameterTypes
+                 // Only as many arguments as the constructor declares are passed; each is cast to the declared type.
+                 val castedArgs = allArgs
+                     .take(argClasses.length)
+                     .zip(argClasses)
+                     .map { case (arg, tpe) => tpe.cast(arg) }
+                     .toSeq
+                     .asInstanceOf[Seq[AnyRef]]
+                 constructor.newInstance(castedArgs: _*)
+             }
+         )
+         // The iterator is lazy, so evaluation stops at the first constructor that succeeds.
+         .find(_.isSuccess)
+         .map(_.get.asInstanceOf[Expression])
+         .getOrElse(
+           throw new NoSuchElementException(
+             s"No public constructor of ${clazz.getName} could be invoked with the ${allArgs.length} supplied argument(s)."
+           )
+         )
+ }
+ /**
+  * Creates a function builder for a given expression. The returned builder
+  * keeps the first nChildren child expressions and delegates to
+  * [[construct]], which identifies the correct constructor to be used.
+  * @param nChildren
+  *   The number of child expressions passed on to the constructor.
+  * @param expressionConfig
+  *   Additional arguments for the expression (expressionConfigs).
+  * @tparam T
+  *   The type of the expression.
+  * @return
+  *   A function builder for the expression.
+  */
+ def getBaseBuilder[T <: Expression: ClassTag](nChildren: Int, expressionConfig: MosaicExpressionConfig): FunctionBuilder = {
+     (children: Seq[Expression]) =>
+         {
+             val usedChildren = children.take(nChildren).toArray
+             construct[T](usedChildren, expressionConfig)
+         }
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/MosaicExpressionConfig.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/MosaicExpressionConfig.scala
new file mode 100644
index 000000000..0184566f4
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/MosaicExpressionConfig.scala
@@ -0,0 +1,75 @@
+package com.databricks.labs.mosaic.core.expressions
+import com.databricks.labs.mosaic.core.GenericServiceFactory.{GeometryAPIFactory, IndexSystemFactory, RasterAPIFactory}
+import com.databricks.labs.mosaic.core._
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import com.databricks.labs.mosaic.core.raster.RasterAPI
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.types.DataType
+ * Mosaic Expression Config is a class that contains the configuration for the
+ * Mosaic Expression. Singleton objects are not accessible outside the JVM, so
+ * this is the mechanism to allow for shared context. This is used to control
+ * for the Mosaic runtime APIs and checkpoint locations.
+ *
+ * @param configs
+ * The configuration map for the Mosaic Expression.
+ */
+case class MosaicExpressionConfig(configs: Map[String, String]) {
+ /**
+  * Writes every entry of `configs` into the runtime configuration of the
+  * Spark session returned by `SparkSession.builder().getOrCreate()`.
+  *
+  * The entries are written through `spark.conf` because
+  * `SparkContext.getConf` returns a copy of the configuration, so values
+  * set on that copy are never visible to the session.
+  *
+  * NOTE(review): Spark may reject keys that are not modifiable at runtime
+  * (e.g. static configs) - confirm only runtime-settable keys are stored.
+  */
+ def updateSparkConf(): Unit = {
+     val spark = SparkSession.builder().getOrCreate()
+     configs.foreach { case (k, v) => spark.conf.set(k, v) }
+ }
+ /** Looks up the name stored under MOSAIC_GEOMETRY_API and asks GeometryAPIFactory for that API. */
+ def getGeometryAPI(params: Array[Object] = Array.empty): GeometryAPI = {
+     val apiName = configs(MOSAIC_GEOMETRY_API)
+     GeometryAPIFactory.getGeometryAPI(apiName, params)
+ }
+ /** Looks up the name stored under MOSAIC_INDEX_SYSTEM and asks IndexSystemFactory for that index system. */
+ def getIndexSystem(params: Array[Object] = Array.empty): IndexSystem = {
+     val systemName = configs(MOSAIC_INDEX_SYSTEM)
+     IndexSystemFactory.getIndexSystem(systemName, params)
+ }
+ /** Looks up the name stored under MOSAIC_RASTER_API and asks RasterAPIFactory for that API. */
+ def getRasterAPI(params: Array[Object] = Array.empty): RasterAPI = {
+     val apiName = configs(MOSAIC_RASTER_API)
+     RasterAPIFactory.getRasterAPI(apiName, params)
+ }
+ /** Returns the configured raster checkpoint, or MOSAIC_RASTER_CHECKPOINT_DEFAULT when none is set. */
+ def getRasterCheckpoint: String = configs.get(MOSAIC_RASTER_CHECKPOINT).getOrElse(MOSAIC_RASTER_CHECKPOINT_DEFAULT)
+ /** Returns the cell id type of the configured index system (resolved with default params). */
+ def getCellIdType: DataType = {
+     val indexSystem = getIndexSystem()
+     indexSystem.cellIdType
+ }
+ /** Returns a new config with MOSAIC_GEOMETRY_API set to the given name; this instance is not modified. */
+ def setGeometryAPI(api: String): MosaicExpressionConfig = setConfig(MOSAIC_GEOMETRY_API, api)
+ /** Returns a new config with MOSAIC_INDEX_SYSTEM set to the given name; this instance is not modified. */
+ def setIndexSystem(system: String): MosaicExpressionConfig = setConfig(MOSAIC_INDEX_SYSTEM, system)
+ /** Returns a new config with MOSAIC_RASTER_API set to the given name; this instance is not modified. */
+ def setRasterAPI(api: String): MosaicExpressionConfig = setConfig(MOSAIC_RASTER_API, api)
+ /** Returns a new config with MOSAIC_RASTER_CHECKPOINT set to the given value; this instance is not modified. */
+ def setRasterCheckpoint(checkpoint: String): MosaicExpressionConfig = setConfig(MOSAIC_RASTER_CHECKPOINT, checkpoint)
+ /**
+  * Returns a new config in which `key` maps to `value`; an existing entry
+  * for `key` is replaced and all other entries are kept.
+  */
+ def setConfig(key: String, value: String): MosaicExpressionConfig = {
+     val updated = configs + (key -> value)
+     copy(configs = updated)
+ }
+ * Companion object for the Mosaic Expression Config. Provides constructors
+ * from spark session configuration.
+ */
+object MosaicExpressionConfig {
+ /**
+  * Builds a config from the session's runtime configuration. The geometry
+  * API, index system and raster API entries are read (in that order) with
+  * `spark.conf.get`, so each of the three keys has to be present.
+  */
+ def apply(spark: SparkSession): MosaicExpressionConfig = {
+     val geometryApiName = spark.conf.get(MOSAIC_GEOMETRY_API)
+     val indexSystemName = spark.conf.get(MOSAIC_INDEX_SYSTEM)
+     val rasterApiName = spark.conf.get(MOSAIC_RASTER_API)
+     new MosaicExpressionConfig(
+       Map(
+         MOSAIC_GEOMETRY_API -> geometryApiName,
+         MOSAIC_INDEX_SYSTEM -> indexSystemName,
+         MOSAIC_RASTER_API -> rasterApiName
+       )
+     )
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/WithExpressionInfo.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/WithExpressionInfo.scala
new file mode 100644
index 000000000..02f8b32b1
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/WithExpressionInfo.scala
@@ -0,0 +1,56 @@
+package com.databricks.labs.mosaic.core.expressions
+import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo}
+import scala.reflect.ClassTag
+ * WithExpressionInfo is a trait that defines the interface for adding
+ * expression to spark SQL. Any expression that needs to be added to spark SQL
+ * should extend this trait.
+ */
+trait WithExpressionInfo {
+ def name: String // function name handed to ExpressionInfo; has no default and must be supplied
+ def database: Option[String] = None // fallback database used by getExpressionInfo when none is passed in
+ def usage: String = "" // usage text handed to ExpressionInfo
+ def example: String = "" // example text handed to ExpressionInfo
+ def group: String = "misc_funcs" // function group handed to ExpressionInfo
+ /**
+ * Returns the expression builder (parser for spark SQL).
+ * @param expressionConfig
+ * The Mosaic expression configuration made available to the builder.
+ * @return
+ * An expression builder.
+ */
+ def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder
+ /**
+  * Builds the ExpressionInfo for the expression type T from the members of
+  * this trait, which keeps the boilerplate for adding an expression to
+  * spark SQL small.
+  * @param database
+  *   Database to report; when empty, this trait's `database` member is used
+  *   (null if that is empty as well).
+  * @tparam T
+  *   The type of the expression.
+  * @return
+  *   An ExpressionInfo object.
+  */
+ final def getExpressionInfo[T <: Expression: ClassTag](database: Option[String] = None): ExpressionInfo = {
+     val expressionClass = implicitly[ClassTag[T]].runtimeClass
+     val className = expressionClass.getCanonicalName
+     // The explicit argument wins; the trait-level default is only consulted when it is empty.
+     val databaseName = database.orElse(this.database).orNull
+     // Positional arguments follow the constructor's parameter order; the empty strings and the literals are fixed.
+     new ExpressionInfo(className, databaseName, name, usage, "", example, "", group, "1.0", "", "built-in")
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/BinaryVectorExpression.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/BinaryVectorExpression.scala
new file mode 100644
index 000000000..e1e85d28f
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/BinaryVectorExpression.scala
@@ -0,0 +1,132 @@
+package com.databricks.labs.mosaic.core.expressions.geometry
+import com.databricks.labs.mosaic.core.codegen.format.ConvertToCodeGen
+import com.databricks.labs.mosaic.core.expressions.{GenericExpressionFactory, MosaicExpressionConfig}
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
+import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Expression, NullIntolerant}
+import scala.reflect.ClassTag
+ * Base class for all binary geometry expressions. It provides the boilerplate
+ * for creating a function builder for a given expression. It minimises amount
+ * of code needed to create a new expression.
+ *
+ * @param leftGeometryExpr
+ * The expression for the left/first geometry.
+ * @param rightGeometryExpr
+ * The expression for the right/second geometry.
+ * @param returnsGeometry
+ * Whether the expression returns a geometry or not.
+ * @param expressionConfig
+ * Additional arguments for the expression (expressionConfigs).
+ * @tparam T
+ * The type of the extending class.
+ */
+abstract class BinaryVectorExpression[T <: Expression: ClassTag](
+ leftGeometryExpr: Expression,
+ rightGeometryExpr: Expression,
+ returnsGeometry: Boolean,
+ expressionConfig: MosaicExpressionConfig
+) extends BinaryExpression
+ with VectorExpression
+ with NullIntolerant
+ with Serializable {
+ override def left: Expression = leftGeometryExpr // first child: the left geometry expression
+ override def right: Expression = rightGeometryExpr // second child: the right geometry expression
+ override def geometryAPI: GeometryAPI = expressionConfig.getGeometryAPI() // looked up from the expression config on every access
+ /**
+ * The function to be overridden by the extending class. It is called when
+ * the expression is evaluated. It provides the vector geometries to the
+ * expression. It abstracts spark serialization from the caller.
+ * @param leftGeometry
+ * The left/first geometry.
+ * @param rightGeometry
+ * The right/second geometry.
+ * @return
+ * A result of the expression. It has to be a MosaicGeometry when the
+ * expression was created with returnsGeometry = true.
+ */
+ def geometryTransform(leftGeometry: MosaicGeometry, rightGeometry: MosaicGeometry): Any
+ /**
+  * Evaluates the expression for non-null inputs: both values are converted
+  * to geometries, passed to geometryTransform, and the result is
+  * serialised using the data type of the left geometry expression.
+  *
+  * @param leftGeometryRow
+  *   The row containing the left/first geometry.
+  * @param rightGeometryRow
+  *   The row containing the right/second geometry.
+  * @return
+  *   The result of the expression.
+  */
+ //noinspection DuplicatedCode
+ override def nullSafeEval(leftGeometryRow: Any, rightGeometryRow: Any): Any = {
+     val lhs = geometryAPI.valueToGeometry(leftGeometryRow, leftGeometryExpr.dataType)
+     val rhs = geometryAPI.valueToGeometry(rightGeometryRow, rightGeometryExpr.dataType)
+     serialise(geometryTransform(lhs, rhs), returnsGeometry, leftGeometryExpr.dataType)
+ }
+ /**
+ * The function to be overridden by the extending class. It is called when
+ * the expression codegen is evaluated. It abstracts spark serialization
+ * and deserialization from the caller codegen.
+ * @param leftMosaicGeometryRef
+ * The left/first mosaic geometry reference.
+ * @param rightMosaicGeometryRef
+ * The right/second mosaic geometry reference.
+ * @param ctx
+ * The codegen context.
+ * @return
+ * A tuple containing the code and the reference to the result.
+ */
+ def geometryCodeGen(leftMosaicGeometryRef: String, rightMosaicGeometryRef: String, ctx: CodegenContext): (String, String)
+ /** Copies this expression through the generic factory, using the first two entries of newArgs as children. */
+ override def makeCopy(newArgs: Array[AnyRef]): Expression =
+     GenericExpressionFactory.makeCopyImpl[T](toCopy = this, newArgs = newArgs, nChildren = 2, expressionConfig = expressionConfig)
+ /** Replaces both children by delegating to makeCopy. */
+ override def withNewChildrenInternal(newFirst: Expression, newSecond: Expression): Expression =
+     makeCopy(Array[AnyRef](newFirst, newSecond))
+ /**
+  * The actual codegen implementation. It emits the code that reads both
+  * geometries, the code returned by geometryCodeGen and the serialisation
+  * code, wrapped by the geometry API's try-wrapper. The extending class
+  * does not need to override this method.
+  *
+  * @param ctx
+  *   The codegen context.
+  * @param ev
+  *   The expression code.
+  * @return
+  *   The result of the expression.
+  */
+ //noinspection DuplicatedCode
+ override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
+     nullSafeCodeGen(
+       ctx,
+       ev,
+       (leftEval, rightEval) => {
+           // The order of these calls is kept because each of them receives the shared codegen context.
+           val (readLeft, leftRawRef) = ConvertToCodeGen.readGeometryCode(ctx, leftEval, leftGeometryExpr.dataType, geometryAPI)
+           val (readRight, rightRawRef) = ConvertToCodeGen.readGeometryCode(ctx, rightEval, rightGeometryExpr.dataType, geometryAPI)
+           val (transformCode, transformRef) = geometryCodeGen(mosaicGeometryRef(leftRawRef), mosaicGeometryRef(rightRawRef), ctx)
+           val (writeCode, writtenRef) = serialiseCodegen(transformRef, returnsGeometry, leftGeometryExpr.dataType, ctx)
+           geometryAPI.codeGenTryWrap(s"""
+                                         |$readLeft
+                                         |$readRight
+                                         |$transformCode
+                                         |$writeCode
+                                         |${ev.value} = $writtenRef;
+                                         |""".stripMargin)
+       }
+     )
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/RequiresCRS.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/RequiresCRS.scala
new file mode 100644
index 000000000..e064c490e
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/RequiresCRS.scala
@@ -0,0 +1,24 @@
+package com.databricks.labs.mosaic.core.expressions.geometry
+import com.databricks.labs.mosaic.core.MosaicException
+import com.databricks.labs.mosaic.core.codegen.format.GeometryFormat
+import org.apache.spark.sql.types._
+import scala.collection.immutable
+ * Trait for checking if the input geometry is in a supported CRS.
+ * Currently only supports GEOJSON.
+ */
+trait RequiresCRS {
+ /** Geometry encodings accepted by checkEncoding. */
+ val encodings: immutable.Seq[String] = List("GEOJSON")
+ /**
+  * Verifies that the default geometry format of the given data type is one
+  * of the accepted encodings.
+  * @param dataType
+  *   The data type of the input geometry.
+  * @throws MosaicException.GeometryEncodingNotSupported
+  *   If the detected encoding is not listed in encodings.
+  */
+ def checkEncoding(dataType: DataType): Unit = {
+     val detectedEncoding = GeometryFormat.getDefaultFormat(dataType)
+     val supported = encodings.contains(detectedEncoding)
+     if (!supported) throw MosaicException.GeometryEncodingNotSupported(encodings, detectedEncoding)
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector1ArgExpression.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector1ArgExpression.scala
new file mode 100644
index 000000000..97db51d02
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector1ArgExpression.scala
@@ -0,0 +1,131 @@
+package com.databricks.labs.mosaic.core.expressions.geometry
+import com.databricks.labs.mosaic.core.codegen.format.ConvertToCodeGen
+import com.databricks.labs.mosaic.core.expressions.{GenericExpressionFactory, MosaicExpressionConfig}
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
+import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Expression, NullIntolerant}
+import scala.reflect.ClassTag
+ * Base class for all unary geometry expressions that require 1 additional
+ * argument. It provides the boilerplate for creating a function builder for a
+ * given expression. It minimises amount of code needed to create a new
+ * expression. The term unary refers to number of input geometries. By
+ * convention the number of arguments will be handled via number in the class
+ * name.
+ *
+ * @param geometryExpr
+ * The expression for the geometry.
+ * @param argExpr
+ * The expression for the argument.
+ * @param returnsGeometry
+ * Whether the expression returns a geometry or not.
+ * @param expressionConfig
+ * Additional arguments for the expression (expressionConfigs).
+ * @tparam T
+ * The type of the extending class.
+ */
+abstract class UnaryVector1ArgExpression[T <: Expression : ClassTag](
+ geometryExpr: Expression,
+ argExpr: Expression,
+ returnsGeometry: Boolean,
+ expressionConfig: MosaicExpressionConfig
+ ) extends BinaryExpression
+ with VectorExpression
+ with NullIntolerant
+ with Serializable {
+ override def left: Expression = geometryExpr // first child: the geometry expression
+ override def right: Expression = argExpr // second child: the additional argument expression
+ override def geometryAPI: GeometryAPI = expressionConfig.getGeometryAPI() // looked up from the expression config on every access
+ /**
+ * The function to be overridden by the extending class. It is called when
+ * the expression is evaluated. It provides the vector geometry to the
+ * expression. It abstracts spark serialization from the caller.
+ *
+ * @param geometry
+ * The geometry.
+ * @param arg
+ * The argument, passed through as evaluated (it is not converted).
+ * @return
+ * A result of the expression. It has to be a MosaicGeometry when the
+ * expression was created with returnsGeometry = true.
+ */
+ def geometryTransform(geometry: MosaicGeometry, arg: Any): Any
+ /**
+  * Evaluates the expression for non-null inputs: the geometry value is
+  * converted to a geometry, passed to geometryTransform together with the
+  * argument, and the result is serialised using the data type of the
+  * geometry expression.
+  *
+  * @param geometryRow
+  *   The row containing the geometry.
+  * @param arg
+  *   The evaluated additional argument.
+  * @return
+  *   The result of the expression.
+  */
+ //noinspection DuplicatedCode
+ override def nullSafeEval(geometryRow: Any, arg: Any): Any = {
+     val inputGeometry = geometryAPI.valueToGeometry(geometryRow, geometryExpr.dataType)
+     serialise(geometryTransform(inputGeometry, arg), returnsGeometry, geometryExpr.dataType)
+ }
+ /**
+ * The function to be overridden by the extending class. It is called when
+ * the expression codegen is evaluated. It abstracts spark serialization
+ * and deserialization from the caller codegen.
+ *
+ * @param geometryRef
+ * The geometry reference.
+ * @param argRef
+ * The argument reference.
+ * @param ctx
+ * The codegen context.
+ * @return
+ * A tuple containing the code and the reference to the result.
+ */
+ def geometryCodeGen(geometryRef: String, argRef: String, ctx: CodegenContext): (String, String)
+ /** Copies this expression through the generic factory, using the first two entries of newArgs as children. */
+ override def makeCopy(newArgs: Array[AnyRef]): Expression =
+     GenericExpressionFactory.makeCopyImpl[T](toCopy = this, newArgs = newArgs, nChildren = 2, expressionConfig = expressionConfig)
+ /** Replaces both children by delegating to makeCopy. */
+ override def withNewChildrenInternal(newFirst: Expression, newSecond: Expression): Expression =
+     makeCopy(Array[AnyRef](newFirst, newSecond))
+ /**
+  * The actual codegen implementation. It emits the code that reads the
+  * geometry, the code returned by geometryCodeGen and the serialisation
+  * code, wrapped by the geometry API's try-wrapper. The extending class
+  * does not need to override this method.
+  *
+  * @param ctx
+  *   The codegen context.
+  * @param ev
+  *   The expression code.
+  * @return
+  *   The result of the expression.
+  */
+ override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
+     nullSafeCodeGen(
+       ctx,
+       ev,
+       (leftEval, rightEval) => {
+           // The order of these calls is kept because each of them receives the shared codegen context.
+           val (readCode, rawGeomRef) = ConvertToCodeGen.readGeometryCode(ctx, leftEval, geometryExpr.dataType, geometryAPI)
+           // The argument's evaluated reference is handed to the subclass unchanged.
+           val (transformCode, transformRef) = geometryCodeGen(mosaicGeometryRef(rawGeomRef), rightEval, ctx)
+           val (writeCode, writtenRef) = serialiseCodegen(transformRef, returnsGeometry, geometryExpr.dataType, ctx)
+           geometryAPI.codeGenTryWrap(s"""
+                                         |$readCode
+                                         |$transformCode
+                                         |$writeCode
+                                         |${ev.value} = $writtenRef;
+                                         |""".stripMargin)
+       }
+     )
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector2ArgExpression.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector2ArgExpression.scala
new file mode 100644
index 000000000..25a1e6643
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector2ArgExpression.scala
@@ -0,0 +1,141 @@
+package com.databricks.labs.mosaic.core.expressions.geometry
+import com.databricks.labs.mosaic.core.codegen.format.ConvertToCodeGen
+import com.databricks.labs.mosaic.core.expressions.{GenericExpressionFactory, MosaicExpressionConfig}
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
+import org.apache.spark.sql.catalyst.expressions.{Expression, NullIntolerant, TernaryExpression}
+import scala.reflect.ClassTag
+ * Base class for all unary geometry expressions that require 2 additional
+ * arguments. It provides the boilerplate for creating a function builder for a
+ * given expression. It minimises amount of code needed to create a new
+ * expression. The term unary refers to number of input geometries. By
+ * convention the number of arguments will be handled via number in the class
+ * name.
+ *
+ * @param geometryExpr
+ * The expression for the geometry.
+ * @param arg1Expr
+ * The expression for the first argument.
+ * @param arg2Expr
+ * The expression for the second argument.
+ * @param returnsGeometry
+ * Whether the expression returns a geometry or not.
+ * @param expressionConfig
+ * Additional arguments for the expression (expressionConfigs).
+ * @tparam T
+ * The type of the extending class.
+ */
+abstract class UnaryVector2ArgExpression[T <: Expression: ClassTag](
+ geometryExpr: Expression,
+ arg1Expr: Expression,
+ arg2Expr: Expression,
+ returnsGeometry: Boolean,
+ expressionConfig: MosaicExpressionConfig
+) extends TernaryExpression
+ with VectorExpression
+ with NullIntolerant
+ with Serializable {
+ override def first: Expression = geometryExpr // first child: the geometry expression
+ override def second: Expression = arg1Expr // second child: the first additional argument
+ override def third: Expression = arg2Expr // third child: the second additional argument
+ override def geometryAPI: GeometryAPI = expressionConfig.getGeometryAPI() // looked up from the expression config on every access
+ /**
+ * The function to be overridden by the extending class. It is called when
+ * the expression is evaluated. It provides the vector geometry to the
+ * expression. It abstracts spark serialization from the caller.
+ * @param geometry
+ * The geometry.
+ * @param arg1
+ * The first argument.
+ * @param arg2
+ * The second argument.
+ * @return
+ * A result of the expression. It has to be a MosaicGeometry when the
+ * expression was created with returnsGeometry = true.
+ */
+ def geometryTransform(geometry: MosaicGeometry, arg1: Any, arg2: Any): Any
+ /**
+  * Evaluates the expression for non-null inputs: the geometry value is
+  * converted to a geometry, passed to geometryTransform together with both
+  * arguments, and the result is serialised using the data type of the
+  * geometry expression.
+  * @param geometryRow
+  *   The row containing the geometry.
+  * @param arg1
+  *   The first argument.
+  * @param arg2
+  *   The second argument.
+  * @return
+  *   The result of the expression.
+  */
+ override def nullSafeEval(geometryRow: Any, arg1: Any, arg2: Any): Any = {
+     val inputGeometry = geometryAPI.valueToGeometry(geometryRow, geometryExpr.dataType)
+     serialise(geometryTransform(inputGeometry, arg1, arg2), returnsGeometry, geometryExpr.dataType)
+ }
+ /**
+ * The function to be overridden by the extending class. It is called when
+ * the expression codegen is evaluated. It abstracts spark serialization
+ * and deserialization from the caller codegen.
+ * @param mosaicGeometryRef
+ * The reference to mosaic geometry.
+ * @param arg1Ref
+ * The first argument reference.
+ * @param arg2Ref
+ * The second argument reference.
+ * @param ctx
+ * The codegen context.
+ * @return
+ * A tuple containing the code and the reference to the result.
+ */
+ def geometryCodeGen(mosaicGeometryRef: String, arg1Ref: String, arg2Ref: String, ctx: CodegenContext): (String, String)
+ /** Copies this expression through the generic factory, using the first three entries of newArgs as children. */
+ override def makeCopy(newArgs: Array[AnyRef]): Expression =
+     GenericExpressionFactory.makeCopyImpl[T](toCopy = this, newArgs = newArgs, nChildren = 3, expressionConfig = expressionConfig)
+ /** Replaces all three children by delegating to makeCopy. */
+ override def withNewChildrenInternal(newFirst: Expression, newSecond: Expression, newThird: Expression): Expression =
+     makeCopy(Array[AnyRef](newFirst, newSecond, newThird))
+ /**
+  * The actual codegen implementation. It emits the code that reads the
+  * geometry, the code returned by geometryCodeGen and the serialisation
+  * code, wrapped by the geometry API's try-wrapper. The extending class
+  * does not need to override this method.
+  *
+  * @param ctx
+  *   The codegen context.
+  * @param ev
+  *   The expression code.
+  * @return
+  *   The result of the expression.
+  */
+ //noinspection DuplicatedCode
+ override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
+     nullSafeCodeGen(
+       ctx,
+       ev,
+       (geomEval, arg1Eval, arg2Eval) => {
+           // The order of these calls is kept because each of them receives the shared codegen context.
+           val (readCode, rawGeomRef) = ConvertToCodeGen.readGeometryCode(ctx, geomEval, geometryExpr.dataType, geometryAPI)
+           // Both argument references are handed to the subclass unchanged.
+           val (transformCode, transformRef) = geometryCodeGen(mosaicGeometryRef(rawGeomRef), arg1Eval, arg2Eval, ctx)
+           val (writeCode, writtenRef) = serialiseCodegen(transformRef, returnsGeometry, geometryExpr.dataType, ctx)
+           geometryAPI.codeGenTryWrap(s"""
+                                         |$readCode
+                                         |$transformCode
+                                         |$writeCode
+                                         |${ev.value} = $writtenRef;
+                                         |""".stripMargin)
+       }
+     )
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVectorExpression.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVectorExpression.scala
new file mode 100644
index 000000000..09f3816c6
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVectorExpression.scala
@@ -0,0 +1,115 @@
+package com.databricks.labs.mosaic.core.expressions.geometry
+import com.databricks.labs.mosaic.core.codegen.format.ConvertToCodeGen
+import com.databricks.labs.mosaic.core.expressions.{GenericExpressionFactory, MosaicExpressionConfig}
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
+import org.apache.spark.sql.catalyst.expressions.{Expression, NullIntolerant, UnaryExpression}
+import scala.reflect.ClassTag
+ * Base class for all unary geometry expressions. It provides the boilerplate
+ * for creating a function builder for a given expression. It minimises amount
+ * of code needed to create a new expression.
+ *
+ * @param geometryExpr
+ * The expression for the geometry.
+ * @param returnsGeometry
+ * Whether the expression returns a geometry or not.
+ * @param expressionConfig
+ * Additional arguments for the expression (expressionConfigs).
+ * @tparam T
+ * The type of the extending class.
+ */
+abstract class UnaryVectorExpression[T <: Expression: ClassTag](
+ geometryExpr: Expression,
+ returnsGeometry: Boolean,
+ expressionConfig: MosaicExpressionConfig
+) extends UnaryExpression
+ with VectorExpression
+ with NullIntolerant
+ with Serializable {
+ override def child: Expression = geometryExpr // only child: the geometry expression
+ override def geometryAPI: GeometryAPI = expressionConfig.getGeometryAPI() // looked up from the expression config on every access
+ /**
+ * The function to be overridden by the extending class. It is called when
+ * the expression is evaluated. It provides the vector geometry to the
+ * expression. It abstracts spark serialization from the caller.
+ * @param geometry
+ * The geometry.
+ * @return
+ * A result of the expression. It has to be a MosaicGeometry when the
+ * expression was created with returnsGeometry = true.
+ */
+ def geometryTransform(geometry: MosaicGeometry): Any
+ /**
+  * Evaluates the expression for a non-null input: the value is converted
+  * to a geometry, passed to geometryTransform, and the result is
+  * serialised using the data type of the geometry expression.
+  * @param geometryRow
+  *   The row containing the geometry.
+  *
+  * @return
+  *   The result of the expression.
+  */
+ override def nullSafeEval(geometryRow: Any): Any = {
+     val inputGeometry = geometryAPI.valueToGeometry(geometryRow, geometryExpr.dataType)
+     serialise(geometryTransform(inputGeometry), returnsGeometry, geometryExpr.dataType)
+ }
+ /**
+ * The function to be overridden by the extending class. It is called when
+ * the expression codegen is evaluated. It abstracts spark serialization
+ * and deserialization from the caller codegen.
+ * @param mosaicGeometryRef
+ * The reference to mosaic geometry.
+ * @param ctx
+ * The codegen context.
+ * @return
+ * A tuple containing the code and the reference to the result.
+ */
+ def geometryCodeGen(mosaicGeometryRef: String, ctx: CodegenContext): (String, String)
+ /** Copies this expression through the generic factory, using the first entry of newArgs as the child. */
+ override def makeCopy(newArgs: Array[AnyRef]): Expression =
+     GenericExpressionFactory.makeCopyImpl[T](toCopy = this, newArgs = newArgs, nChildren = 1, expressionConfig = expressionConfig)
+ /** Replaces the child by delegating to makeCopy. */
+ override def withNewChildInternal(newFirst: Expression): Expression = makeCopy(Array[AnyRef](newFirst))
+ /**
+  * The actual codegen implementation. It emits the code that reads the
+  * geometry, the code returned by geometryCodeGen and the serialisation
+  * code, wrapped by the geometry API's try-wrapper. The extending class
+  * does not need to override this method.
+  *
+  * @param ctx
+  *   The codegen context.
+  * @param ev
+  *   The expression code.
+  * @return
+  *   The result of the expression.
+  */
+ //noinspection DuplicatedCode
+ override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
+     nullSafeCodeGen(
+       ctx,
+       ev,
+       eval => {
+           // The order of these calls is kept because each of them receives the shared codegen context.
+           val (readCode, rawGeomRef) = ConvertToCodeGen.readGeometryCode(ctx, eval, geometryExpr.dataType, geometryAPI)
+           val (transformCode, transformRef) = geometryCodeGen(mosaicGeometryRef(rawGeomRef), ctx)
+           val (writeCode, writtenRef) = serialiseCodegen(transformRef, returnsGeometry, geometryExpr.dataType, ctx)
+           geometryAPI.codeGenTryWrap(s"""
+                                         |$readCode
+                                         |$transformCode
+                                         |$writeCode
+                                         |${ev.value} = $writtenRef;
+                                         |""".stripMargin)
+       }
+     )
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/VectorExpression.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/VectorExpression.scala
new file mode 100644
index 000000000..c11a03015
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/geometry/VectorExpression.scala
@@ -0,0 +1,95 @@
+package com.databricks.labs.mosaic.core.expressions.geometry
+import com.databricks.labs.mosaic.core.codegen.format.ConvertToCodeGen
+import com.databricks.labs.mosaic.core.crs.CRSBoundsProvider
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
+import org.apache.spark.sql.types.DataType
+ * Base class for all vector expressions. It provides the boilerplate for
+ * creating a function builder for a given expression. It minimises amount of
+ * code needed to create a new expression.
+ */
+trait VectorExpression {
+ def geometryAPI: GeometryAPI // supplied by the implementing expression
+ def mosaicGeomClass: String = geometryAPI.mosaicGeometryClass // class name string reported by the geometry API
+ def geomClass: String = geometryAPI.geometryClass // class name string reported by the geometry API; used as a variable type in serialiseCodegen
+ def CRSBoundsProviderClass: String = classOf[CRSBoundsProvider].getName // fully qualified name of CRSBoundsProvider
+ def geometryAPIClass: String = classOf[GeometryAPI].getName // fully qualified name of GeometryAPI
+ /**
+  * Generic serialisation of an expression result. A non-geometry result is
+  * returned untouched; a geometry result is cast to MosaicGeometry and
+  * serialised through the geometry API.
+  *
+  * @param result
+  *   The result of the expression.
+  * @param returnsGeometry
+  *   Whether the expression returns a geometry.
+  * @param dataType
+  *   The data type of the result.
+  * @return
+  *   The serialised result.
+  */
+ def serialise(result: Any, returnsGeometry: Boolean, dataType: DataType): Any = {
+     if (!returnsGeometry) {
+         result
+     } else {
+         val geometry = result.asInstanceOf[MosaicGeometry]
+         geometryAPI.serialize(geometry, dataType)
+     }
+ }
+ /**
+  * Generic serialisation codegen for an expression result. For a
+  * non-geometry result no code is emitted and the incoming reference is
+  * returned. For a geometry result the emitted code extracts the base
+  * geometry into a fresh variable and writes it in the requested format.
+  *
+  * @param resultRef
+  *   The result of the expression.
+  * @param returnsGeometry
+  *   Whether the expression returns a geometry.
+  * @param dataType
+  *   The data type of the result.
+  * @param ctx
+  *   The codegen context.
+  * @return
+  *   The serialisation code and the reference to the serialised result.
+  */
+ def serialiseCodegen(resultRef: String, returnsGeometry: Boolean, dataType: DataType, ctx: CodegenContext): (String, String) = {
+     if (!returnsGeometry) {
+         ("", resultRef) // noop code
+     } else {
+         // The fresh name is requested before the write code is generated, as both use the same context.
+         val baseGeomVar = ctx.freshName("baseGeometry")
+         val (writeCode, writtenRef) = ConvertToCodeGen.writeGeometryCode(ctx, baseGeomVar, dataType, geometryAPI)
+         val fullCode =
+             s"""
+                |$geomClass $baseGeomVar = $resultRef.getGeom();
+                |$writeCode
+                |""".stripMargin
+         (fullCode, writtenRef)
+     }
+ }
+ /**
+  * Builds the codegen snippet that wraps a geometry variable by calling
+  * `apply` on the geometry API's mosaic geometry class.
+  *
+  * @param geometryRef
+  *   The geometry variable reference.
+  * @return
+  *   The mosaic geometry instance in codegen.
+  */
+ def mosaicGeometryRef(geometryRef: String): String = {
+     val mosaicClassName = geometryAPI.mosaicGeometryClass
+     s"$mosaicClassName.apply($geometryRef)"
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster1ArgExpression.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster1ArgExpression.scala
new file mode 100644
index 000000000..e3bc9ba9b
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster1ArgExpression.scala
@@ -0,0 +1,96 @@
+package com.databricks.labs.mosaic.core.expressions.raster
+import com.databricks.labs.mosaic.core.expressions.{GenericExpressionFactory, MosaicExpressionConfig}
+import com.databricks.labs.mosaic.core.raster.{MosaicRaster, RasterAPI}
+import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Expression, NullIntolerant}
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.unsafe.types.UTF8String
+import scala.reflect.ClassTag
+import scala.util.Try
+ * Base class for all raster expressions that take one argument in addition
+ * to the raster path. It provides the boilerplate code needed to create a
+ * function builder for a given expression. It minimises the amount of code
+ * needed to create a new expression.
+ *
+ * @param pathExpr
+ * The expression for the raster path.
+ * @param arg1Expr
+ * The expression for the first argument.
+ * @param outputType
+ * The output type of the result.
+ * @param expressionConfig
+ * Additional arguments for the expression (expressionConfigs).
+ * @tparam T
+ * The type of the extending class.
+ */
+abstract class Raster1ArgExpression[T <: Expression : ClassTag](
+ pathExpr: Expression,
+ arg1Expr: Expression,
+ outputType: DataType,
+ expressionConfig: MosaicExpressionConfig
+ ) extends BinaryExpression
+ with NullIntolerant
+ with Serializable {
+ /**
+  * The raster API obtained from the expression config. Enabling it is
+  * attempted at construction time so that subclasses don't need to worry
+  * about it; a failure of the enable call is captured by Try and the
+  * outcome is discarded.
+  */
+ protected val rasterAPI: RasterAPI = expressionConfig.getRasterAPI()
+ Try(rasterAPI.enable())
+ /** Left child: the expression for the raster path. */
+ override def left: Expression = pathExpr
+ /** Right child: the expression for the first argument. */
+ override def right: Expression = arg1Expr
+ /** Output data type, as supplied through the constructor. */
+ override def dataType: DataType = outputType
+ /**
+ * The function to be overridden by the extending class. It is called by
+ * nullSafeEval when the expression is evaluated. It provides the loaded
+ * raster and the argument to the expression, so that implementations do
+ * not deal with loading or cleaning up the raster.
+ *
+ * @param raster
+ * The raster to be used.
+ * @param arg1
+ * The first argument, passed through exactly as received by nullSafeEval.
+ * @return
+ * A result of the expression.
+ */
+ def rasterTransform(raster: MosaicRaster, arg1: Any): Any
+ /**
+  * Evaluation of the expression. It converts the evaluated raster path to a
+  * string, loads the raster from that path and applies rasterTransform to
+  * it. The raster is always cleaned up before this method exits, including
+  * when rasterTransform throws, so a failing transform does not leak the
+  * raster.
+  *
+  * @param inputPath
+  *   The path to the raster. It is a UTF8String.
+  * @param arg1
+  *   The first argument.
+  * @return
+  *   The result of the expression.
+  */
+ override def nullSafeEval(inputPath: Any, arg1: Any): Any = {
+     val path = inputPath.asInstanceOf[UTF8String].toString
+     val raster = rasterAPI.raster(path)
+     try {
+         rasterTransform(raster, arg1)
+     } finally {
+         // Runs on both the success and the failure path.
+         raster.cleanUp()
+     }
+ }
+ /** Delegates copying to GenericExpressionFactory.makeCopyImpl, passing 2 as the children count together with the expression config. */
+ override def makeCopy(newArgs: Array[AnyRef]): Expression = {
+     GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 2, expressionConfig)
+ }
+ /** Replaces both children by delegating to makeCopy. */
+ override def withNewChildrenInternal(newFirst: Expression, newArg1: Expression): Expression = {
+     val replacementChildren = Array[AnyRef](newFirst, newArg1)
+     makeCopy(replacementChildren)
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster2ArgExpression.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster2ArgExpression.scala
new file mode 100644
index 000000000..5aa03ed07
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster2ArgExpression.scala
@@ -0,0 +1,106 @@
+package com.databricks.labs.mosaic.core.expressions.raster
+import com.databricks.labs.mosaic.core.expressions.{GenericExpressionFactory, MosaicExpressionConfig}
+import com.databricks.labs.mosaic.core.raster.{MosaicRaster, RasterAPI}
+import org.apache.spark.sql.catalyst.expressions.{Expression, NullIntolerant, TernaryExpression}
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.unsafe.types.UTF8String
+import scala.reflect.ClassTag
+import scala.util.Try
+ * Base class for all raster expressions that take two arguments. It provides
+ * the boilerplate code needed to create a function builder for a given
+ * expression. It minimises amount of code needed to create a new expression.
+ *
+ * @param pathExpr
+ * The expression for the raster path.
+ * @param arg1Expr
+ * The expression for the first argument.
+ * @param arg2Expr
+ * The expression for the second argument.
+ * @param outputType
+ * The output type of the result.
+ * @param expressionConfig
+ * Additional arguments for the expression (expressionConfigs).
+ * @tparam T
+ * The type of the extending class.
+ */
+abstract class Raster2ArgExpression[T <: Expression : ClassTag](
+ pathExpr: Expression,
+ arg1Expr: Expression,
+ arg2Expr: Expression,
+ outputType: DataType,
+ expressionConfig: MosaicExpressionConfig
+ ) extends TernaryExpression
+ with NullIntolerant
+ with Serializable {
+ /**
+  * The raster API obtained from the expression config. Enabling it is
+  * attempted at construction time so that subclasses don't need to worry
+  * about it; a failure of the enable call is captured by Try and the
+  * outcome is discarded.
+  */
+ protected val rasterAPI: RasterAPI = expressionConfig.getRasterAPI()
+ Try(rasterAPI.enable())
+ /** First child: the expression for the raster path. */
+ override def first: Expression = pathExpr
+ /** Second child: the expression for the first argument. */
+ override def second: Expression = arg1Expr
+ /** Third child: the expression for the second argument. */
+ override def third: Expression = arg2Expr
+ /** Output data type, as supplied through the constructor. */
+ override def dataType: DataType = outputType
+ /**
+ * The function to be overridden by the extending class. It is called by
+ * nullSafeEval when the expression is evaluated. It provides the loaded
+ * raster and the arguments to the expression, so that implementations do
+ * not deal with loading or cleaning up the raster.
+ *
+ * @param raster
+ * The raster to be used.
+ * @param arg1
+ * The first argument, passed through exactly as received by nullSafeEval.
+ * @param arg2
+ * The second argument, passed through exactly as received by nullSafeEval.
+ * @return
+ * A result of the expression.
+ */
+ def rasterTransform(raster: MosaicRaster, arg1: Any, arg2: Any): Any
+ /**
+  * Evaluation of the expression. It converts the evaluated raster path to a
+  * string, loads the raster from that path and applies rasterTransform to
+  * it. The raster is always cleaned up before this method exits, including
+  * when rasterTransform throws, so a failing transform does not leak the
+  * raster.
+  *
+  * @param inputPath
+  *   The path to the raster. It is a UTF8String.
+  * @param arg1
+  *   The first argument.
+  * @param arg2
+  *   The second argument.
+  * @return
+  *   The result of the expression.
+  */
+ override def nullSafeEval(inputPath: Any, arg1: Any, arg2: Any): Any = {
+     val path = inputPath.asInstanceOf[UTF8String].toString
+     val raster = rasterAPI.raster(path)
+     try {
+         rasterTransform(raster, arg1, arg2)
+     } finally {
+         // Runs on both the success and the failure path.
+         raster.cleanUp()
+     }
+ }
+ /** Delegates copying to GenericExpressionFactory.makeCopyImpl, passing 3 as the children count together with the expression config. */
+ override def makeCopy(newArgs: Array[AnyRef]): Expression = {
+     GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 3, expressionConfig)
+ }
+ /** Replaces all three children by delegating to makeCopy. */
+ override def withNewChildrenInternal(newFirst: Expression, newArg1: Expression, newArg2: Expression): Expression = {
+     val replacementChildren = Array[AnyRef](newFirst, newArg1, newArg2)
+     makeCopy(replacementChildren)
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterBandExpression.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterBandExpression.scala
new file mode 100644
index 000000000..432012a4a
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterBandExpression.scala
@@ -0,0 +1,99 @@
+package com.databricks.labs.mosaic.core.expressions.raster
+import com.databricks.labs.mosaic.core.expressions.{GenericExpressionFactory, MosaicExpressionConfig}
+import com.databricks.labs.mosaic.core.raster.{MosaicRaster, MosaicRasterBand, RasterAPI}
+import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Expression, NullIntolerant}
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.unsafe.types.UTF8String
+import scala.reflect.ClassTag
+import scala.util.Try
+ * Base class for all raster band expressions that take no arguments. It
+ * provides the boilerplate code needed to create a function builder for a
+ * given expression. It minimises amount of code needed to create a new
+ * expression.
+ *
+ * @param pathExpr
+ * The expression for the raster path.
+ * @param bandExpr
+ * The expression for the band index.
+ * @param outputType
+ * The output type of the result.
+ * @param expressionConfig
+ * Additional arguments for the expression (expressionConfigs).
+ * @tparam T
+ * The type of the extending class.
+ */
+abstract class RasterBandExpression[T <: Expression : ClassTag](
+ pathExpr: Expression,
+ bandExpr: Expression,
+ outputType: DataType,
+ expressionConfig: MosaicExpressionConfig
+ ) extends BinaryExpression
+ with NullIntolerant
+ with Serializable {
+ /**
+  * The raster API obtained from the expression config. Enabling it is
+  * attempted at construction time so that subclasses don't need to worry
+  * about it; a failure of the enable call is captured by Try and the
+  * outcome is discarded.
+  */
+ protected val rasterAPI: RasterAPI = expressionConfig.getRasterAPI()
+ Try(rasterAPI.enable())
+ /** Left child: the expression for the raster path. */
+ override def left: Expression = pathExpr
+ /** Right child: the expression for the band index. */
+ override def right: Expression = bandExpr
+ /** Output data type, as supplied through the constructor. */
+ override def dataType: DataType = outputType
+ /**
+ * The function to be overridden by the extending class. It is called by
+ * nullSafeEval when the expression is evaluated. It provides the loaded
+ * raster and the requested band to the expression, so that implementations
+ * do not deal with loading or cleaning up the raster.
+ *
+ * @param raster
+ * The raster to be used.
+ * @param band
+ * The band to be used, obtained from the raster via getBand.
+ * @return
+ * The result of the expression.
+ */
+ def bandTransform(raster: MosaicRaster, band: MosaicRasterBand): Any
+ /**
+  * Evaluation of the expression. It converts the evaluated raster path to a
+  * string and loads the raster from that path. It then reads the band at
+  * the evaluated band index and applies bandTransform to it. The raster is
+  * always cleaned up before this method exits, including when reading the
+  * band or bandTransform throws, so a failure does not leak the raster.
+  *
+  * @param inputPath
+  *   The path to the raster. It is a UTF8String.
+  * @param inputBand
+  *   The band index to be used. It is an Int.
+  * @return
+  *   The result of the expression.
+  */
+ override def nullSafeEval(inputPath: Any, inputBand: Any): Any = {
+     val path = inputPath.asInstanceOf[UTF8String].toString
+     val bandIndex = inputBand.asInstanceOf[Int]
+     val raster = rasterAPI.raster(path)
+     try {
+         val band = raster.getBand(bandIndex)
+         bandTransform(raster, band)
+     } finally {
+         // Runs on both the success and the failure path.
+         raster.cleanUp()
+     }
+ }
+ /** Delegates copying to GenericExpressionFactory.makeCopyImpl, passing 2 as the children count together with the expression config. */
+ override def makeCopy(newArgs: Array[AnyRef]): Expression = {
+     GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 2, expressionConfig)
+ }
+ /** Replaces both children by delegating to makeCopy. */
+ override def withNewChildrenInternal(newFirst: Expression, newSecond: Expression): Expression = {
+     val replacementChildren = Array[AnyRef](newFirst, newSecond)
+     makeCopy(replacementChildren)
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterExpression.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterExpression.scala
new file mode 100644
index 000000000..edd1597f5
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterExpression.scala
@@ -0,0 +1,84 @@
+package com.databricks.labs.mosaic.core.expressions.raster
+import com.databricks.labs.mosaic.core.expressions.{GenericExpressionFactory, MosaicExpressionConfig}
+import com.databricks.labs.mosaic.core.raster.{MosaicRaster, RasterAPI}
+import org.apache.spark.sql.catalyst.expressions.{Expression, NullIntolerant, UnaryExpression}
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.unsafe.types.UTF8String
+import scala.reflect.ClassTag
+import scala.util.Try
+ * Base class for all raster expressions that take no arguments. It provides
+ * the boilerplate code needed to create a function builder for a given
+ * expression. It minimises amount of code needed to create a new expression.
+ *
+ * @param pathExpr
+ * The expression for the raster path.
+ * @param outputType
+ * The output type of the result.
+ * @param expressionConfig
+ * Additional arguments for the expression (expressionConfigs).
+ * @tparam T
+ * The type of the extending class.
+ */
+abstract class RasterExpression[T <: Expression : ClassTag](
+ pathExpr: Expression,
+ outputType: DataType,
+ expressionConfig: MosaicExpressionConfig
+ ) extends UnaryExpression
+ with NullIntolerant
+ with Serializable {
+ /**
+  * The raster API obtained from the expression config. Enabling it is
+  * attempted at construction time so that subclasses don't need to worry
+  * about it; a failure of the enable call is captured by Try and the
+  * outcome is discarded.
+  */
+ protected val rasterAPI: RasterAPI = expressionConfig.getRasterAPI()
+ Try(rasterAPI.enable())
+ /** Only child: the expression for the raster path. */
+ override def child: Expression = pathExpr
+ /** Output data type, as supplied through the constructor. */
+ override def dataType: DataType = outputType
+ /**
+ * The function to be overridden by the extending class. It is called by
+ * nullSafeEval when the expression is evaluated. It provides the loaded
+ * raster to the expression, so that implementations do not deal with
+ * loading or cleaning up the raster.
+ *
+ * @param raster
+ * The raster to be used.
+ * @return
+ * The result of the expression.
+ */
+ def rasterTransform(raster: MosaicRaster): Any
+ /**
+  * Evaluation of the expression. It converts the evaluated raster path to a
+  * string, loads the raster from that path and applies rasterTransform to
+  * it. The raster is always cleaned up before this method exits, including
+  * when rasterTransform throws, so a failing transform does not leak the
+  * raster.
+  *
+  * @param inputPath
+  *   The path to the raster. It is a UTF8String.
+  * @return
+  *   The result of the expression.
+  */
+ override def nullSafeEval(inputPath: Any): Any = {
+     val path = inputPath.asInstanceOf[UTF8String].toString
+     val raster = rasterAPI.raster(path)
+     try {
+         rasterTransform(raster)
+     } finally {
+         // Runs on both the success and the failure path.
+         raster.cleanUp()
+     }
+ }
+ /** Delegates copying to GenericExpressionFactory.makeCopyImpl, passing 1 as the children count together with the expression config. */
+ override def makeCopy(newArgs: Array[AnyRef]): Expression = {
+     GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 1, expressionConfig)
+ }
+ /** Replaces the single child by delegating to makeCopy. */
+ override def withNewChildInternal(newFirst: Expression): Expression = {
+     val replacementChildren = Array[AnyRef](newFirst)
+     makeCopy(replacementChildren)
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterGeneratorExpression.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterGeneratorExpression.scala
new file mode 100644
index 000000000..e09afb19f
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterGeneratorExpression.scala
@@ -0,0 +1,90 @@
+package com.databricks.labs.mosaic.core.expressions.raster
+import com.databricks.labs.mosaic.core.expressions.{GenericExpressionFactory, MosaicExpressionConfig}
+import com.databricks.labs.mosaic.core.raster.{MosaicRaster, RasterAPI}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{CollectionGenerator, Expression, NullIntolerant}
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+import scala.reflect.ClassTag
+import scala.util.Try
+ * Base class for all raster generator expressions that take no arguments. It
+ * provides the boilerplate code needed to create a function builder for a
+ * given expression. It minimises amount of code needed to create a new
+ * expression. These expressions are used to generate a collection of new
+ * rasters based on the input raster. The new rasters are written in the
+ * checkpoint directory. The files are written as GeoTiffs. Subdatasets are not
+ * supported, please flatten beforehand.
+ *
+ * @param inPathExpr
+ * The expression for the raster path.
+ * @param expressionConfig
+ * Additional arguments for the expression (expressionConfigs).
+ * @tparam T
+ * The type of the extending class.
+ */
+abstract class RasterGeneratorExpression[T <: Expression : ClassTag](
+ inPathExpr: Expression,
+ expressionConfig: MosaicExpressionConfig
+ ) extends CollectionGenerator
+ with NullIntolerant
+ with Serializable {
+ /** Random identifier created once per expression instance, with dashes replaced by underscores. It is passed to saveCheckpoint. */
+ val uuid: String = {
+     val rawId = java.util.UUID.randomUUID().toString
+     rawId.replace("-", "_")
+ }
+ /**
+  * The raster API obtained from the expression config. Enabling it is
+  * attempted at construction time so that subclasses don't need to worry
+  * about it; a failure of the enable call is captured by Try and the
+  * outcome is discarded.
+  */
+ protected val rasterAPI: RasterAPI = expressionConfig.getRasterAPI()
+ Try(rasterAPI.enable())
+ /** This generator does not request the element position in its output. */
+ override def position: Boolean = false
+ /** This generator does not request inlining of the generated rows. */
+ override def inline: Boolean = false
+ /**
+ * Generator expressions require an abstraction for the element type. It
+ * always needs to be wrapped in a StructType. The actual type is that of
+ * the struct's element: a single string field named "path".
+ */
+ override def elementSchema: StructType = StructType(Array(StructField("path", StringType)))
+ /**
+ * The function to be overridden by the extending class. It is called by
+ * eval when the expression is evaluated. It provides the loaded raster to
+ * the expression. It abstracts spark serialization from the caller.
+ *
+ * @param raster
+ * The raster to be used.
+ * @return
+ * Sequence of (id, extent) pairs, one per subraster to generate. The
+ * extent is a tuple of four Ints. Each pair is handed to
+ * raster.saveCheckpoint by eval.
+ */
+ def rasterGenerator(raster: MosaicRaster): Seq[(Long, (Int, Int, Int, Int))]
+ /**
+  * Evaluation of the generator. It evaluates the raster path, loads the
+  * raster, asks rasterGenerator for the (id, extent) pairs and saves one
+  * checkpoint per pair. Each output row holds the path returned by
+  * saveCheckpoint.
+  *
+  * A null raster path produces no rows. Without this guard the conversion
+  * of the path to a string fails with a NullPointerException.
+  *
+  * @param input
+  *   The input row.
+  * @return
+  *   One row per generated subraster, each containing the output path.
+  */
+ override def eval(input: InternalRow): TraversableOnce[InternalRow] = {
+     val rawPath = inPathExpr.eval(input)
+     if (rawPath == null) {
+         // Nothing to generate from a missing path.
+         Seq.empty[InternalRow]
+     } else {
+         val inPath = rawPath.asInstanceOf[UTF8String].toString
+         val checkpointPath = expressionConfig.getRasterCheckpoint
+         val raster = rasterAPI.raster(inPath)
+         val result = rasterGenerator(raster)
+         // NOTE(review): unlike the sibling nullSafeEval implementations, the raster is not
+         // cleaned up here - confirm whether raster.cleanUp() should follow the checkpoint writes.
+         for ((id, extent) <- result) yield {
+             val outPath = raster.saveCheckpoint(uuid, id, extent, checkpointPath)
+             InternalRow.fromSeq(Seq(UTF8String.fromString(outPath)))
+         }
+     }
+ }
+ /** Delegates copying to GenericExpressionFactory.makeCopyImpl, passing the current number of children together with the expression config. */
+ override def makeCopy(newArgs: Array[AnyRef]): Expression = {
+     val childCount = children.length
+     GenericExpressionFactory.makeCopyImpl[T](this, newArgs, childCount, expressionConfig)
+ }
+ /** Replaces the children by delegating to makeCopy. */
+ override def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = {
+     val replacementChildren: Array[AnyRef] = newChildren.toArray[AnyRef]
+     makeCopy(replacementChildren)
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterToGridExpression.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterToGridExpression.scala
new file mode 100644
index 000000000..54045e977
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterToGridExpression.scala
@@ -0,0 +1,119 @@
+package com.databricks.labs.mosaic.core.expressions.raster
+import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import com.databricks.labs.mosaic.core.raster.{MosaicRaster, MosaicRasterBand}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{Expression, NullIntolerant}
+import org.apache.spark.sql.catalyst.util.ArrayData
+import org.apache.spark.sql.types.DataType
+import scala.reflect.ClassTag
+ * Base class for all raster to grid expressions that take no arguments. It
+ * provides the boilerplate code needed to create a function builder for a
+ * given expression. It minimises amount of code needed to create a new
+ * expression. These expressions project rasters to grid index system of
+ * Mosaic. All cells are projected to spatial coordinates and then to grid
+ * index system. The pixels are grouped by cell ids and then combined to form a
+ * grid -> value/measure collection per band of the raster.
+ *
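+ * @param pathExpr
+ * The expression for the raster path.
+ * @param resolution
+ * The expression for the grid resolution the pixels are projected to.
+ * @param measureType
+ * The type of the measure stored per grid cell in the result.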
+ * @param expressionConfig
+ * Additional arguments for the expression (expressionConfigs).
+ * @tparam T
+ * The type of the extending class.
+ */
+abstract class RasterToGridExpression[T <: Expression : ClassTag, P](
+ pathExpr: Expression,
+ resolution: Expression,
+ measureType: DataType,
+ expressionConfig: MosaicExpressionConfig
+ ) extends Raster1ArgExpression[T](pathExpr, resolution, RasterToGridType(expressionConfig.getCellIdType, measureType), expressionConfig)
+ with NullIntolerant
+ with Serializable {
+ /** The index system to be used, obtained from the expression config. */
+ val indexSystem: IndexSystem = expressionConfig.getIndexSystem()
+ /** The geometry API obtained from the expression config. */
+ val geometryAPI: GeometryAPI = expressionConfig.getGeometryAPI()
+ /**
+  * Projects the pixels of every band to the grid and groups them by cell,
+  * so that the result holds the (cellId, measure) pairs of each band of the
+  * raster. The values combiner is applied on the measures of each cell. For
+  * no combine, use the identity function.
+  *
+  * @param raster
+  *   The raster to be used.
+  * @param arg1
+  *   The grid resolution. It is an Int.
+  * @return
+  *   The serialized (cellId, measure) pairs of each band of the raster.
+  */
+ override def rasterTransform(raster: MosaicRaster, arg1: Any): Any = {
+     val geoTransform = raster.getGeoTransform
+     val gridResolution = arg1.asInstanceOf[Int]
+     val perBand: MosaicRasterBand => Map[Long, P] = band => bandTransformer(band, gridResolution, geoTransform)
+     val cellsPerBand = raster.transformBands(perBand)
+     serialize(cellsPerBand)
+ }
+ /**
+ * The method to be overridden to specify how the pixel values are combined
+ * within a cell. It is called by bandTransformer once per cell ID with all
+ * pixel values that fall into that cell.
+ *
+ * @param values
+ * The values to be combined.
+ * @return
+ * The combined value/values.
+ */
+ def valuesCombiner(values: Seq[Double]): P
+ /**
+  * Maps a pixel to the grid cell containing its centroid. The pixel
+  * position is shifted by half a pixel, converted to spatial coordinates
+  * with the six geo transform coefficients and indexed at the given
+  * resolution.
+  *
+  * @param gt
+  *   The geo transform coefficients; indices 0 to 5 are read.
+  * @param resolution
+  *   The grid resolution.
+  * @param x
+  *   The pixel x position.
+  * @param y
+  *   The pixel y position.
+  * @param value
+  *   The pixel value; it is returned unchanged.
+  * @return
+  *   The (cellID, value) pair.
+  */
+ //noinspection ZeroIndexToHead
+ protected def pixelTransformer(gt: Seq[Double], resolution: Int)(x: Int, y: Int, value: Double): (Long, Double) = {
+     val halfPixel = 0.5 // This centers the point to the pixel centroid
+     val centredX = halfPixel + x
+     val centredY = halfPixel + y
+     val worldX = gt(0) + centredX * gt(1) + centredY * gt(2)
+     val worldY = gt(3) + centredX * gt(4) + centredY * gt(5)
+     (indexSystem.pointToIndex(worldX, worldY, resolution), value)
+ }
+ /**
+  * Projects the pixels of a band to grid cells and combines the values that
+  * share a cell.
+  *
+  * The combined values are built with a strict map. The previous mapValues
+  * call returns a lazy view on Scala 2.12 that re-applies valuesCombiner on
+  * every access and is not serializable.
+  *
+  * @param band
+  *   The band to be used.
+  * @param resolution
+  *   The grid resolution.
+  * @param gt
+  *   The geo transform coefficients.
+  * @return
+  *   The combined value per cell ID.
+  */
+ protected def bandTransformer(band: MosaicRasterBand, resolution: Int, gt: Seq[Double]): Map[Long, P] = {
+     val results = band.transformValues[(Long, Double)](pixelTransformer(gt, resolution), (0L, -1.0))
+     results
+         // Filter out default cells. We don't want to return them since they are masked in original raster.
+         // We use 0L as a dummy cell ID for default cells.
+         .map(row => row.filter(_._1 != 0L))
+         .filterNot(_.isEmpty)
+         .flatten
+         .groupBy(_._1) // Group by cell ID.
+         // Apply the combiner that is overridden in subclasses, exactly once per cell.
+         .map { case (cellID, values) => cellID -> valuesCombiner(values.map(_._2)) }
+ }
+ /**
+  * Converts the per band (cellID, measure) pairs to spark internal types.
+  * Every pair becomes an internal row of the serialized cell ID and the
+  * measure; the rows of a band form one array and the band arrays form the
+  * outer array.
+  *
+  * @param cellsWithMeasure
+  *   The result of the raster transform to be serialized to spark internal
+  *   types.
+  * @return
+  *   The serialized result.
+  */
+ private def serialize(cellsWithMeasure: Traversable[Traversable[(Any, P)]]) = {
+     // Encodes a single (cellID, measure) pair as an internal row.
+     def toRow(cellID: Any, measure: P): InternalRow = InternalRow.fromSeq(Seq(indexSystem.serializeCellId(cellID), measure))
+     val bandArrays = cellsWithMeasure.map { bandCells =>
+         val rows = bandCells.map { case (cellID, measure) => toRow(cellID, measure) }
+         ArrayData.toArrayData(rows)
+     }
+     ArrayData.toArrayData(bandArrays)
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/raster.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/raster.scala
new file mode 100644
index 000000000..1ea9452f9
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/raster/raster.scala
@@ -0,0 +1,67 @@
+package com.databricks.labs.mosaic.core.expressions
+import org.apache.spark.sql.catalyst.util.{ArrayBasedMapBuilder, ArrayBasedMapData, ArrayData}
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+/** Utility methods for raster expressions. */
+package object raster {
+ /** Datatype representing pixel coordinates in a raster: integer x and y. */
+ val PixelCoordsType: DataType = StructType(Seq(StructField("x", IntegerType), StructField("y", IntegerType)))
+ /** Datatype representing world coordinates: double x and y. */
+ val WorldCoordsType: DataType = StructType(Seq(StructField("x", DoubleType), StructField("y", DoubleType)))
+ /**
+  * Datatype representing a raster projected to a grid: an array of arrays
+  * of (cellID, measure) structs.
+  *
+  * @param cellIDType
+  *   The cell ID type of the index system.
+  * @param measureType
+  *   The measure type of the resulting pixel value.
+  * @return
+  *   The datatype to be used for serialization of the result of
+  *   [[RasterToGridExpression]].
+  */
+ def RasterToGridType(cellIDType: DataType, measureType: DataType): DataType = {
+     val cellField = StructField("cellID", cellIDType)
+     val measureField = StructField("measure", measureType)
+     val cellWithMeasure = StructType(Seq(cellField, measureField))
+     val cellsOfBand = ArrayType(cellWithMeasure)
+     ArrayType(cellsOfBand)
+ }
+ /**
+  * Builds a spark map from a scala Map[String, String]. Keys and values
+  * are both converted to UTF8String.
+  *
+  * @param metaData
+  *   The metadata to be used.
+  * @return
+  *   Serialized map.
+  */
+ def buildMapString(metaData: Map[String, String]): ArrayBasedMapData = {
+     val encodedKeys: Array[UTF8String] = metaData.keys.toArray[String].map(key => UTF8String.fromString(key))
+     val encodedValues: Array[UTF8String] = metaData.values.toArray[String].map(value => UTF8String.fromString(value))
+     val builder = new ArrayBasedMapBuilder(StringType, StringType)
+     builder.putAll(ArrayData.toArrayData(encodedKeys), ArrayData.toArrayData(encodedValues))
+     builder.build()
+ }
+ /**
+  * Builds a spark map from a scala Map[String, Double]. Keys are converted
+  * to UTF8String; values are kept as doubles.
+  *
+  * @param metaData
+  *   The metadata to be used.
+  * @return
+  *   Serialized map.
+  */
+ def buildMapDouble(metaData: Map[String, Double]): ArrayBasedMapData = {
+     val encodedKeys: Array[UTF8String] = metaData.keys.toArray[String].map(key => UTF8String.fromString(key))
+     val rawValues: Array[Double] = metaData.values.toArray[Double]
+     val builder = new ArrayBasedMapBuilder(StringType, DoubleType)
+     builder.putAll(ArrayData.toArrayData(encodedKeys), ArrayData.toArrayData(rawValues))
+     builder.build()
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometry.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometry.scala
new file mode 100644
index 000000000..056da865a
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometry.scala
@@ -0,0 +1,327 @@
+package com.databricks.labs.mosaic.core.geometry
+import com.databricks.labs.mosaic.core.crs.CRSBoundsProvider
+import com.databricks.labs.mosaic.core.geometry.api.GeometryWriter
+import java.util.Locale
+ * A trait that defines supported operations on geometries.
+ * Additional methods can be available in specific geometry types.
+ * Every geometry framework should implement this trait.
+ */
+trait MosaicGeometry extends GeometryWriter with Serializable {
+ /**
+ * @return Returns the number of geometries in this geometry.
+ */
+ def getNumGeometries: Int
+ /**
+ * Returns the shells of this geometry as a sequence of sequences of points.
+ * Each sequence of points represents a shell.
+ *
+ * @return Returns the shells of this geometry as a sequence of sequences of points.
+ */
+ def getShellPoints: Seq[Seq[MosaicPoint]]
+ /**
+ * Returns the holes of this geometry as a sequence of sequences of sequences of points.
+ * Each sequence of points represents a hole.
+ * Each sequence of holes is related to a single shell.
+ * If the geometry has 3 shells, 2 holes in first shell, 0 holes in second shell and
+ * 1 hole in third shell, the returned sequence will be as follows:
+ * Seq( Seq( hole1, hole2 ), Seq(), Seq( hole3 ) )
+ * where hole1, hole2 and hole3 are sequences of points.
+ *
+ * @return Returns the holes of this geometry as a sequence of sequences of sequences of points.
+ */
+ def getHolePoints: Seq[Seq[Seq[MosaicPoint]]]
+ /**
+ * @return Returns the number of points in this geometry.
+ */
+ def numPoints: Int
+ /**
+ * Translates this geometry by given x and y distances.
+ *
+ * @param xd x distance
+ * @param yd y distance
+ * @return Returns the translated geometry.
+ */
+ def translate(xd: Double, yd: Double): MosaicGeometry
+ /**
+ * Scales this geometry by given x and y values.
+ * NOTE(review): xd and yd presumably act as scale factors rather than
+ * distances - confirm against the implementations.
+ *
+ * @param xd x value to scale by
+ * @param yd y value to scale by
+ * @return Returns the scaled geometry.
+ */
+ def scale(xd: Double, yd: Double): MosaicGeometry
+ /**
+ * Rotates this geometry by given angle.
+ *
+ * @param td angle in degrees
+ * @return Returns the rotated geometry.
+ */
+ def rotate(td: Double): MosaicGeometry
+ /**
+ * @return Returns the length of this geometry.
+ */
+ def getLength: Double
+ /**
+ * @param geom2 geometry to calculate distance to.
+ * @return Returns the distance between this geometry and given geometry.
+ */
+ def distance(geom2: MosaicGeometry): Double
+ /**
+ * @param geom geometry to calculate difference to.
+ * @return Returns the difference between this geometry and given geometry.
+ */
+ def difference(geom: MosaicGeometry): MosaicGeometry
+ /**
+ * @return Returns the validity flag of this geometry.
+ */
+ def isValid: Boolean
+ /**
+ * @return Returns the geometry type of this geometry as a string.
+ */
+ def getGeometryType: String
+ /**
+ * @return Returns the area of this geometry.
+ */
+ def getArea: Double
+ /**
+ * @return Returns the centroid of this geometry as a point.
+ */
+ def getCentroid: MosaicPoint
+ /**
+ * @return Returns the flag indicating if this geometry is empty.
+ */
+ def isEmpty: Boolean
+ /**
+ * @return Returns the boundary of this geometry. The boundary is also a geometry.
+ */
+ def getBoundary: MosaicGeometry
+ /**
+ * @return Returns shells of the geometry as a sequence of LineStrings. Each LineString is a shell.
+ */
+ def getShells: Seq[MosaicLineString]
+ /**
+ * Returns holes of the geometry as a sequence of sequences of LineStrings. Each LineString is a hole.
+ * Each inner sequence corresponds to a single shell.
+ *
+ * @return Returns holes of the geometry as a sequence of sequences of LineStrings.
+ */
+ def getHoles: Seq[Seq[MosaicLineString]]
+ /**
+ * Applies given function to each point of this geometry.
+ *
+ * @param f function to apply; it receives a pair of Doubles (x, y) and returns the new pair
+ * @return Returns the geometry with points transformed by given function.
+ */
+ def mapXY(f: (Double, Double) => (Double, Double)): MosaicGeometry
+ /**
+ * @return Returns the boundary of this geometry.
+ */
+ def boundary: MosaicGeometry
+ /**
+ * Buffer this geometry by provided distance.
+ *
+ * @param distance distance to buffer
+ * @return Returns the buffer of this geometry.
+ */
+ def buffer(distance: Double): MosaicGeometry
+ /**
+ * Simplifies this geometry with given tolerance.
+ *
+ * @param tolerance tolerance to use
+ * @return Returns the simplified geometry.
+ */
+ def simplify(tolerance: Double): MosaicGeometry
+ /**
+ * Computes intersection of this geometry with given geometry.
+ *
+ * @param other geometry to intersect with
+ * @return Returns the intersection of this geometry with given geometry.
+ */
+ def intersection(other: MosaicGeometry): MosaicGeometry
+ /**
+ * Computes the intersects flag of this geometry with given geometry.
+ *
+ * @param other geometry to test for intersection with
+ * @return Returns the intersects flag of this geometry with given geometry.
+ */
+ def intersects(other: MosaicGeometry): Boolean
+ /**
+ * @return Returns the envelope of this geometry.
+ */
+ def envelope: MosaicGeometry
+ /**
+ * Computes union of this geometry with given geometry.
+ *
+ * @param other geometry to union with
+ * @return Returns the union of this geometry with given geometry.
+ */
+ def union(other: MosaicGeometry): MosaicGeometry
+ /**
+ * @return Returns the unary union of this geometry.
+ */
+ def unaryUnion: MosaicGeometry
+ /**
+ * Computes the contains flag of this geometry with given geometry.
+ *
+ * @param other geometry to test for containment
+ * @return Returns the contains flag of this geometry with given geometry.
+ */
+ def contains(other: MosaicGeometry): Boolean
+ /**
+ * Flattens this geometry into a collection of geometries.
+ *
+ * @return Returns the flattened geometry sequence.
+ */
+ def flatten: Seq[MosaicGeometry]
+ /**
+ * @param other geometry to compare with
+ * @return Returns the equality flag of this geometry with given geometry.
+ */
+ def equals(other: MosaicGeometry): Boolean
+ /**
+ * @param other object to compare with
+ * @return Returns the equality flag of this geometry with given object.
+ */
+ def equals(other: java.lang.Object): Boolean
+ /**
+ * @param other geometry to compare with
+ * @return Returns the topological equality flag of this geometry with given geometry.
+ */
+ def equalsTopo(other: MosaicGeometry): Boolean
+ /**
+ * @return Returns the hash code of this geometry.
+ */
+ def hashCode: Int
+ /**
+ * @return Returns the convex hull of this geometry.
+ */
+ def convexHull: MosaicGeometry
+ /**
+  * Computes the MIN or MAX coordinate of this geometry over its shell
+  * points. The requested function is first applied to the selected
+  * ordinate of every shell and then to the per shell results.
+  * An unsupported dimension or func results in a MatchError.
+  *
+  * @param dimension dimension to select coordinate from
+  *                  (X, Y or Z)
+  *                  (case insensitive)
+  * @param func function to select coordinate by
+  *             (MIN or MAX)
+  *             (case insensitive)
+  * @return Returns the MIN or MAX coordinate of this geometry.
+  */
+ def minMaxCoord(dimension: String, func: String): Double = {
+     // Reduces a sequence of ordinates with the requested function.
+     def reduce(ordinates: Seq[Double]): Double =
+         func.toUpperCase(Locale.ROOT) match {
+             case "MIN" => ordinates.min
+             case "MAX" => ordinates.max
+         }
+     val perShell = for (shell <- this.getShellPoints) yield {
+         val ordinates = dimension.toUpperCase(Locale.ROOT) match {
+             case "X" => shell.map(_.getX)
+             case "Y" => shell.map(_.getY)
+             case "Z" => shell.map(_.getZ)
+         }
+         reduce(ordinates)
+     }
+     reduce(perShell)
+ }
+ /**
+ * Transforms this geometry to given CRS.
+ *
+ * @param sridTo target CRS
+ * @return Returns the transformed geometry.
+ */
+ def transformCRSXY(sridTo: Int): MosaicGeometry
+ /**
+ * Transforms this geometry from given CRS to given CRS.
+ * Delegates to the overload taking an optional source CRS, wrapping
+ * sridFrom in Some.
+ *
+ * @param sridTo target CRS
+ * @param sridFrom source CRS
+ * @return Returns the transformed geometry.
+ */
+ def transformCRSXY(sridTo: Int, sridFrom: Int): MosaicGeometry = {
+ transformCRSXY(sridTo, Some(sridFrom))
+ }
+ /**
+ * Transforms this geometry from an optionally given CRS to given CRS.
+ *
+ * @param sridTo target CRS
+ * @param sridFrom source CRS, if provided
+ * @return Returns the transformed geometry.
+ */
+ def transformCRSXY(sridTo: Int, sridFrom: Option[Int]): MosaicGeometry
+ /**
+ * @return Returns the spatial reference of this geometry.
+ */
+ def getSpatialReference: Int
+ /**
+ * Sets the spatial reference of this geometry.
+ *
+ * @param srid spatial reference to set
+ */
+ def setSpatialReference(srid: Int): Unit
+ /**
+  * Checks if this geometry has all valid coordinates in given CRS. Every
+  * shell point and every hole point must have its x and y within the
+  * selected bounds (inclusive).
+  *
+  * @param crsBoundsProvider CRS bounds provider
+  *                          (to get bounds of given CRS)
+  * @param crsCode CRS code to check coordinates in (e.g. EPSG:4326);
+  *                it is split on ":" into a name and an integer id
+  * @param which which bounds to check (bounds or reprojected_bounds),
+  *              case insensitive
+  * @return Returns true if all points lie within the selected bounds.
+  */
+ def hasValidCoords(crsBoundsProvider: CRSBoundsProvider, crsCode: String, which: String): Boolean = {
+     val crsCodeIn = crsCode.split(":")
+     val crsBounds = which.toLowerCase(Locale.ROOT) match {
+         case "bounds"             => crsBoundsProvider.bounds(crsCodeIn(0), crsCodeIn(1).toInt)
+         case "reprojected_bounds" => crsBoundsProvider.reprojectedBounds(crsCodeIn(0), crsCodeIn(1).toInt)
+         // The message names exactly the values accepted above.
+         case _                    => throw new Error("Only bounds and reprojected_bounds supported for which argument.")
+     }
+     (Seq(getShellPoints) ++ getHolePoints).flatten.flatten.forall(point =>
+         crsBounds.getLowerX <= point.getX && point.getX <= crsBounds.getUpperX &&
+         crsBounds.getLowerY <= point.getY && point.getY <= crsBounds.getUpperY
+     )
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryCollection.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryCollection.scala
new file mode 100644
index 000000000..58f905a97
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryCollection.scala
@@ -0,0 +1,17 @@
+package com.databricks.labs.mosaic.core.geometry
+ * A trait that adds GeometryCollection functionality to MosaicGeometry.
+ */
+//noinspection DuplicatedCode
+trait MosaicGeometryCollection extends MosaicGeometry {
+ /**
+ * @return Returns this collection as a sequence of geometries
+ * (presumably one entry per member geometry - TODO confirm).
+ */
+ def asSeq: Seq[MosaicGeometry]
+ // The members below override inherited members and are left abstract here;
+ // concrete geometry implementations must supply them.
+ override def flatten: Seq[MosaicGeometry]
+ override def getShellPoints: Seq[Seq[MosaicPoint]]
+ override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]]
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicLineString.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicLineString.scala
new file mode 100644
index 000000000..be93bac8e
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicLineString.scala
@@ -0,0 +1,18 @@
+package com.databricks.labs.mosaic.core.geometry
+ * A trait that adds LineString functionality to MosaicGeometry.
+ */
+trait MosaicLineString extends MosaicGeometry {
+ /**
+ * @return Returns this line string as a sequence of points
+ * (presumably its vertices in order - TODO confirm).
+ */
+ def asSeq: Seq[MosaicPoint]
+ // The members below override inherited members and are left abstract here;
+ // concrete geometry implementations must supply them.
+ override def getHoles: Seq[Seq[MosaicLineString]]
+ override def getShells: Seq[MosaicLineString]
+ override def flatten: Seq[MosaicGeometry]
+ override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]]
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiLineString.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiLineString.scala
new file mode 100644
index 000000000..cc1d80ea8
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiLineString.scala
@@ -0,0 +1,18 @@
+package com.databricks.labs.mosaic.core.geometry
+ * A trait that adds MultiLineString functionality to MosaicGeometry.
+ */
+trait MosaicMultiLineString extends MosaicGeometry {
+ /**
+ * @return Returns this multi line string as a sequence of line strings
+ * (presumably one entry per member line - TODO confirm).
+ */
+ def asSeq: Seq[MosaicLineString]
+ // The members below override inherited members and are left abstract here;
+ // concrete geometry implementations must supply them.
+ override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]]
+ override def getShellPoints: Seq[Seq[MosaicPoint]]
+ override def getHoles: Seq[Seq[MosaicLineString]]
+ override def flatten: Seq[MosaicGeometry]
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiPoint.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiPoint.scala
new file mode 100644
index 000000000..1334a3b60
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiPoint.scala
@@ -0,0 +1,20 @@
+package com.databricks.labs.mosaic.core.geometry
+ * A trait that adds MultiPoint functionality to MosaicGeometry.
+ */
+trait MosaicMultiPoint extends MosaicGeometry {
+ /**
+ * @return Returns this multi point as a sequence of points
+ * (presumably one entry per member point - TODO confirm).
+ */
+ def asSeq: Seq[MosaicPoint]
+ // The members below override inherited members and are left abstract here;
+ // concrete geometry implementations must supply them.
+ override def getHoles: Seq[Seq[MosaicLineString]]
+ override def flatten: Seq[MosaicGeometry]
+ override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]]
+ override def getShellPoints: Seq[Seq[MosaicPoint]]
+ override def getShells: Seq[MosaicLineString]
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiPolygon.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiPolygon.scala
new file mode 100644
index 000000000..d2f294334
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiPolygon.scala
@@ -0,0 +1,17 @@
+package com.databricks.labs.mosaic.core.geometry
+ * A trait that adds MultiPolygon functionality to MosaicGeometry.
+ */
+//noinspection DuplicatedCode
+trait MosaicMultiPolygon extends MosaicGeometry {
+ /**
+ * @return Returns this multi polygon as a sequence of geometries
+ * (presumably one entry per member polygon - TODO confirm).
+ */
+ def asSeq: Seq[MosaicGeometry]
+ // The members below override inherited members and are left abstract here;
+ // concrete geometry implementations must supply them.
+ override def flatten: Seq[MosaicGeometry]
+ override def getShellPoints: Seq[Seq[MosaicPoint]]
+ override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]]
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicPoint.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicPoint.scala
new file mode 100644
index 000000000..39af7b84d
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicPoint.scala
@@ -0,0 +1,30 @@
+package com.databricks.labs.mosaic.core.geometry
+import com.databricks.labs.mosaic.core.types.Coordinates
+ * A trait that adds Point functionality to MosaicGeometry.
+ */
+trait MosaicPoint extends MosaicGeometry {
+ /** @return Returns the X coordinate of the point. */
+ def getX: Double
+ /** @return Returns the Y coordinate of the point. */
+ def getY: Double
+ /** @return Returns the Z coordinate of the point (behaviour for 2D points is implementation defined - TODO confirm). */
+ def getZ: Double
+ /** @return Returns this point as an instance of [[Coordinates]]. */
+ def geoCoord: Coordinates
+ /**
+ * @return Returns this point as a sequence of doubles
+ * (presumably the coordinate values in x, y[, z] order - TODO confirm).
+ */
+ def asSeq: Seq[Double]
+ // The members below override inherited members and are left abstract here;
+ // concrete geometry implementations must supply them.
+ override def flatten: Seq[MosaicGeometry]
+ override def getShellPoints: Seq[Seq[MosaicPoint]]
+ override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]]
+ override def getShells: Seq[MosaicLineString]
+ override def getHoles: Seq[Seq[MosaicLineString]]
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicPolygon.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicPolygon.scala
new file mode 100644
index 000000000..5f8cdeccd
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicPolygon.scala
@@ -0,0 +1,16 @@
+package com.databricks.labs.mosaic.core.geometry
+ * A trait that adds Polygon functionality to MosaicGeometry.
+ */
+trait MosaicPolygon extends MosaicGeometry {
+ // The members below override inherited members and are left abstract here;
+ // concrete geometry implementations must supply them.
+ override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]]
+ override def flatten: Seq[MosaicGeometry]
+ override def getShellPoints: Seq[Seq[MosaicPoint]]
+ /**
+ * @return Returns this polygon as a sequence of line strings
+ * (presumably its rings, shell and holes - TODO confirm).
+ */
+ def asSeq: Seq[MosaicLineString]
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPI.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPI.scala
new file mode 100644
index 000000000..a90a8c6df
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPI.scala
@@ -0,0 +1,140 @@
+package com.databricks.labs.mosaic.core.geometry.api
+import com.databricks.labs.mosaic.core.codegen.format.GeometryIOCodeGen
+import com.databricks.labs.mosaic.core.geometry._
+import com.databricks.labs.mosaic.core.types._
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+ * An abstract class that defines the API for the geometry frameworks.
+ * In order to integrate a new geometry framework, this class must be extended.
+ * The fully qualified name of the class must be added to the META-INF/services/com.databricks.labs.mosaic.core.geometry.api.GeometryAPI file.
+ * This is where [[com.databricks.labs.mosaic.core.GenericServiceFactory.GeometryAPIFactory]] will look for the available geometry frameworks.
+ *
+ * @param reader An instance of [[GeometryReader]].
+ */
+abstract class GeometryAPI(
+ reader: GeometryReader
+ ) extends Serializable {
+ /** @return Returns the name identifying this geometry API implementation. */
+ def name: String
+ /**
+  * Builds a [[MosaicGeometry]] of the requested type out of a sequence of
+  * [[MosaicPoint]] instances by delegating to the reader's fromSeq.
+  *
+  * @param points The points to assemble into a geometry.
+  * @param geomType The geometry type to construct.
+  * @return An instance of [[MosaicGeometry]].
+  */
+ def pointsToGeometry(points: Seq[MosaicPoint], geomType: GeometryTypeEnum.Value): MosaicGeometry = {
+     reader.fromSeq(points, geomType)
+ }
+ /**
+  * Reads the geometry stored in the first field (ordinal 0) of a spark
+  * internal row. The decoding format is selected by the data type.
+  *
+  * @param inputData
+  *   An instance of [[InternalRow]] whose first field holds the geometry.
+  * @param dataType
+  *   A data type of the geometry (binary, string, hex or geojson).
+  * @return
+  *   An instance of [[MosaicGeometry]].
+  * @throws Error
+  *   if the data type is none of the supported ones.
+  */
+ def rowToGeometry(inputData: InternalRow, dataType: DataType): MosaicGeometry = {
+     // Hex and GeoJSON values are wrapped in a nested row; the payload is its first string field.
+     def nestedString(wrapperType: DataType): String =
+         inputData.get(0, wrapperType).asInstanceOf[InternalRow].getString(0)
+     dataType match {
+         case _: BinaryType => reader.fromWKB(inputData.getBinary(0))
+         case _: StringType => reader.fromWKT(inputData.getString(0))
+         case _: HexType => reader.fromHEX(nestedString(HexType))
+         case _: GeoJSONType => reader.fromJSON(nestedString(GeoJSONType))
+         case _ => throw new Error(s"$dataType not supported.")
+     }
+ }
+ /**
+  * Constructs an instance of [[MosaicGeometry]] from an untyped value, as
+  * handed over by spark's nullSafeEval. The value is cast according to the
+  * data type: byte array for binary, UTF8String for string, and a nested
+  * [[InternalRow]] with a single string field for hex and geojson.
+  *
+  * @param inputData
+  *   The raw value holding the geometry.
+  * @param dataType
+  *   A data type of the geometry.
+  * @return
+  *   An instance of [[MosaicGeometry]].
+  * @throws Error
+  *   if the data type is none of the supported ones.
+  */
+ def valueToGeometry(inputData: Any, dataType: DataType): MosaicGeometry = {
+     // Only evaluated for the wrapped (hex / geojson) formats.
+     def wrappedString: String = inputData.asInstanceOf[InternalRow].getString(0)
+     dataType match {
+         case _: BinaryType => reader.fromWKB(inputData.asInstanceOf[Array[Byte]])
+         case _: StringType => reader.fromWKT(inputData.asInstanceOf[UTF8String].toString)
+         case _: HexType => reader.fromHEX(wrappedString)
+         case _: GeoJSONType => reader.fromJSON(wrappedString)
+         case _ => throw new Error(s"$dataType not supported.")
+     }
+ }
+ /**
+  * Serializes an instance of [[MosaicGeometry]] to a spark internal value.
+  * The format is selected based on the data type: raw WKB bytes for binary,
+  * a UTF8String of WKT for string, and a single-field [[InternalRow]]
+  * holding the text for hex and geojson.
+  *
+  * @param geometry An instance of [[MosaicGeometry]].
+  * @param dataType A data type representing the format.
+  * @return A spark internal data.
+  * @throws Error if the data type is none of the supported ones.
+  */
+ def serialize(geometry: MosaicGeometry, dataType: DataType): Any = {
+     // Wraps a text payload into a one-field internal row.
+     def wrap(text: String): InternalRow = InternalRow.fromSeq(Seq(UTF8String.fromString(text)))
+     dataType match {
+         case _: BinaryType => geometry.toWKB
+         case _: StringType => UTF8String.fromString(geometry.toWKT)
+         case _: HexType => wrap(geometry.toHEX)
+         case _: GeoJSONType => wrap(geometry.toJSON)
+         case _ => throw new Error(s"$dataType not supported.")
+     }
+ }
+ /**
+ * Constructs an instance of [[MosaicPoint]] based on a single instance of [[Coordinates]].
+ *
+ * @param point An instance of [[Coordinates]].
+ * @return An instance of [[MosaicPoint]].
+ */
+ def fromGeoCoord(point: Coordinates): MosaicPoint
+ /**
+ * Constructs an instance of [[MosaicPoint]] based on a collection of [[Double]].
+ *
+ * @param coords A collection of [[Double]] (presumably the coordinate values
+ * in x, y[, z] order - TODO confirm against implementations).
+ * @return An instance of [[MosaicPoint]].
+ */
+ def fromCoords(coords: Seq[Double]): MosaicPoint
+ /**
+ * Accessor for the [[GeometryIOCodeGen]] of this geometry framework.
+ *
+ * @return An instance of [[GeometryIOCodeGen]].
+ */
+ def ioCodeGen: GeometryIOCodeGen
+ /**
+ * Generates a try catch block around the code if required by the geometry framework.
+ * Not all geometry frameworks require this, so it is up to the implementation to decide.
+ *
+ * @param code The code (as source text) to wrap.
+ * @return The wrapped code (presumably the unchanged input when no wrapping is needed - TODO confirm).
+ */
+ def codeGenTryWrap(code: String): String
+ /**
+ * The fully qualified class name of the geometry
+ * (presumably the underlying framework's geometry class - TODO confirm).
+ *
+ * @return The class name.
+ */
+ def geometryClass: String
+ /**
+ * The fully qualified class name of the mosaic geometry
+ * (presumably the [[MosaicGeometry]] implementation of this framework - TODO confirm).
+ *
+ * @return The class name.
+ */
+ def mosaicGeometryClass: String
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryReader.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryReader.scala
new file mode 100644
index 000000000..9157faf70
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryReader.scala
@@ -0,0 +1,24 @@
+package com.databricks.labs.mosaic.core.geometry.api
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.types.GeometryTypeEnum
+ * A trait that defines the methods for reading geometry data.
+ * If a new format requires support, fromFormat method should be added to this trait.
+ */
+trait GeometryReader {
+ // Default spatial reference id; 4326 (presumably EPSG:4326 / WGS84 - TODO confirm how readers apply it).
+ val defaultSpatialReferenceId: Int = 4326
+ /** Reads a geometry from its WKB (byte array) representation. */
+ def fromWKB(wkb: Array[Byte]): MosaicGeometry
+ /** Reads a geometry from its WKT (text) representation. */
+ def fromWKT(wkt: String): MosaicGeometry
+ /** Reads a geometry from its GeoJSON string representation. */
+ def fromJSON(geoJson: String): MosaicGeometry
+ /** Reads a geometry from its HEX string representation (presumably hex-encoded WKB - TODO confirm). */
+ def fromHEX(hex: String): MosaicGeometry
+ /**
+ * Assembles a geometry of the requested type from a sequence of geometries.
+ *
+ * @param geomSeq The geometries to assemble.
+ * @param geomType The geometry type to construct.
+ */
+ def fromSeq[T <: MosaicGeometry](geomSeq: Seq[T], geomType: GeometryTypeEnum.Value): MosaicGeometry
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryWriter.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryWriter.scala
new file mode 100644
index 000000000..95f1b6244
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryWriter.scala
@@ -0,0 +1,17 @@
+package com.databricks.labs.mosaic.core.geometry.api
+ * A trait that defines the methods for writing geometry data.
+ * If a new format requires support, toFormat method should be added to this trait.
+ */
+trait GeometryWriter {
+ /** @return Returns the WKB (byte array) representation. */
+ def toWKB: Array[Byte]
+ /** @return Returns the WKT (text) representation. */
+ def toWKT: String
+ /** @return Returns the GeoJSON string representation. */
+ def toJSON: String
+ /** @return Returns the HEX string representation (presumably hex-encoded WKB - TODO confirm). */
+ def toHEX: String
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/index/IndexSystem.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/index/IndexSystem.scala
new file mode 100644
index 000000000..1c7d8b83d
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/index/IndexSystem.scala
@@ -0,0 +1,385 @@
+package com.databricks.labs.mosaic.core.index
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.types.{Coordinates, GeometryTypeEnum, MosaicChip}
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+ * Defines the API that all index systems need to respect for Mosaic to support
+ * them.
+ */
+abstract class IndexSystem(var cellIdType: DataType) extends Serializable {
+ /**
+ * Computes the distance between two cell IDs in the given index system.
+ * Abstract: each index system supplies its own grid distance.
+ *
+ * @param cellId Cell ID 1
+ * @param cellId2 Cell ID 2
+ * @return Distance between the two cell IDs measured in the index system's unit (number of cells).
+ */
+ def distance(cellId: Long, cellId2: Long): Long
+ /**
+ * @return Returns the data type of the cell IDs, i.e. the current value of
+ * the mutable cellIdType constructor field.
+ */
+ def getCellIdDataType: DataType = cellIdType
+ /**
+  * Replaces the data type used for the cell IDs of this index system.
+  * This mutates the instance: later calls that consult the cell ID data
+  * type will see the new value.
+  *
+  * @param dataType Data type of the cell IDs.
+  */
+ def setCellIdDataType(dataType: DataType): Unit = {
+     this.cellIdType = dataType
+ }
+ /**
+ * Used for index systems that have human-readable resolutions formats.
+ *
+ * @param resolution Resolution value to be rendered.
+ * @return Returns the resolution as a string.
+ */
+ def getResolutionStr(resolution: Int): String
+ /**
+  * Converts a cell ID into the representation that matches the requested
+  * data type. For LongType the result is a Long (strings are parsed); for
+  * StringType the result is a String (longs are formatted, UTF8String is
+  * unwrapped).
+  *
+  * @param cellId Cell ID given as Long, String or UTF8String.
+  * @param dt Target data type, LongType or StringType.
+  * @return Returns the cell ID as a Long or as a String, depending on dt.
+  * @throws Error if the combination of data type and value is not supported.
+  */
+ def formatCellId(cellId: Any, dt: DataType): Any = {
+     def unsupported: Nothing = throw new Error("Cell ID data type not supported.")
+     dt match {
+         case LongType => cellId match {
+                 case _: Long => cellId
+                 case text: String => parse(text)
+                 case text: UTF8String => parse(text.toString)
+                 case _ => unsupported
+             }
+         case StringType => cellId match {
+                 case id: Long => format(id)
+                 case text: UTF8String => text.toString
+                 case _: String => cellId
+                 case _ => unsupported
+             }
+         case _ => unsupported
+     }
+ }
+ /**
+  * Converts a cell ID into the representation that matches the data type
+  * currently configured for this index system.
+  *
+  * @param cellId Cell ID to be converted.
+  * @return Returns the cell ID as a Long or as a String, depending on the configured data type.
+  */
+ def formatCellId(cellId: Any): Any = {
+     val configuredType = getCellIdDataType
+     formatCellId(cellId, configuredType)
+ }
+ /**
+  * Serializes a cell ID to a type that is supported by spark, following the
+  * configured cell ID data type: a Long for LongType and a UTF8String for
+  * StringType.
+  *
+  * @param cellId Cell ID to be serialized (Long, String or UTF8String).
+  * @return Returns the serialized cell ID.
+  * @throws Error if the combination of data type and value is not supported.
+  */
+ def serializeCellId(cellId: Any): Any = {
+     def unsupported: Nothing = throw new Error("Cell ID data type not supported.")
+     getCellIdDataType match {
+         case LongType => cellId match {
+                 case _: Long => cellId
+                 case text: String => parse(text)
+                 case text: UTF8String => parse(text.toString)
+                 case _ => unsupported
+             }
+         case StringType => cellId match {
+                 case id: Long => UTF8String.fromString(format(id))
+                 case _: UTF8String => cellId
+                 case text: String => UTF8String.fromString(text)
+                 case _ => unsupported
+             }
+         case _ => unsupported
+     }
+ }
+ /**
+ * Converts a cell ID to a human-readable string.
+ * Abstract: the string format is defined by each index system.
+ *
+ * @param id Cell ID to be converted.
+ * @return Returns the cell ID as a string.
+ */
+ def format(id: Long): String
+ /**
+ * Parses a cell ID provided in a human-readable string.
+ * Abstract: the accepted string format is defined by each index system.
+ *
+ * @param id Cell ID to be parsed.
+ * @return Returns the cell ID as a Long.
+ */
+ def parse(id: String): Long
+ /**
+ * Get the k ring of indices around the provided index id.
+ * Abstract: supplied by each index system.
+ *
+ * @param index
+ * Index ID to be used as a center of k ring.
+ * @param n
+ * Number of k rings to be generated around the input index.
+ * @return
+ * A collection of index IDs forming a k ring.
+ */
+ def kRing(index: Long, n: Int): Seq[Long]
+ /** String flavour of kRing: parses the id, delegates to the Long overload and formats every resulting id. */
+ def kRing(index: String, n: Int): Seq[String] = {
+     val ring = kRing(parse(index), n)
+     ring.map(id => format(id))
+ }
+ /**
+ * Get the k loop (hollow ring) of indices around the provided index id.
+ * Abstract: supplied by each index system.
+ *
+ * @param index
+ * Index ID to be used as a center of k loop.
+ * @param n
+ * Distance of k loop to be generated around the input index.
+ * @return
+ * A collection of index IDs forming a k loop.
+ */
+ def kLoop(index: Long, n: Int): Seq[Long]
+ /** String flavour of kLoop: parses the id, delegates to the Long overload and formats every resulting id. */
+ def kLoop(index: String, n: Int): Seq[String] = {
+     val loop = kLoop(parse(index), n)
+     loop.map(id => format(id))
+ }
+ /**
+ * Returns the set of supported resolutions for the given index system.
+ * This doesn't have to be a continuous set of values. Only values provided
+ * in this set are considered valid.
+ *
+ * @return
+ * A set of supported resolutions.
+ */
+ def resolutions: Set[Int]
+ /**
+ * Returns the name of the IndexSystem.
+ *
+ * @return
+ * IndexSystem name.
+ */
+ def name: String
+ /**
+ * Returns the resolution value based on the nullSafeEval method inputs of
+ * type Any. Each Index System should ensure that only valid values of
+ * resolution are accepted.
+ *
+ * @param res
+ * Any type input to be parsed into the Int representation of resolution.
+ * @return
+ * Int value representing the resolution.
+ * @throws IllegalStateException
+ * declared via the annotation below; presumably raised when the input
+ * cannot be mapped to a supported resolution - TODO confirm against
+ * implementations.
+ */
+ @throws[IllegalStateException]
+ def getResolution(res: Any): Int
+ /**
+ * Computes the radius of minimum enclosing circle of the polygon
+ * corresponding to the centroid index of the provided geometry.
+ *
+ * @param geometry
+ * An instance of [[MosaicGeometry]] for which we are computing the
+ * optimal buffer radius.
+ * @param resolution
+ * A resolution to be used to get the centroid index geometry.
+ * @param geometryAPI
+ * An instance of [[GeometryAPI]] made available to the implementation
+ * for geometry construction.
+ * @return
+ * An optimal radius to buffer the geometry in order to avoid blind spots
+ * when performing polyfill.
+ */
+ def getBufferRadius(geometry: MosaicGeometry, resolution: Int, geometryAPI: GeometryAPI): Double
+ /**
+ * Returns a set of indices that represent the input geometry. Depending on
+ * the index system this set may include only indices whose centroids fall
+ * inside the input geometry or any index that intersects the input
+ * geometry. When extending make sure which is the guaranteed behavior of
+ * the index system.
+ *
+ * @param geometry
+ * Input geometry to be represented.
+ * @param resolution
+ * A resolution of the indices.
+ * @param geometryAPI
+ * Optional [[GeometryAPI]], None by default; whether it is required is
+ * up to the implementing index system.
+ * @return
+ * A set of indices representing the input geometry.
+ */
+ def polyfill(geometry: MosaicGeometry, resolution: Int, geometryAPI: Option[GeometryAPI] = None): Seq[Long]
+ /**
+  * Builds the [[MosaicChip]] representation of the border area of a
+  * geometry. Each border cell is intersected with the geometry and the
+  * intersection is coerced with respect to the cell. A chip whose coerced
+  * geometry equals the whole cell geometry is flagged as core; its
+  * geometry is kept only when keepCoreGeom is set, otherwise it is null.
+  * Chips reporting isEmpty are dropped from the result.
+  *
+  * @param geometry
+  *   Input geometry whose border is being represented.
+  * @param borderIndices
+  *   Indices corresponding to the border area of the input geometry.
+  * @param keepCoreGeom
+  *   Whether chips detected as core should carry their geometry.
+  * @param geometryAPI
+  *   Geometry API used to build cell geometries.
+  * @return
+  *   A border area representation via [[MosaicChip]] set.
+  */
+ def getBorderChips(
+     geometry: MosaicGeometry,
+     borderIndices: Seq[Long],
+     keepCoreGeom: Boolean,
+     geometryAPI: GeometryAPI
+ ): Seq[MosaicChip] = {
+     val chips = borderIndices.map { cellId =>
+         val cellGeom = indexToGeometry(cellId, geometryAPI)
+         val overlap = geometry.intersection(cellGeom)
+         val clipped = coerceChipGeometry(overlap, cellId, geometryAPI)
+         // The cell is fully covered when clipping leaves the cell geometry untouched.
+         val coversCell = clipped.equals(cellGeom)
+         val keptGeom = if (coversCell && !keepCoreGeom) null else clipped
+         MosaicChip(isCore = coversCell, Left(cellId), keptGeom)
+     }
+     chips.filter(chip => !chip.isEmpty)
+ }
+ /**
+  * Converts every core index into a [[MosaicChip]] flagged as core. The
+  * chip carries the cell geometry only when keepCoreGeom is set; otherwise
+  * the chip geometry is null, since a core cell is fully contained by the
+  * geometry whose core it represents.
+  *
+  * @param coreIndices
+  *   Indices corresponding to the core area of the input geometry.
+  * @param keepCoreGeom
+  *   Whether the chips should carry the cell geometry.
+  * @param geometryAPI
+  *   Geometry API used to build cell geometries.
+  * @return
+  *   A core area representation via [[MosaicChip]] set.
+  */
+ def getCoreChips(coreIndices: Seq[Long], keepCoreGeom: Boolean, geometryAPI: GeometryAPI): Seq[MosaicChip] = {
+     for (cellId <- coreIndices) yield {
+         val cellGeom = if (keepCoreGeom) indexToGeometry(cellId, geometryAPI) else null
+         MosaicChip(isCore = true, Left(cellId), cellGeom)
+     }
+ }
+ /**
+ * Get the geometry corresponding to the index with the input id.
+ *
+ * @param index
+ * Id of the index (as a Long) whose geometry should be returned.
+ * @param geometryAPI
+ * Geometry API used to construct the resulting geometry.
+ * @return
+ * An instance of [[MosaicGeometry]] corresponding to index.
+ */
+ def indexToGeometry(index: Long, geometryAPI: GeometryAPI): MosaicGeometry
+ /**
+ * Get the geometry corresponding to the index with the input id.
+ *
+ * @param index
+ * Id of the index (as a String) whose geometry should be returned.
+ * @param geometryAPI
+ * Geometry API used to construct the resulting geometry.
+ * @return
+ * An instance of [[MosaicGeometry]] corresponding to index.
+ */
+ def indexToGeometry(index: String, geometryAPI: GeometryAPI): MosaicGeometry
+ /**
+ * Get the index ID corresponding to the provided coordinates.
+ * Note the argument order: X (lon) comes first, then Y (lat).
+ *
+ * @param lon
+ * X coordinate of the point.
+ * @param lat
+ * Y coordinate of the point.
+ * @param resolution
+ * Resolution of the index.
+ * @return
+ * Index ID in this index system.
+ */
+ def pointToIndex(lon: Double, lat: Double, resolution: Int): Long
+ /**
+ * Get the centroid of the index with the input cell id.
+ * Abstract: supplied by each index system.
+ *
+ * @param index Cell ID in this index system.
+ * @return Centroid of the cell as [[Coordinates]].
+ */
+ def indexToCenter(index: Long): Coordinates
+ /** String flavour of indexToCenter: parses the id and delegates to the Long overload. */
+ def indexToCenter(index: String): Coordinates = {
+     val cellId = parse(index)
+     indexToCenter(cellId)
+ }
+ /**
+ * Get the boundary of the index with the input cell id.
+ * Abstract: supplied by each index system.
+ *
+ * @param index Cell ID in this index system.
+ * @return Boundary of the cell as a sequence of [[Coordinates]].
+ */
+ def indexToBoundary(index: Long): Seq[Coordinates]
+ /** String flavour of indexToBoundary: parses the id and delegates to the Long overload. */
+ def indexToBoundary(index: String): Seq[Coordinates] = {
+     val cellId = parse(index)
+     indexToBoundary(cellId)
+ }
+ /**
+  * Computes the area of the cell with the input cell id.
+  * The cell is split into triangles (cell center + two consecutive boundary
+  * vertices); the spherical area of each triangle is computed from its side
+  * lengths and the results are summed up.
+  * ASSUMPTION: index cells are convex. If index cells are not convex, you must override this method
+  *
+  * @param index Cell ID in this index system.
+  * @return Area of the cell (presumably in square kilometres, given the
+  *         radius constant used below - TODO confirm).
+  */
+ def area(index: Long): Double = {
+     // Central angle (in radians) between two coordinates given in degrees,
+     // derived from the chord length between the two points on a unit sphere.
+     def centralAngle(from: Coordinates, to: Coordinates): Double = {
+         val degToRad = math.Pi / 180
+         val lat1 = degToRad * from.lat
+         val lat2 = degToRad * to.lat
+         val deltaLng = degToRad * (from.lng - to.lng)
+         val dz = math.sin(lat1) - math.sin(lat2)
+         val dx = math.cos(deltaLng) * math.cos(lat1) - math.cos(lat2)
+         val dy = math.sin(deltaLng) * math.cos(lat1)
+         math.asin(math.sqrt(dx * dx + dy * dy + dz * dz) / 2) * 2
+     }
+     // Area of the spherical triangle (apex, edge(0), edge(1)): the spherical
+     // excess is obtained from the three side lengths (L'Huilier's formula).
+     def sphericalTriangleArea(edge: Seq[Coordinates], apex: Coordinates): Double = {
+         val first = edge.head
+         val second = edge(1)
+         val a = centralAngle(apex, first)
+         val b = centralAngle(first, second)
+         val c = centralAngle(second, apex)
+         val s = (a + b + c) / 2
+         val t = math.sqrt(
+             math.tan(s / 2)
+                 * math.tan((s - a) / 2)
+                 * math.tan((s - b) / 2)
+                 * math.tan((s - c) / 2)
+         )
+         val excess = 4 * math.atan(t)
+         // presumably the mean Earth radius in kilometres - TODO confirm
+         val radius = 6371.0088
+         excess * radius * radius
+     }
+     val cellCenter = indexToCenter(index)
+     val cellBoundary = indexToBoundary(index)
+     // Close the ring so that the last vertex is paired with the first one.
+     val closedRing = cellBoundary ++ Seq(cellBoundary.head)
+     closedRing.sliding(2).map(edge => sphericalTriangleArea(edge, cellCenter)).sum
+ }
+ /** String flavour of area: parses the id and delegates to the Long overload. */
+ def area(index: String): Double = {
+     val cellId = parse(index)
+     area(cellId)
+ }
+ /**
+  * Coerce the geometry with respect to the input cell. Geometry collections
+  * have the boundary of the cell geometry subtracted from them; every other
+  * geometry type is returned unchanged. This is needed because some
+  * frameworks return a geometry collection when the intersection takes the
+  * boundaries of the geometries into account.
+  *
+  * @param geom Geometry to coerce
+  * @param cell Cell to coerce to
+  * @param geometryAPI Geometry API to use
+  * @return Coerced geometry
+  */
+ def coerceChipGeometry(geom: MosaicGeometry, cell: Long, geometryAPI: GeometryAPI): MosaicGeometry =
+     GeometryTypeEnum.fromString(geom.getGeometryType) match {
+         case GEOMETRYCOLLECTION =>
+             // This case can occur if partial geometry is a geometry collection
+             // or if the intersection includes a part of the boundary of the cell
+             val cellBoundary = indexToGeometry(cell, geometryAPI).getBoundary
+             geom.difference(cellBoundary)
+         case _ => geom
+     }
+ // TODO: This logic is questionable and should be fixed.
+ // Only one family of geometry types is kept: polygons win over lines and
+ // lines win over points; geometries of any other family are dropped.
+ def coerceChipGeometry(geometries: Seq[MosaicGeometry]): Seq[MosaicGeometry] = {
+     def typeOf(g: MosaicGeometry) = GeometryTypeEnum.fromString(g.getGeometryType)
+     val presentTypes = geometries.map(_.getGeometryType).map(GeometryTypeEnum.fromString)
+     // Type families in priority order; the first one present in the input is kept.
+     val families = Seq(
+         Seq(POLYGON, MULTIPOLYGON),
+         Seq(MULTILINESTRING, LINESTRING),
+         Seq(MULTIPOINT, POINT)
+     )
+     families.find(family => family.exists(t => presentTypes.contains(t))) match {
+         case Some(family) => geometries.filter(g => family.contains(typeOf(g)))
+         case None => Nil
+     }
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/package.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/package.scala
new file mode 100644
index 000000000..a1705f109
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/package.scala
@@ -0,0 +1,19 @@
+package com.databricks.labs.mosaic
+ * This package object contains all the constants used in the Mosaic library.
+ */
+package object core {
+ // Fully qualified name of the Databricks SQL functions module.
+ val DATABRICKS_SQL_FUNCTIONS_MODULE = "com.databricks.sql.functions"
+ // The keys below are presumably spark configuration keys (they all use the "spark." prefix) - TODO confirm at the call sites.
+ // Key for the Databricks H3 enablement flag.
+ val SPARK_DATABRICKS_GEO_H3_ENABLED = "spark.databricks.geo.h3.enabled"
+ // Key selecting the index system.
+ val MOSAIC_INDEX_SYSTEM = "spark.databricks.labs.mosaic.index.system"
+ // Key selecting the index system factory.
+ val MOSAIC_INDEX_SYSTEM_FACTORY: String = "spark.databricks.labs.mosaic.index.system.factory"
+ // Key selecting the geometry API.
+ val MOSAIC_GEOMETRY_API = "spark.databricks.labs.mosaic.geometry.api"
+ // Key selecting the raster API.
+ val MOSAIC_RASTER_API = "spark.databricks.labs.mosaic.raster.api"
+ // Key related to the native GDAL setup.
+ val MOSAIC_GDAL_NATIVE = "spark.databricks.labs.mosaic.gdal.native"
+ // Key for the raster checkpoint location, followed by its default value (a dbfs path).
+ val MOSAIC_RASTER_CHECKPOINT = "spark.databricks.labs.mosaic.raster.checkpoint"
+ val MOSAIC_RASTER_CHECKPOINT_DEFAULT = "dbfs:/tmp/mosaic/raster/checkpoint"
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/MosaicRaster.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/MosaicRaster.scala
new file mode 100644
index 000000000..fc740c2e6
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/MosaicRaster.scala
@@ -0,0 +1,91 @@
+package com.databricks.labs.mosaic.core.raster
+ * A base API for managing raster data in Mosaic. Any raster abstraction should
+ * extend this trait.
+ *
+ * @param path
+ * The path to the raster file. This has to be a path that can be read by the
+ * worker nodes.
+ * @param memSize
+ * The amount of memory occupied by the file in bytes.
+ */
+abstract class MosaicRaster(path: String, memSize: Long) extends Serializable {
+ /**
+ * @return Returns the geo transform of the raster. The geo transform is
+ * a Seq[Double] of [xOrigin, xPixelSize, xSkew, yOrigin, ySkew, yPixelSize].
+ */
+ def getGeoTransform: Seq[Double]
+ /**
+ * Writes out the current raster to the given checkpoint path. The raster
+ * is written out as a GeoTiff. Only single subdataset is supported. Apply
+ * mask to all bands. Trim down the raster to the provided extent.
+ *
+ * @param stageId
+ * the UUID of the computation stage generating the raster. Used to avoid
+ * writing collisions.
+ * @param rasterId
+ * the id of the raster (a Long). Used to avoid writing collisions.
+ * @param extent
+ * The extent to trim the raster to, given as four Ints (the meaning and
+ * order of the components is defined by the implementation - TODO
+ * confirm).
+ * @param checkpointPath
+ * The path to write the raster to.
+ * @return
+ * Returns the path to the written raster.
+ */
+ def saveCheckpoint(stageId: String, rasterId: Long, extent: (Int, Int, Int, Int), checkpointPath: String): String
+ /** @return Returns the metadata of the raster file as key->value pairs. */
+ def metadata: Map[String, String]
+ /**
+ * @return
+ * Returns the key->value pairs of subdataset->description for the
+ * raster.
+ */
+ def subdatasets: Map[String, String]
+ /** @return Returns the number of bands in the raster. */
+ def numBands: Int
+ /** @return Returns the SRID in the raster. */
+ def SRID: Int
+ /** @return Returns the proj4 projection string in the raster. */
+ def proj4String: String
+ /** @return Returns the x size of the raster. */
+ def xSize: Int
+ /** @return Returns the y size of the raster. */
+ def ySize: Int
+ /**
+ * @param bandId The id of the band to fetch (whether ids start at 0 or 1 is up to the implementation - TODO confirm).
+ * @return Returns the bandId-th Band from the raster.
+ */
+ def getBand(bandId: Int): MosaicRasterBand
+ /** @return Returns the extent(xmin, ymin, xmax, ymax) of the raster. */
+ def extent: Seq[Double]
+ /** Cleans up the raster driver and references. */
+ def cleanUp(): Unit
+ /**
+ * @return Returns the amount of memory occupied by the file in bytes,
+ * i.e. the memSize value passed to the constructor.
+ */
+ def getMemSize: Long = memSize
+ /**
+ * A template method for transforming the raster bands into new bands. Each
+ * band is transformed into a new band using the transform function.
+ * Override this method for tiling, clipping, warping, etc. type of
+ * expressions.
+ *
+ * @tparam T
+ * The type of the result from the transformation of a band.
+ * @param f
+ * The transform function. Will be applied on each band.
+ * @return
+ * The results of the transform function as a sequence
+ * (presumably one entry per band - TODO confirm against implementations).
+ */
+ def transformBands[T](f: MosaicRasterBand => T): Seq[T]
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterBand.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterBand.scala
new file mode 100644
index 000000000..af8f204a0
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterBand.scala
@@ -0,0 +1,132 @@
+package com.databricks.labs.mosaic.core.raster
+ * A base API for managing raster bands in Mosaic. Any raster band abstraction
+ * should extend this trait.
+ */
+trait MosaicRasterBand extends Serializable {
+ /** @return Returns the bandId of the band. */
+ def index: Int
+ /** @return Returns the description of the band. */
+ def description: String
+ /** @return Returns the metadata of the band as key->value pairs. */
+ def metadata: Map[String, String]
+ /** @return Returns the unit type of the band pixels. */
+ def units: String
+ /** @return Returns the data type (numeric code) of the band pixels. */
+ def dataType: Int
+ /** @return Returns the x size of the band. */
+ def xSize: Int
+ /** @return Returns the y size of the band. */
+ def ySize: Int
+ /** @return Returns the minimum pixel value of the band. */
+ def minPixelValue: Double
+ /** @return Returns the maximum pixel value of the band. */
+ def maxPixelValue: Double
+ /**
+ * @return
+ * Returns the value used to represent transparent pixels of the band.
+ */
+ def noDataValue: Double
+ /**
+ * @return
+ * Returns the scale in which pixels are represented. It is the unit
+ * value of a pixel. If the pixel value is 5.1 and pixel scale is 10.0
+ * then the actual pixel value is 51.0.
+ */
+ def pixelValueScale: Double
+ /**
+ * @return
+ * Returns the offset in which pixels are represented. It is the value
+ * added to a pixel value. If the pixel value is 5.1 and pixel offset is
+ * 10.0 then the actual pixel value is 15.1.
+ */
+ def pixelValueOffset: Double
+ /**
+ * @param pixelValue
+ * The raw pixel value to convert.
+ * @return
+ * Returns the pixel value with scale and offset applied. If the pixel
+ * value is 5.1 and pixel scale is 10.0 and pixel offset is 10.0 then the
+ * actual pixel value is 61.0.
+ */
+ def pixelValueToUnitValue(pixelValue: Double): Double
+ /**
+  * Reads the whole band: offset (0, 0) and the full x and y size.
+  *
+  * @return
+  *   Returns the pixels of the raster as a 1D array.
+  */
+ def values: Array[Double] = {
+     val width = xSize
+     val height = ySize
+     values(0, 0, width, height)
+ }
+ /**
+  * Reads the mask of the whole band: offset (0, 0) and the full x and y size.
+  *
+  * @return
+  *   Returns the mask pixels of the raster as a 1D array.
+  */
+ def maskValues: Array[Double] = {
+     val width = xSize
+     val height = ySize
+     maskValues(0, 0, width, height)
+ }
+ /**
+ * Reads a window of pixels from the band.
+ *
+ * @param xOffset
+ * The x offset of the raster. The x offset is the number of pixels to
+ * skip from the left. 0 <= xOffset < xSize
+ *
+ * @param yOffset
+ * The y offset of the raster. The y offset is the number of pixels to
+ * skip from the top. 0 <= yOffset < ySize
+ *
+ * @param xSize
+ * The x size of the raster to be read. Note that this parameter shadows
+ * the xSize member of the band.
+ *
+ * @param ySize
+ * The y size of the raster to be read. Note that this parameter shadows
+ * the ySize member of the band.
+ * @return
+ * Returns the pixels of the raster as a 1D array with offset and size
+ * applied.
+ */
+ def values(xOffset: Int, yOffset: Int, xSize: Int, ySize: Int): Array[Double]
+ /**
+ * Reads a window of mask pixels from the band.
+ *
+ * @param xOffset
+ * The x offset of the raster. The x offset is the number of pixels to
+ * skip from the left. 0 <= xOffset < xSize
+ *
+ * @param yOffset
+ * The y offset of the raster. The y offset is the number of pixels to
+ * skip from the top. 0 <= yOffset < ySize
+ *
+ * @param xSize
+ * The x size of the raster to be read. Note that this parameter shadows
+ * the xSize member of the band.
+ *
+ * @param ySize
+ * The y size of the raster to be read. Note that this parameter shadows
+ * the ySize member of the band.
+ * @return
+ * Returns the mask pixels of the raster as a 1D array with offset and size
+ * applied.
+ */
+ def maskValues(xOffset: Int, yOffset: Int, xSize: Int, ySize: Int): Array[Double]
+ /**
+ * Apply f to all pixels in the raster. Overridden in subclasses to define
+ * the behavior.
+ * @param f
+ * the function to apply to each pixel. It receives two Ints and a Double
+ * (presumably the pixel position and the pixel value - TODO confirm
+ * against implementations).
+ * @param default
+ * the default value to use if the pixel is noData (null when omitted).
+ * @tparam T
+ * the return type of the function.
+ * @return
+ * a nested sequence of the results of applying f to each pixel.
+ */
+ def transformValues[T](f: (Int, Int, Double) => T, default: T = null): Seq[Seq[T]]
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/RasterAPI.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/RasterAPI.scala
new file mode 100644
index 000000000..2c84a24cd
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/RasterAPI.scala
@@ -0,0 +1,76 @@
+package com.databricks.labs.mosaic.core.raster
+ * An abstract base class for all Raster APIs.
+ * @param reader
+ * The RasterReader to use for reading the raster.
+ */
+abstract class RasterAPI(reader: RasterReader) extends Serializable {
+ /**
+ * This method should be called in every raster expression if the RasterAPI
+ * requires enablement on worker nodes.
+ */
+ def enable(): Unit
+ /** @return Returns the name of the raster API. */
+ def name: String
+ /**
+ * Reads a raster from the given path.
+ *
+ * @param path
+ * The path to the raster. This path has to be a path to a single raster.
+ * Rasters with subdatasets are supported.
+ * @return
+ * Returns a Raster object.
+ */
+ def raster(path: String): MosaicRaster = reader.readRaster(path)
+ /**
+ * Reads a raster from the given path. It extracts the specified band from
+ * the raster.
+ *
+ * @param path
+ * The path to the raster. This path has to be a path to a single raster.
+ * Rasters with subdatasets are supported.
+ * @param bandIndex
+ * The index of the band to read from the raster.
+ * @return
+ * Returns a Raster band object.
+ */
+ def band(path: String, bandIndex: Int): MosaicRasterBand = reader.readBand(path, bandIndex)
+ /**
+  * Converts raster pixel coordinates to world coordinates by delegating to
+  * the underlying reader.
+  *
+  * @param gt
+  *   Geo transform of the raster.
+  * @param x
+  *   X coordinate of the pixel.
+  * @param y
+  *   Y coordinate of the pixel.
+  * @return
+  *   The (x, y) pair returned by the reader's toWorldCoord, in that order.
+  */
+ def toWorldCoord(gt: Seq[Double], x: Int, y: Int): (Double, Double) =
+     reader.toWorldCoord(gt, x, y) match {
+         case (worldX, worldY) => (worldX, worldY)
+     }
+ /**
+  * Converts world coordinates to raster pixel coordinates by delegating to
+  * the underlying reader.
+  *
+  * @param gt
+  *   Geo transform of the raster.
+  * @param x
+  *   X coordinate of the point.
+  * @param y
+  *   Y coordinate of the point.
+  * @return
+  *   The (xPixel, yPixel) pair returned by the reader's fromWorldCoord.
+  */
+ def fromWorldCoord(gt: Seq[Double], x: Double, y: Double): (Int, Int) =
+     reader.fromWorldCoord(gt, x, y) match {
+         case (pixelX, pixelY) => (pixelX, pixelY)
+     }
\ No newline at end of file
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/RasterReader.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/RasterReader.scala
new file mode 100644
index 000000000..34d88fd22
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/RasterReader.scala
@@ -0,0 +1,78 @@
+package com.databricks.labs.mosaic.core.raster
+import org.apache.spark.internal.Logging
+ * RasterReader is a trait that defines the interface for reading raster data
+ * from a file system path. It is used by the RasterAPI to read raster and
+ * raster band data.
+ * @note
+ * For subdatasets the path should be the path to the subdataset and not to
+ * the file.
+ */
+trait RasterReader extends Logging {
+ /**
+ * Reads a raster from a file system path. Reads a subdataset if the path
+ * is to a subdataset.
+ *
+ * @example
+ * Raster: path = "file:///path/to/file.tif" Subdataset: path =
+ * "file:///path/to/file.tif:subdataset"
+ * @param path
+ * The path to the raster file.
+ * @return
+ * A MosaicRaster object.
+ */
+ def readRaster(path: String): MosaicRaster
+ /**
+ * Reads a raster band from a file system path. Reads a subdataset band if
+ * the path is to a subdataset.
+ * @example
+ * Raster: path = "file:///path/to/file.tif" Subdataset: path =
+ * "file:///path/to/file.tif:subdataset"
+ * @param path
+ * The path to the raster file.
+ *
+ * @param bandIndex
+ * The band index to read.
+ * @return
+ * A MosaicRasterBand object.
+ */
+ def readBand(path: String, bandIndex: Int): MosaicRasterBand
+ /**
+ * Take a geo transform matrix and x and y coordinates of a pixel and
+ * returns the x and y coordinates in the projection of the raster.
+ *
+ * @param geoTransform
+ * The geo transform matrix of the raster.
+ *
+ * @param x
+ * The x coordinate of the pixel.
+ * @param y
+ * The y coordinate of the pixel.
+ * @return
+ * A tuple of doubles with the x and y coordinates in the projection of
+ * the raster.
+ */
+ def toWorldCoord(geoTransform: Seq[Double], x: Int, y: Int): (Double, Double)
+ /**
+ * Take a geo transform matrix and x and y coordinates of a point and
+ * returns the x and y coordinates of the raster pixel.
+ *
+ * @param geoTransform
+ * The geo transform matrix of the raster.
+ *
+ * @param x
+ * The x coordinate of the point.
+ * @param y
+ * The y coordinate of the point.
+ * @return
+ * A tuple of integers with the x and y coordinates of the raster pixel.
+ */
+ def fromWorldCoord(geoTransform: Seq[Double], x: Double, y: Double): (Int, Int)
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/ChipType.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/ChipType.scala
new file mode 100644
index 000000000..dc6788271
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/ChipType.scala
@@ -0,0 +1,30 @@
+package com.databricks.labs.mosaic.core.types
+import org.apache.spark.sql.types._
+ * Type definition for Chip. Chip is defined as (is_core: boolean, index_id:
+ * long | integer | string, wkb: binary); the index_id type is chosen when the
+ * type is built.
+ */
+class ChipType(fields: Array[StructField]) extends StructType(fields) {
+ override def simpleString: String = "CHIP"
+ override def typeName: String = "struct"
+object ChipType {
+ /**
+  * Builds the chip schema (is_core, index_id, wkb) for the given cell id
+  * type.
+  *
+  * @param idType
+  *   Data type of the index_id field. Must be LongType, IntegerType or
+  *   StringType.
+  * @return
+  *   A ChipType whose index_id field uses idType.
+  * @throws IllegalArgumentException
+  *   if idType is not one of the supported types.
+  */
+ def apply(idType: DataType): ChipType = {
+     val supportedIdTypes = Seq(LongType, IntegerType, StringType)
+     // Name the offending type so a failure is diagnosable from the message.
+     require(
+       supportedIdTypes.contains(idType),
+       s"Unsupported index_id type: $idType. Must be one of ${supportedIdTypes.mkString(", ")}."
+     )
+     new ChipType(
+       Array(
+         StructField("is_core", BooleanType),
+         StructField("index_id", idType),
+         StructField("wkb", BinaryType)
+       )
+     )
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/Coordinates.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/Coordinates.scala
new file mode 100644
index 000000000..6b9ead00a
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/Coordinates.scala
@@ -0,0 +1,3 @@
+package com.databricks.labs.mosaic.core.types
+case class Coordinates(lat: Double, lng: Double)
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/GeoJSONType.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/GeoJSONType.scala
new file mode 100644
index 000000000..387e946d0
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/GeoJSONType.scala
@@ -0,0 +1,21 @@
+package com.databricks.labs.mosaic.core.types
+import org.apache.spark.sql.types._
+ * Type definition for GeoJSON encoding. GeoJSON encoding is defined as (json:
+ * string). This abstraction over StringType is needed to ensure matching can
+ * distinguish between StringType (WKT) and GeoJSONType (GEOJSON).
+ */
+class GeoJSONType()
+ extends StructType(
+ Array(
+ StructField("json", StringType)
+ )
+ ) {
+ override def simpleString: String = "GEOJSON"
+ override def typeName: String = "struct"
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/GeometryTypeEnum.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/GeometryTypeEnum.scala
new file mode 100644
index 000000000..19279cc33
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/GeometryTypeEnum.scala
@@ -0,0 +1,57 @@
+package com.databricks.labs.mosaic.core.types
+import java.util.Locale
+import scala.collection.immutable
+ * Enumeration of geometry types supported by Mosaic.
+ */
+object GeometryTypeEnum extends Enumeration {
+ val POINT: GeometryTypeEnum.Value = Value(1, "POINT")
+ val MULTIPOINT: GeometryTypeEnum.Value = Value(2, "MULTIPOINT")
+ val LINESTRING: GeometryTypeEnum.Value = Value(3, "LINESTRING")
+ val MULTILINESTRING: GeometryTypeEnum.Value = Value(4, "MULTILINESTRING")
+ val POLYGON: GeometryTypeEnum.Value = Value(5, "POLYGON")
+ val MULTIPOLYGON: GeometryTypeEnum.Value = Value(6, "MULTIPOLYGON")
+ // coercion type JTS boundary returns LinearRing instead of LineString
+ val LINEARRING: GeometryTypeEnum.Value = Value(7, "LINEARRING")
+ // Declared before the group lists because multipleGeometries references it;
+ // 8 is the next unused id.
+ val GEOMETRYCOLLECTION: GeometryTypeEnum.Value = Value(8, "GEOMETRYCOLLECTION")
+
+ val pointGeometries: Seq[GeometryTypeEnum.Value] = List(this.POINT, this.MULTIPOINT)
+ val linestringGeometries: Seq[GeometryTypeEnum.Value] = List(this.LINESTRING, this.MULTILINESTRING)
+ val polygonGeometries: immutable.Seq[GeometryTypeEnum.Value] = List(this.POLYGON, this.MULTIPOLYGON)
+ val singleGeometries: Seq[GeometryTypeEnum.Value] = List(this.POINT, this.LINESTRING, this.POLYGON)
+ val multipleGeometries: Seq[GeometryTypeEnum.Value] = List(this.MULTIPOINT, this.MULTILINESTRING, this.MULTIPOLYGON, this.GEOMETRYCOLLECTION)
+
+ /**
+  * Looks up a geometry type by name, ignoring case.
+  *
+  * @param value
+  *   Name of the geometry type, e.g. "point" or "POLYGON".
+  * @return
+  *   The matching enumeration value.
+  * @throws Error
+  *   if no geometry type has that name.
+  */
+ def fromString(value: String): GeometryTypeEnum.Value = {
+     // Upper-case once instead of once per candidate value.
+     val normalized = value.toUpperCase(Locale.ROOT)
+     GeometryTypeEnum.values
+         .find(_.toString == normalized)
+         .getOrElse(
+           throw new Error(
+             s"Invalid mode for geometry type: $value." +
+                 s" Must be one of ${GeometryTypeEnum.values.mkString(",")}"
+           )
+         )
+ }
+
+ /**
+  * Looks up a geometry type by its numeric id.
+  *
+  * @param id
+  *   Numeric id of the geometry type.
+  * @return
+  *   The matching enumeration value.
+  * @throws Error
+  *   if no geometry type has that id.
+  */
+ def fromId(id: Int): GeometryTypeEnum.Value =
+     GeometryTypeEnum.values
+         .find(_.id == id)
+         .getOrElse(throw new Error(s"Invalid value for geometry type id: $id."))
+
+ /**
+  * Maps a geometry type to the base type of its group: POINT for point
+  * types, LINESTRING for linestring types, POLYGON for polygon types and
+  * GEOMETRYCOLLECTION for itself.
+  *
+  * LINEARRING belongs to no group and results in a MatchError.
+  *
+  * @param enumerator
+  *   The geometry type to classify.
+  * @return
+  *   The base type of the group.
+  */
+ def groupOf(enumerator: GeometryTypeEnum.Value): GeometryTypeEnum.Value =
+     enumerator match {
+         case g if pointGeometries.contains(g)      => this.POINT
+         case g if linestringGeometries.contains(g) => this.LINESTRING
+         case g if polygonGeometries.contains(g)    => this.POLYGON
+         case GEOMETRYCOLLECTION                    => this.GEOMETRYCOLLECTION
+     }
+
+ /**
+  * Tells whether a geometry type is a single (non-multi) geometry.
+  *
+  * LINEARRING is in neither list and results in a MatchError.
+  *
+  * @param enumerator
+  *   The geometry type to check.
+  * @return
+  *   true for types in singleGeometries, false for types in
+  *   multipleGeometries.
+  */
+ def isFlat(enumerator: GeometryTypeEnum.Value): Boolean =
+     enumerator match {
+         case g if singleGeometries.contains(g)   => true
+         case g if multipleGeometries.contains(g) => false
+     }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/HexType.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/HexType.scala
new file mode 100644
index 000000000..dfd9ebb8a
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/HexType.scala
@@ -0,0 +1,21 @@
+package com.databricks.labs.mosaic.core.types
+import org.apache.spark.sql.types._
+ * Type definition for Hex encoding. Hex encoding is defined as (hex: string).
+ * This abstraction over StringType is needed to ensure matching can
+ * distinguish between StringType (WKT) and HexType (HEX).
+ */
+class HexType()
+ extends StructType(
+ Array(
+ StructField("hex", StringType)
+ )
+ ) {
+ override def simpleString: String = "HEX"
+ override def typeName: String = "struct"
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/MosaicChip.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/MosaicChip.scala
new file mode 100644
index 000000000..6c7e773a9
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/MosaicChip.scala
@@ -0,0 +1,73 @@
+package com.databricks.labs.mosaic.core.types
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.types.{LongType, StringType}
+import org.apache.spark.unsafe.types.UTF8String
+ * A case class modeling an instance of a mosaic chip. A chip can belong to
+ * either core or border set.
+ *
+ * @param isCore
+ * Whether the chip belongs to the core set.
+ * @param index
+ * Index ID.
+ * @param geom
+ * Geometry instance if the chip is a border chip.
+ */
+case class MosaicChip(isCore: Boolean, index: Either[Long, String], geom: MosaicGeometry) {
+ /**
+  * Indicates whether the chip is outside of the representation of the
+  * geometry it was generated to represent (i.e. a false positive index).
+  *
+  * A core chip is never empty. A non-core chip is empty when it has no
+  * geometry (geom is null) or its geometry reports isEmpty.
+  */
+ def isEmpty: Boolean = !isCore && Option(geom).forall(_.isEmpty) // && skips the geometry check for core chips
+ /**
+  * Converts the index ID to the representation used by the index system.
+  *
+  * @param indexSystem
+  *   Index system that supplies the cell id data type and the parse/format
+  *   conversions.
+  * @return
+  *   This chip when the ID already has the requested representation,
+  *   otherwise a copy carrying the converted ID.
+  * @throws IllegalArgumentException
+  *   if the index system's cell id data type is neither LongType nor
+  *   StringType.
+  */
+ def formatCellId(indexSystem: IndexSystem): MosaicChip = {
+     val targetType = indexSystem.getCellIdDataType
+     (index, targetType) match {
+         case (Right(text), _: LongType)                         => this.copy(index = Left(indexSystem.parse(text)))
+         case (Left(id), _: StringType)                          => this.copy(index = Right(indexSystem.format(id)))
+         case (Left(_), _: LongType) | (Right(_), _: StringType) => this
+         case _                                                  => throw new IllegalArgumentException("Invalid cell id data type")
+     }
+ }
+ /**
+  * Returns the index ID as a Long.
+  *
+  * @param indexSystem
+  *   Index system used to parse a string ID.
+  * @return
+  *   The ID itself for a Left index, or the result of indexSystem.parse for
+  *   a Right index.
+  */
+ def cellIdAsLong(indexSystem: IndexSystem): Long = index match {
+     case Left(id)    => id
+     // Bind the value in the pattern instead of using index.right.get.
+     case Right(text) => indexSystem.parse(text)
+ }
+ /**
+  * Returns the index ID as a String.
+  *
+  * @param indexSystem
+  *   Index system used to format a numeric ID.
+  * @return
+  *   The ID itself for a Right index, or the result of indexSystem.format
+  *   for a Left index.
+  */
+ def cellIdAsStr(indexSystem: IndexSystem): String = index match {
+     case Right(text) => text
+     // Bind the value in the pattern instead of using index.left.get.
+     case Left(id)    => indexSystem.format(id)
+ }
+ /**
+  * Serialise to spark internal representation.
+  *
+  * The row holds, in order: isCore, the cell id (a Long for a Left index, a
+  * UTF8String for a Right index) and the WKB-encoded geometry.
+  *
+  * @return
+  *   An instance of [[InternalRow]].
+  */
+ def serialize: InternalRow = {
+     // Exhaustive match instead of isLeft plus projection .get.
+     val cellId: Any = index match {
+         case Left(id)    => id
+         case Right(text) => UTF8String.fromString(text)
+     }
+     InternalRow.fromSeq(Seq(isCore, cellId, encodeGeom))
+ }
+ /**
+ * Encodes the chip geometry as WKB.
+ *
+ * @return
+ * An instance of [[Array]] of [[Byte]] representing WKB, or null when the
+ * chip has no geometry (geom is null).
+ */
+ private def encodeGeom: Array[Byte] = Option(geom).map(_.toWKB).orNull
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/MosaicType.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/MosaicType.scala
new file mode 100644
index 000000000..a2ca70b8f
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/MosaicType.scala
@@ -0,0 +1,25 @@
+package com.databricks.labs.mosaic.core.types
+import org.apache.spark.sql.types._
+ * Type definition for MosaicType. MosaicType is defined as (chips:
+ * array[chip]).
+ */
+class MosaicType(fields: Array[StructField]) extends StructType(fields) {
+ override def simpleString: String = "MOSAIC"
+ override def typeName: String = "struct"
+object MosaicType {
+ /**
+ * Builds the Mosaic schema: a single "chips" field holding an array of
+ * ChipType(idType) structs.
+ *
+ * @param idType
+ * Data type of the chip cell id, forwarded to ChipType.
+ * @return
+ * A new MosaicType instance (the declared return type is StructType).
+ */
+ def apply(idType: DataType): StructType = {
+ new MosaicType(
+ Array(
+ StructField("chips", ArrayType(ChipType(idType)))
+ )
+ )
+ }
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/package.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/package.scala
new file mode 100644
index 000000000..c35abdc38
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/package.scala
@@ -0,0 +1,14 @@
+package com.databricks.labs.mosaic.core
+import org.apache.spark.sql.types._
+ * Contains definition of all Mosaic specific data types. It provides methods
+ * for type inference over geometry columns.
+ */
+package object types {
+ val HexType: DataType = new HexType()
+ val GeoJSONType: DataType = new GeoJSONType()
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/util/ResourceUtils.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/util/ResourceUtils.scala
new file mode 100644
index 000000000..0425d8c6c
--- /dev/null
+++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/util/ResourceUtils.scala
@@ -0,0 +1,26 @@
+package com.databricks.labs.mosaic.core.util
+import java.io.BufferedInputStream
+import scala.language.postfixOps
+ * Utility for reading resources from the classpath.
+ * This is required for [[com.databricks.labs.mosaic.core.GenericServiceFactory[_]] to work.
+ * All [[com.databricks.labs.mosaic.core.geometry.api.GeometryAPI]], [[com.databricks.labs.mosaic.core.index.IndexSystem]]
+ * and [[com.databricks.labs.mosaic.core.raster.RasterAPI]] implementations are provided via META-INF/services.
+ */
+object ResourceUtils {
+ /**
+  * Reads a resource fully into memory.
+  *
+  * @param name
+  *   Resource name, resolved with getClass.getResourceAsStream.
+  * @return
+  *   The bytes of the resource.
+  * @throws java.io.FileNotFoundException
+  *   if no resource with that name can be found.
+  */
+ def readResourceBytes(name: String): Array[Byte] = {
+     // getResourceAsStream returns null for a missing resource; fail with the
+     // resource name instead of a later "Stream closed" error.
+     val resource = Option(getClass.getResourceAsStream(name))
+         .getOrElse(throw new java.io.FileNotFoundException(s"Resource not found on classpath: $name"))
+     val bis = new BufferedInputStream(resource)
+     // Iterator instead of Stream: nothing is memoised while reading.
+     try Iterator.continually(bis.read()).takeWhile(_ != -1).map(_.toByte).toArray
+     finally bis.close()
+ }
+ /**
+  * Reads a text resource and splits it into lines.
+  *
+  * @param name
+  *   Resource name, passed to readResourceBytes.
+  * @return
+  *   The lines of the resource, without line terminators.
+  */
+ def readResourceLines(name: String): Array[String] = {
+     // Decode as UTF-8 so the result does not depend on the platform default
+     // charset.
+     val text = new String(readResourceBytes(name), java.nio.charset.StandardCharsets.UTF_8)
+     // Accept both LF and CRLF so no trailing '\r' is left on a line.
+     text.split("\r?\n")
+ }
diff --git a/mosaic-core/src/main/scala/org/apache/spark/sql/adapters/Column.scala b/mosaic-core/src/main/scala/org/apache/spark/sql/adapters/Column.scala
new file mode 100644
index 000000000..85d5d8420
--- /dev/null
+++ b/mosaic-core/src/main/scala/org/apache/spark/sql/adapters/Column.scala
@@ -0,0 +1,10 @@
+package org.apache.spark.sql.adapters
+import org.apache.spark.sql.{Column => SparkColumn}
+import org.apache.spark.sql.catalyst.expressions.Expression
+object Column {
+ /**
+ * Wraps a Catalyst expression in a Spark Column by calling the Column
+ * constructor.
+ *
+ * @param expr
+ * The expression to wrap.
+ * @return
+ * A Spark Column built from expr.
+ */
+ def apply(expr: Expression): SparkColumn = {
+ new SparkColumn(expr)
+ }
diff --git a/mosaic-core/src/main/scala/org/apache/spark/sql/adapters/DataFrameReader.scala b/mosaic-core/src/main/scala/org/apache/spark/sql/adapters/DataFrameReader.scala
new file mode 100644
index 000000000..43287f267
--- /dev/null
+++ b/mosaic-core/src/main/scala/org/apache/spark/sql/adapters/DataFrameReader.scala
@@ -0,0 +1,5 @@
+package org.apache.spark.sql.adapters
+import org.apache.spark.sql.SparkSession
+/**
+ * Subclass of Spark's DataFrameReader that forwards the session to the parent
+ * constructor and adds no members of its own.
+ *
+ * NOTE(review): presumably declared under org.apache.spark.sql so that it can
+ * reach a package-restricted constructor; confirm.
+ */
+class DataFrameReader(sparkSession: SparkSession) extends org.apache.spark.sql.DataFrameReader(sparkSession) {}
diff --git a/mosaic-core/src/test/resources/CRSBounds.csv b/mosaic-core/src/test/resources/CRSBounds.csv
new file mode 100644
index 000000000..a628e3c74
--- /dev/null
+++ b/mosaic-core/src/test/resources/CRSBounds.csv
@@ -0,0 +1,2 @@
+EPSG:4326: WGS 84,-180.0000,-90.0000,180.0000,90.0000,-180.0000,-90.0000,180.0000,90.0000
\ No newline at end of file
diff --git a/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.GeometryAPIRegister b/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.GeometryAPIRegister
new file mode 100644
index 000000000..85115ba12
--- /dev/null
+++ b/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.GeometryAPIRegister
@@ -0,0 +1,2 @@
\ No newline at end of file
diff --git a/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.IndexSystemRegister b/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.IndexSystemRegister
new file mode 100644
index 000000000..e6eb808f0
--- /dev/null
+++ b/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.IndexSystemRegister
@@ -0,0 +1,2 @@
\ No newline at end of file
diff --git a/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.RasterAPIRegister b/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.RasterAPIRegister
new file mode 100644
index 000000000..1efd1fcfa
--- /dev/null
+++ b/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.RasterAPIRegister
@@ -0,0 +1,2 @@
\ No newline at end of file
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/AdaptorsTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/AdaptorsTest.scala
new file mode 100644
index 000000000..3deba2eff
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/AdaptorsTest.scala
@@ -0,0 +1,16 @@
+package com.databricks.labs.mosaic.core
+import org.apache.spark.sql.adapters.Column
+import org.apache.spark.sql.catalyst.expressions.{Add, Expression}
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.must.Matchers.{be, noException}
+class AdaptorsTest extends AnyFunSuite with MockFactory {
+ // Building a Column through the adapter from a mocked expression must not throw.
+ test("Column should be constructable outside of spark") {
+     val mockExpression = mock[Expression]
+     noException should be thrownBy Column(Add(mockExpression, mockExpression))
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/GenericFactoryTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/GenericFactoryTest.scala
new file mode 100644
index 000000000..a3b3ab9fe
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/GenericFactoryTest.scala
@@ -0,0 +1,46 @@
+package com.databricks.labs.mosaic.core
+import com.databricks.labs.mosaic.core.GenericServiceFactory.{GeometryAPIFactory, IndexSystemFactory, RasterAPIFactory}
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import com.databricks.labs.mosaic.core.raster.RasterAPI
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class GenericFactoryTest extends AnyFunSuite with MockFactory {
+ test("GeometryAPIFactory should getGeometryAPI based on META-INF/services") {
+ // We need a mock GeometryAPI to test the factory
+ // The generated class path is com.databricks.labs.mosaic.core.geometry.api.GenericFactoryTest$$anon$1
+ // Make sure that this is the first mock in the file to match $$anon$1
+ // NOTE(review): this suite is declared in package com.databricks.labs.mosaic.core;
+ // confirm the class path above against the META-INF/services entry.
+ val mockGeometryAPI = mock[GeometryAPI]
+ mockGeometryAPI.name _ expects() returning "MockGeometryAPI" anyNumberOfTimes()
+ noException should be thrownBy GeometryAPIFactory.getGeometryAPI("MockGeometryAPI", Array(this))
+ an[IllegalArgumentException] should be thrownBy GeometryAPIFactory.getGeometryAPI("MockGeometryAPI")
+ }
+ test("IndexSystemFactory should getIndexSystem based on META-INF/services") {
+     // We need a mock IndexSystem to test the factory
+     // The generated class path is com.databricks.labs.mosaic.core.geometry.api.GenericFactoryTest$$anon$2
+     // Make sure that this is the second mock in the file to match $$anon$2
+     val mockIndexSystem = mock[IndexSystem]
+     mockIndexSystem.name _ expects() returning "MockIndexSystem" anyNumberOfTimes()
+     noException should be thrownBy IndexSystemFactory.getIndexSystem("MockIndexSystem", Array(this))
+     an[IllegalArgumentException] should be thrownBy IndexSystemFactory.getIndexSystem("MockIndexSystem")
+ }
+ test("RasterAPIFactory should getRasterAPI based on META-INF/services") {
+ // We need a mock RasterAPI to test the factory
+ // The generated class path is com.databricks.labs.mosaic.core.geometry.api.GenericFactoryTest$$anon$3
+ // Make sure that this is the third mock in the file to match $$anon$3
+ val mockRasterAPI = mock[RasterAPI]
+ mockRasterAPI.name _ expects() returning "MockRasterAPI" anyNumberOfTimes()
+ noException should be thrownBy RasterAPIFactory.getRasterAPI("MockRasterAPI", Array(this))
+ an[IllegalArgumentException] should be thrownBy RasterAPIFactory.getRasterAPI("MockRasterAPI")
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/MosaicTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/MosaicTest.scala
new file mode 100644
index 000000000..6faa24fb9
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/MosaicTest.scala
@@ -0,0 +1,122 @@
+package com.databricks.labs.mosaic.core
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.geometry._
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import com.databricks.labs.mosaic.core.types.MosaicChip
+import org.apache.spark.sql.types.LongType
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class MosaicTest extends AnyFunSuite with MockFactory {
+ val mockPoint: MosaicPoint = mock[MosaicPoint]
+ val mockMultiPoint: MosaicMultiPoint = mock[MosaicMultiPoint]
+ val mockLineString: MosaicLineString = mock[MosaicLineString]
+ val mockMultiLineString: MosaicMultiLineString = mock[MosaicMultiLineString]
+ val mockPolygon: MosaicPolygon = mock[MosaicPolygon]
+ val mockIndexSystem: IndexSystem = mock[IndexSystem]
+ val mockGeometryAPI: GeometryAPI = mock[GeometryAPI]
+ val mockMosaicChip: MosaicChip = mock[MosaicChip]
+ def doMock(): Unit = {
+ mockPoint.getGeometryType _ expects() returning "POINT" anyNumberOfTimes()
+ mockPoint.getX _ expects() returning 1.0 anyNumberOfTimes()
+ mockPoint.getY _ expects() returning 1.0 anyNumberOfTimes()
+ mockPoint.isEmpty _ expects() returning false anyNumberOfTimes()
+ mockMultiPoint.getGeometryType _ expects() returning "MULTIPOINT" anyNumberOfTimes()
+ mockMultiPoint.asSeq _ expects() returning Seq(mockPoint) anyNumberOfTimes()
+ mockLineString.getGeometryType _ expects() returning "LINESTRING" anyNumberOfTimes()
+ mockLineString.getShells _ expects() returning Seq(mockLineString) anyNumberOfTimes()
+ mockLineString.asSeq _ expects() returning Seq(mockPoint) anyNumberOfTimes()
+ mockLineString.intersection _ expects mockPoint returning mockPoint anyNumberOfTimes()
+ mockLineString.buffer _ expects * returning mockPolygon anyNumberOfTimes()
+ mockMultiLineString.getGeometryType _ expects() returning "MULTILINESTRING" anyNumberOfTimes()
+ mockMultiLineString.flatten _ expects() returning Seq(mockLineString) anyNumberOfTimes()
+ mockPolygon.getGeometryType _ expects() returning "POLYGON" anyNumberOfTimes()
+ mockPolygon.buffer _ expects * returning mockPolygon anyNumberOfTimes()
+ mockPolygon.isEmpty _ expects() returning false anyNumberOfTimes()
+ mockPolygon.boundary _ expects() returning mockLineString anyNumberOfTimes()
+ mockPolygon.simplify _ expects * returning mockPolygon anyNumberOfTimes()
+ mockIndexSystem.pointToIndex _ expects(*, *, *) returning 1L anyNumberOfTimes()
+ mockIndexSystem.getCellIdDataType _ expects() returning LongType anyNumberOfTimes()
+ (mockIndexSystem.indexToGeometry(_: Long, _: GeometryAPI)) expects(1, mockGeometryAPI) returning mockPoint anyNumberOfTimes()
+ (mockIndexSystem.kRing(_: Long, _: Int)) expects(1, 1) returning Seq(1L) anyNumberOfTimes()
+ mockIndexSystem.getBufferRadius _ expects(mockPolygon, 1, mockGeometryAPI) returning 1.0 anyNumberOfTimes()
+ mockIndexSystem.polyfill _ expects(mockPolygon, 1, Some(mockGeometryAPI)) returning Seq(1L) anyNumberOfTimes()
+ mockIndexSystem.getCoreChips _ expects(Seq(1L), false, mockGeometryAPI) returning Seq(mockMosaicChip) anyNumberOfTimes()
+ mockIndexSystem.getBorderChips _ expects(mockPolygon, Seq(), false, mockGeometryAPI) returning Seq() anyNumberOfTimes()
+ mockMosaicChip.cellIdAsLong _ expects mockIndexSystem returning 1L anyNumberOfTimes()
+ }
+ test("Mosaic should getChips") {
+ doMock()
+ val resolution = 1
+ Mosaic.getChips(mockPoint, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI) shouldBe a[Seq[MosaicChip]]
+ Mosaic.getChips(mockPoint, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI).map(_.cellIdAsLong(mockIndexSystem)) should contain theSameElementsAs Seq(1L)
+ Mosaic.getChips(mockMultiPoint, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI) shouldBe a[Seq[MosaicChip]]
+ Mosaic.getChips(mockMultiPoint, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI).map(_.cellIdAsLong(mockIndexSystem)) should contain theSameElementsAs Seq(1L)
+ Mosaic.getChips(mockLineString, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI) shouldBe a[Seq[MosaicChip]]
+ Mosaic.getChips(mockLineString, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI).map(_.cellIdAsLong(mockIndexSystem)) should contain theSameElementsAs Seq(1L)
+ Mosaic.getChips(mockMultiLineString, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI) shouldBe a[Seq[MosaicChip]]
+ Mosaic.getChips(mockMultiLineString, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI).map(_.cellIdAsLong(mockIndexSystem)) should contain theSameElementsAs Seq(1L)
+ Mosaic.getChips(mockPolygon, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI) shouldBe a[Seq[MosaicChip]]
+ Mosaic.getChips(mockPolygon, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI).map(_.cellIdAsLong(mockIndexSystem)) should contain theSameElementsAs Seq(1L)
+ }
+ test("Mosaic should mosaicFill for empty carved geometries") {
+ doMock()
+ val resolution = 1
+ val mockPolygon2 = mock[MosaicPolygon]
+ mockPolygon2.isEmpty _ expects() returning true anyNumberOfTimes()
+ mockPolygon2.getGeometryType _ expects() returning "POLYGON" anyNumberOfTimes()
+ mockPolygon2.buffer _ expects * returning mockPolygon2 anyNumberOfTimes()
+ mockPolygon2.simplify _ expects * returning mockPolygon2 anyNumberOfTimes()
+ mockIndexSystem.polyfill _ expects(mockPolygon2, 1, Some(mockGeometryAPI)) returning Seq(1L) anyNumberOfTimes()
+ mockIndexSystem.getBufferRadius _ expects(mockPolygon2, 1, mockGeometryAPI) returning 1.0 anyNumberOfTimes()
+ mockIndexSystem.getCoreChips _ expects(Seq(1L), true, mockGeometryAPI) returning Seq(mockMosaicChip) anyNumberOfTimes()
+ mockIndexSystem.getBorderChips _ expects(mockPolygon2, Seq(), true, mockGeometryAPI) returning Seq() anyNumberOfTimes()
+ Mosaic.getChips(mockPolygon2, resolution, keepCoreGeom = true, mockIndexSystem, mockGeometryAPI) shouldBe a[Seq[MosaicChip]]
+ Mosaic.getChips(mockPolygon2, resolution, keepCoreGeom = true, mockIndexSystem, mockGeometryAPI).map(_.cellIdAsLong(mockIndexSystem)) should contain theSameElementsAs Seq(1L)
+ }
+ test("Mosaic should fail for lineFill on polygon") {
+ doMock()
+ // mockPolygon is stubbed in doMock to report geometry type "POLYGON";
+ // lineFill is expected to reject it with an Error.
+ an[Error] should be thrownBy Mosaic.lineFill(mockPolygon, 1, mockIndexSystem, mockGeometryAPI)
+ }
+ test("Mosaic should implement geometry kRing") {
+ doMock()
+ // doMock only stubs kRing for (1, 1); add the (1, 2) expectation used by this test.
+ (mockIndexSystem.kRing(_: Long, _: Int)) expects(1, 2) returning Seq(1L) anyNumberOfTimes()
+ Mosaic.geometryKRing(mockPolygon, 1, 2, mockIndexSystem, mockGeometryAPI) shouldBe a[Set[Long]]
+ Mosaic.geometryKRing(mockPolygon, 1, 2, mockIndexSystem, mockGeometryAPI) should contain theSameElementsAs Seq(1L)
+ }
+ test("Mosaic should implement geometry kLoop") {
+ doMock()
+ // kLoop(1, 2) is stubbed to return cells 1 and 2, yet only cell 2 is expected below.
+ // NOTE(review): presumably geometryKLoop drops cells already covered by the
+ // geometry or an inner ring; confirm against Mosaic.geometryKLoop.
+ (mockIndexSystem.kLoop(_: Long, _: Int)) expects(1, 2) returning Seq(1L, 2L) anyNumberOfTimes()
+ Mosaic.geometryKLoop(mockPolygon, 1, 2, mockIndexSystem, mockGeometryAPI) shouldBe a[Set[Long]]
+ Mosaic.geometryKLoop(mockPolygon, 1, 2, mockIndexSystem, mockGeometryAPI) should contain theSameElementsAs Seq(2L)
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/codegen/format/ConvertToCodeGenTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/codegen/format/ConvertToCodeGenTest.scala
new file mode 100644
index 000000000..675c2c2b3
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/codegen/format/ConvertToCodeGenTest.scala
@@ -0,0 +1,203 @@
+package com.databricks.labs.mosaic.core.codegen.format
+import com.databricks.labs.mosaic.core.expressions.geometry.RequiresCRS
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.types.{GeoJSONType, HexType}
+import org.apache.spark.sql.catalyst.expressions.codegen._
+import org.apache.spark.sql.types.{BinaryType, CalendarIntervalType, StringType}
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class ConvertToCodeGenTest extends AnyFunSuite with MockFactory {
+ val mockGeometryAPI: GeometryAPI = mock[GeometryAPI] // geometry API mock; doMock() makes its ioCodeGen return mockIO
+ val mockIO: GeometryIOCodeGen = mock[GeometryIOCodeGen] // IO code generator mock; doMock() stubs its from*/to* methods
+ val mocCtx: CodegenContext = mock[CodegenContext] // codegen context mock, only handed through as an argument in these tests
+ /**
+   * Registers the stubbed behaviour shared by the tests of this suite.
+   * Every reader/writer of the IO code generator answers with a fixed
+   * (generated code, result variable) pair, any number of times.
+   */
+ def doMock(): Unit = {
+     // Readers, all keyed on the input variable "eval1".
+     (mockIO.fromWKT _)
+         .expects(mocCtx, "eval1", mockGeometryAPI)
+         .returning(("Geometry geom1 = new Geometry(eval1.toString());", "geom1"))
+         .anyNumberOfTimes()
+     (mockIO.fromWKB _)
+         .expects(mocCtx, "eval1", mockGeometryAPI)
+         .returning(("Geometry geom1 = new Geometry(eval1.bytes());", "geom1"))
+         .anyNumberOfTimes()
+     (mockIO.fromHex _)
+         .expects(mocCtx, "eval1", mockGeometryAPI)
+         .returning(("Geometry geom1 = new Geometry(eval1.hex().toBytes());", "geom1"))
+         .anyNumberOfTimes()
+     (mockIO.fromGeoJSON _)
+         .expects(mocCtx, "eval1", mockGeometryAPI)
+         .returning(("Geometry geom1 = Geometry.parseJSON(eval1);", "geom1"))
+         .anyNumberOfTimes()
+     // Writers, all keyed on the geometry variable "geom1".
+     (mockIO.toWKT _)
+         .expects(mocCtx, "geom1", mockGeometryAPI)
+         .returning(("String wkt2 = geom1.toWKT();", "wkt2"))
+         .anyNumberOfTimes()
+     (mockIO.toWKB _)
+         .expects(mocCtx, "geom1", mockGeometryAPI)
+         .returning(("byte[] wkb2 = geom1.toWKB();", "wkb2"))
+         .anyNumberOfTimes()
+     (mockIO.toHEX _)
+         .expects(mocCtx, "geom1", mockGeometryAPI)
+         .returning(("String hex2 = geom1.toHex();", "hex2"))
+         .anyNumberOfTimes()
+     (mockIO.toGeoJSON _)
+         .expects(mocCtx, "geom1", mockGeometryAPI)
+         .returning(("String json2 = geom1.toJSON();", "json2"))
+         .anyNumberOfTimes()
+     // The geometry API hands out the stubbed IO code generator.
+     (mockGeometryAPI.ioCodeGen _).expects().returning(mockIO).anyNumberOfTimes()
+ }
+ test("ConvertToCodeGen should generate read code") {
+     doMock()
+     // Each supported input type must produce exactly what the matching reader of the IO code generator returns.
+     val wktRead = ConvertToCodeGen.readGeometryCode(mocCtx, "eval1", StringType, mockGeometryAPI)
+     wktRead shouldEqual mockIO.fromWKT(mocCtx, "eval1", mockGeometryAPI)
+     val wkbRead = ConvertToCodeGen.readGeometryCode(mocCtx, "eval1", BinaryType, mockGeometryAPI)
+     wkbRead shouldEqual mockIO.fromWKB(mocCtx, "eval1", mockGeometryAPI)
+     val hexRead = ConvertToCodeGen.readGeometryCode(mocCtx, "eval1", HexType, mockGeometryAPI)
+     hexRead shouldEqual mockIO.fromHex(mocCtx, "eval1", mockGeometryAPI)
+     val geoJsonRead = ConvertToCodeGen.readGeometryCode(mocCtx, "eval1", GeoJSONType, mockGeometryAPI)
+     geoJsonRead shouldEqual mockIO.fromGeoJSON(mocCtx, "eval1", mockGeometryAPI)
+     // CalendarIntervalType is not one of the handled input types and has to be rejected with an Error.
+     an[Error] should be thrownBy {
+         ConvertToCodeGen.readGeometryCode(mocCtx, "eval1", CalendarIntervalType, mockGeometryAPI)
+     }
+ }
+ test("ConvertToCodeGen should generate write code") {
+     doMock()
+     // Each supported output type must produce exactly what the matching writer of the IO code generator returns.
+     val wktWrite = ConvertToCodeGen.writeGeometryCode(mocCtx, "geom1", StringType, mockGeometryAPI)
+     wktWrite shouldEqual mockIO.toWKT(mocCtx, "geom1", mockGeometryAPI)
+     val wkbWrite = ConvertToCodeGen.writeGeometryCode(mocCtx, "geom1", BinaryType, mockGeometryAPI)
+     wkbWrite shouldEqual mockIO.toWKB(mocCtx, "geom1", mockGeometryAPI)
+     val hexWrite = ConvertToCodeGen.writeGeometryCode(mocCtx, "geom1", HexType, mockGeometryAPI)
+     hexWrite shouldEqual mockIO.toHEX(mocCtx, "geom1", mockGeometryAPI)
+     // The GeoJSON writer is selected through the format name rather than a data type.
+     val geoJsonWrite = ConvertToCodeGen.writeGeometryCode(mocCtx, "geom1", "GEOJSON", mockGeometryAPI)
+     geoJsonWrite shouldEqual mockIO.toGeoJSON(mocCtx, "geom1", mockGeometryAPI)
+     // An unknown format name has to be rejected with an Error.
+     an[Error] should be thrownBy {
+         ConvertToCodeGen.writeGeometryCode(mocCtx, "eval1", "other", mockGeometryAPI)
+     }
+ }
+ test("ConvertToCodeGen should generate code for different input and output types") {
+     doMock()
+     // Cannot mock due to inheritance issues
+     val targetCode = ExprCode(null, VariableValue("eval3", null))
+     // Read via the WKT reader, write via the WKB writer, then assign to the output variable.
+     val expectedCode: String =
+         s"""
+            |Geometry geom1 = new Geometry(eval1.toString());
+            |byte[] wkb2 = geom1.toWKB();
+            |eval3 = wkb2;
+            |""".stripMargin
+     (mockGeometryAPI.codeGenTryWrap _)
+         .expects(expectedCode)
+         .returning(s"""try{$expectedCode}""")
+         .anyNumberOfTimes()
+     // StringType input converted to the "WKB" output format.
+     def convert() = ConvertToCodeGen.fromEval(mocCtx, targetCode, "eval1", StringType, "WKB", mockGeometryAPI)
+     val generated = convert()
+     generated.contains(expectedCode) shouldBe true
+     // The conversion code has to come back wrapped by codeGenTryWrap.
+     generated should include("try")
+     generated should include("{")
+     generated should include("}")
+     // Stand-in for the null-safe wrapper: ignores its arguments and exposes freshly generated code as the value.
+     val nullSafeWrapper: (CodegenContext, ExprCode, String => String) => ExprCode =
+         (_, _, _) => ExprCode(null, VariableValue(convert(), null))
+     val codeGen = ConvertToCodeGen.doCodeGen(mocCtx, targetCode, nullSafeWrapper, StringType, "WKB", mockGeometryAPI)
+     codeGen.value.code.contains(expectedCode) shouldBe true
+ }
+ test("ConvertToCodeGen should generate code for same input and output types") {
+     doMock()
+     // Cannot mock due to inheritance issues
+     val targetCode = ExprCode(null, VariableValue("eval2", null))
+     // With matching formats the generated code is a plain assignment.
+     val expectedCode: String =
+         s"""
+            |eval2 = eval1;
+            |""".stripMargin
+     val generated = ConvertToCodeGen.fromEval(mocCtx, targetCode, "eval1", BinaryType, "binary", mockGeometryAPI)
+     generated.contains(expectedCode) shouldBe true
+     // No try wrapper and no block may appear in the pass-through code.
+     generated should not include ("try")
+     generated should not include ("{")
+     generated should not include ("}")
+     // Stand-in for the null-safe wrapper: ignores its arguments and exposes the expected code as the value.
+     val nullSafeWrapper: (CodegenContext, ExprCode, String => String) => ExprCode =
+         (_, _, _) => ExprCode(null, VariableValue(expectedCode, null))
+     val codeGen = ConvertToCodeGen.doCodeGen(mocCtx, targetCode, nullSafeWrapper, BinaryType, "WKB", mockGeometryAPI)
+     codeGen.value.code.contains(expectedCode) shouldBe true
+ }
+ test("RequiresCRS should return correct encoding for each geometry type") {
+     doMock()
+     // Minimal concrete carrier for the trait under test.
+     object TestObject extends RequiresCRS
+     // GeoJSONType passes the encoding check, StringType does not.
+     noException should be thrownBy { TestObject.checkEncoding(GeoJSONType) }
+     an[Exception] should be thrownBy { TestObject.checkEncoding(StringType) }
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryFormatTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryFormatTest.scala
new file mode 100644
index 000000000..9ef7874ff
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryFormatTest.scala
@@ -0,0 +1,19 @@
+package com.databricks.labs.mosaic.core.codegen.format
+import com.databricks.labs.mosaic.core.types.{GeoJSONType, HexType}
+import org.apache.spark.sql.types.{BinaryType, CalendarIntervalType, StringType}
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class GeometryFormatTest extends AnyFunSuite with MockFactory {
+ test("GeometryFormat should handle valid and invalid data types") {
+     // All four geometry encodings must resolve to a default format without raising.
+     Seq(BinaryType, StringType, HexType, GeoJSONType).foreach { dataType =>
+         noException should be thrownBy GeometryFormat.getDefaultFormat(dataType)
+     }
+     // Any other data type is rejected with an Error.
+     an[Error] should be thrownBy { GeometryFormat.getDefaultFormat(CalendarIntervalType) }
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/crs/CRSBoundsProviderTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/crs/CRSBoundsProviderTest.scala
new file mode 100644
index 000000000..33f47702e
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/crs/CRSBoundsProviderTest.scala
@@ -0,0 +1,74 @@
+package com.databricks.labs.mosaic.core.crs
+import com.databricks.labs.mosaic.core.expressions.geometry.RequiresCRS
+import com.databricks.labs.mosaic.core.geometry.MosaicPoint
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.types.GeoJSONType
+import org.apache.spark.sql.types.StringType
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class CRSBoundsProviderTest extends AnyFunSuite with MockFactory {
+ val mockGeometryAPI: GeometryAPI = mock[GeometryAPI] // geometry API mock; doMock() stubs fromCoords on it
+ val mockPoint1: MosaicPoint = mock[MosaicPoint] // stubbed by doMock() with x = -180.00, y = -90.00
+ val mockPoint2: MosaicPoint = mock[MosaicPoint] // stubbed by doMock() with x = 180.00, y = 90.00
+ /**
+   * Registers the stubbed behaviour shared by the tests of this suite:
+   * two corner points with fixed coordinates, and a geometry API that
+   * maps the matching coordinate sequences onto those points.
+   */
+ def doMock(): Unit = {
+     // Corner at (-180, -90).
+     (mockPoint1.getX _).expects().returning(-180.00).anyNumberOfTimes()
+     (mockPoint1.getY _).expects().returning(-90.00).anyNumberOfTimes()
+     // Corner at (180, 90).
+     (mockPoint2.getX _).expects().returning(180.00).anyNumberOfTimes()
+     (mockPoint2.getY _).expects().returning(90.00).anyNumberOfTimes()
+     // Point construction from raw coordinates yields the stubbed corners.
+     (mockGeometryAPI.fromCoords _).expects(Seq(-180.00, -90.00)).returning(mockPoint1).anyNumberOfTimes()
+     (mockGeometryAPI.fromCoords _).expects(Seq(180.00, 90.00)).returning(mockPoint2).anyNumberOfTimes()
+ }
+ test("CRSBoundsProvider should load resource file and return correct bounds for EPSG:4326") {
+     doMock()
+     val provider = CRSBoundsProvider(geometryAPI = mockGeometryAPI)
+     val wgs84Bounds = provider.bounds("EPSG", 4326)
+     // Corner points.
+     wgs84Bounds.lowerLeft.getX should be (-180.00)
+     wgs84Bounds.lowerLeft.getY should be (-90.00)
+     wgs84Bounds.upperRight.getX should be (180.00)
+     wgs84Bounds.upperRight.getY should be (90.00)
+     // Convenience accessors must agree with the corner points.
+     wgs84Bounds.getUpperX should be (180.00)
+     wgs84Bounds.getUpperY should be (90.00)
+     wgs84Bounds.getLowerX should be (-180.00)
+     wgs84Bounds.getLowerY should be (-90.00)
+ }
+ test("CRSBoundsProvider should load resource file and return correct reprojected bounds for EPSG:4326") {
+     doMock()
+     val provider = CRSBoundsProvider(geometryAPI = mockGeometryAPI)
+     val reprojected = provider.reprojectedBounds("EPSG", 4326)
+     // For EPSG:4326 the reprojected corners are expected at (-180, -90) and (180, 90).
+     reprojected.lowerLeft.getX should be (-180.00)
+     reprojected.lowerLeft.getY should be (-90.00)
+     reprojected.upperRight.getX should be (180.00)
+     reprojected.upperRight.getY should be (90.00)
+ }
+ test("CRSBoundsProvider should fail to load resource file and throw exception for EPSG:-9999") {
+     doMock()
+     val provider = CRSBoundsProvider(geometryAPI = mockGeometryAPI)
+     // NOTE(review): the title says EPSG:-9999 while the lookups below use 9999 - confirm which one is intended.
+     an[Exception] should be thrownBy { provider.bounds("EPSG", 9999) }
+     an[Exception] should be thrownBy { provider.reprojectedBounds("EPSG", 9999) }
+ }
+ test("RequiresCRS should return correct encoding for each geometry type") {
+     // Minimal concrete carrier for the trait under test.
+     object TestObject extends RequiresCRS
+     // GeoJSONType passes the encoding check, StringType does not.
+     noException should be thrownBy { TestObject.checkEncoding(GeoJSONType) }
+     an[Exception] should be thrownBy { TestObject.checkEncoding(StringType) }
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/GenericExpressionFactoryTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/GenericExpressionFactoryTest.scala
new file mode 100644
index 000000000..04c5a03a4
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/GenericExpressionFactoryTest.scala
@@ -0,0 +1,28 @@
+package com.databricks.labs.mosaic.core.expressions
+import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
+import org.apache.spark.sql.catalyst.expressions.{Add, Expression}
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class GenericExpressionFactoryTest extends AnyFunSuite with MockFactory {
+ test("GenericExpressionFactory should make copy of Add expression") {
+     val original = Add(mock[Expression], mock[Expression])
+     val replacementChildren = Array(mock[Expression], mock[Expression])
+     // makeCopyImpl receives its new arguments as AnyRef values.
+     val untypedArgs = replacementChildren.map(child => (child: AnyRef))
+     val copied = GenericExpressionFactory.makeCopyImpl[Add](original, untypedArgs, 2, mock[MosaicExpressionConfig])
+     // The copy has to be an Add built from the replacement children.
+     copied shouldBe Add(replacementChildren(0), replacementChildren(1))
+ }
+ test("GenericExpressionFactory should generate a base builder") {
+     val child = mock[Expression]
+     (child.toString _).expects().returning("mockExpr").anyNumberOfTimes()
+     // The factory output must be usable as a Spark function builder.
+     GenericExpressionFactory.getBaseBuilder[Add](2, mock[MosaicExpressionConfig]) shouldBe a[FunctionBuilder]
+     val builder = GenericExpressionFactory.getBaseBuilder[Add](2, mock[MosaicExpressionConfig])
+     // Applying the builder to two children has to give the corresponding Add.
+     builder(Seq(child, child)) shouldBe Add(child, child)
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/MosaicExpressionConfigTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/MosaicExpressionConfigTest.scala
new file mode 100644
index 000000000..4a3f8a69c
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/MosaicExpressionConfigTest.scala
@@ -0,0 +1,55 @@
+package com.databricks.labs.mosaic.core.expressions
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import com.databricks.labs.mosaic.core.raster.RasterAPI
+import org.apache.spark.SharedSparkContext
+import org.apache.spark.sql.SparkSession
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+//noinspection ScalaUnusedSymbol
+class MosaicExpressionConfigTest extends AnyFunSuite with SharedSparkContext with MockFactory {
+ test("MosaicExpressionConfig") { // smoke test: builds the config from a Map and from the Spark session, then calls its accessors and setters
+ val spark = SparkSession.builder().getOrCreate()
+ spark.sparkContext.setLogLevel("FATAL")
+ val mockGeometryAPI = mock[GeometryAPI]
+ val mockIndexSystem = mock[IndexSystem]
+ val mockRasterAPI = mock[RasterAPI]
+ // Class paths match the scalamock macro generated classes, the order needs to be preserved
+ // (the three mocks above are expected to become $$anon$1, $$anon$2 and $$anon$3 in declaration order)
+ val configs = Map(
+ MOSAIC_GEOMETRY_API -> "com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$1",
+ MOSAIC_INDEX_SYSTEM -> "com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$2",
+ MOSAIC_RASTER_API -> "com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$3",
+ MOSAIC_RASTER_CHECKPOINT -> "mosaic-raster-checkpoint"
+ )
+ val mosaicExpressionConfig = MosaicExpressionConfig(configs)
+ noException should be thrownBy mosaicExpressionConfig.updateSparkConf()
+ noException should be thrownBy mosaicExpressionConfig.getGeometryAPI(Array(this)) // NOTE(review): presumably `this` is the constructor argument of the generated mock class - confirm
+ noException should be thrownBy mosaicExpressionConfig.getIndexSystem(Array(this))
+ noException should be thrownBy mosaicExpressionConfig.getRasterAPI(Array(this))
+ noException should be thrownBy mosaicExpressionConfig.getRasterCheckpoint
+ noException should be thrownBy mosaicExpressionConfig.setGeometryAPI("geometryAPI") // setters are only checked for not throwing
+ noException should be thrownBy mosaicExpressionConfig.setIndexSystem("indexSystem")
+ noException should be thrownBy mosaicExpressionConfig.setRasterAPI("rasterAPI")
+ noException should be thrownBy mosaicExpressionConfig.setRasterCheckpoint("rasterCheckpoint")
+ noException should be thrownBy mosaicExpressionConfig.setConfig("key", "value")
+ // Same settings again, this time placed on the session so the config can be built from the SparkSession
+ spark.conf.set(MOSAIC_GEOMETRY_API, "com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$1")
+ spark.conf.set(MOSAIC_INDEX_SYSTEM, "com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$2")
+ spark.conf.set(MOSAIC_RASTER_API, "com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$3")
+ spark.conf.set(MOSAIC_RASTER_CHECKPOINT, "mosaic-raster-checkpoint")
+ MosaicExpressionConfig(spark) shouldBe a[MosaicExpressionConfig]
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/WithExpressionInfoTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/WithExpressionInfoTest.scala
new file mode 100644
index 000000000..1ed4b956c
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/WithExpressionInfoTest.scala
@@ -0,0 +1,25 @@
+package com.databricks.labs.mosaic.core.expressions
+import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+//noinspection ScalaUnusedSymbol
+class WithExpressionInfoTest extends AnyFunSuite with MockFactory {
+ abstract class TestExpression extends WithExpressionInfo { // test-only subclass that is mocked below
+ // The overrides are final and simply delegate to the trait - presumably so that the mock keeps the
+ // trait's own implementations (ScalaMock does not mock final methods).
+ override final def usage: String = super.usage
+ override final def example: String = super.example
+ override final def group: String = super.group
+ override final def database: Option[String] = super.database
+ }
+ val mockExpression: TestExpression = mock[TestExpression] // the test below sets an expectation on its name member
+ test("WithExpressionInfo should build an ExpressionInfo") {
+     // name is the only expectation set on the mock; without a call-count modifier it is expected exactly once.
+     (mockExpression.name _).expects().returning("test")
+     mockExpression.getExpressionInfo() shouldBe a[ExpressionInfo]
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/BinaryVectorExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/BinaryVectorExpressionTest.scala
new file mode 100644
index 000000000..0800b44fc
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/BinaryVectorExpressionTest.scala
@@ -0,0 +1,122 @@
+package com.databricks.labs.mosaic.core.expressions.geometry
+import com.databricks.labs.mosaic.core.codegen.format.GeometryIOCodeGen
+import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import org.apache.spark.sql.catalyst.expressions.{Add, Expression}
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode, VariableValue}
+import org.apache.spark.sql.types.{BinaryType, StringType}
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class BinaryVectorExpressionTest extends AnyFunSuite with MockFactory {
+ val mockLeftExpression: Expression = mock[Expression] // left child; doMock() stubs its dataType as BinaryType
+ val mockRightExpression: Expression = mock[Expression] // right child; doMock() stubs its dataType as StringType
+ val mockGeometryAPI: GeometryAPI = mock[GeometryAPI] // returned by mockExpressionConfig.getGeometryAPI once doMock() has run
+ val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig] // config handed to the DummyExpr constructor
+ // Mocking doesn't work well with templates (type-parameterised classes), so a dummy subclass of the abstract class is mocked instead
+ // We are using Add as a template in order to test makeCopy which is linked to GenericExpressionFactory
+ abstract class DummyExpr extends BinaryVectorExpression[Add](
+ mockLeftExpression, mockRightExpression, true, mockExpressionConfig
+ ) {
+ // For partial mocking, make methods that are testable final, scalamock will not mock final methods
+ // Each final override below delegates to super, so the tests exercise the inherited implementation
+ override final def nullSafeEval(leftGeometryRow: Any, rightGeometryRow: Any): Any =
+ super.nullSafeEval(leftGeometryRow, rightGeometryRow)
+ override final def left: Expression = super.left
+ override final def right: Expression = super.right
+ override final def geometryAPI: GeometryAPI = super.geometryAPI
+ override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs)
+ override final def withNewChildrenInternal(newFirst: Expression, newSecond: Expression): Expression =
+ super.withNewChildrenInternal(newFirst, newSecond)
+ override final def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = super.doGenCode(ctx, ev)
+ // We are making inherited nullSafeCodeGen final and passthrough so that we can test the nested behavior in doGenCode
+ override final def nullSafeCodeGen(ctx: CodegenContext, ev: ExprCode, f: (String, String) => String): ExprCode = {
+ ExprCode(null, VariableValue(f("geom1", "geom2"), null)) // calls f with fixed variable names and exposes the generated code as the value
+ }
+ }
+ val bytes: Array[Byte] = "POINT EMPTY".getBytes // bytes of the literal text; used as input row and as the stubbed serialise result
+ val mockExpression: DummyExpr = mock[DummyExpr] // partial mock: the final overrides of DummyExpr are not mocked
+ val mockIndexSystem: IndexSystem = mock[IndexSystem] // NOTE(review): not referenced by the visible tests of this suite - confirm it is needed
+ val mockPoint: MosaicGeometry = mock[MosaicGeometry] // geometry returned by the stubbed valueToGeometry / geometryTransform
+ val mockCtx: CodegenContext = mock[CodegenContext] // codegen context handed to doGenCode
+ val mockIO: GeometryIOCodeGen = mock[GeometryIOCodeGen] // IO code generator returned by mockGeometryAPI.ioCodeGen
+ val expectedCode: String = // text the doGenCode test compares against; concatenation of the stubbed snippets plus the final assignment
+ """
+ |Geometry geom1 = Geometry(wkb1);
+ |Geometry geom2 = Geometry(wkt1);
+ |MosaicGeometry geom3 = MosaicGeometry(geom1).add(MosaicGeometry(geom2));
+ |byte[] wkb3 = geom3.toWKB();
+ |eval1 = wkb3;
+ |""".stripMargin
+ /**
+   * Registers the stubbed behaviour shared by the tests of this suite.
+   * All expectations may be hit any number of times.
+   */
+ def doMock(): Unit = {
+     // Configuration and child expressions.
+     (mockExpressionConfig.getGeometryAPI _).expects(*).returning(mockGeometryAPI).anyNumberOfTimes()
+     (mockLeftExpression.dataType _).expects().returning(BinaryType).anyNumberOfTimes()
+     (mockRightExpression.dataType _).expects().returning(StringType).anyNumberOfTimes()
+     // Geometry API: both inputs deserialise to the same mocked geometry; try-wrapping is an identity.
+     (mockGeometryAPI.valueToGeometry _)
+         .expects(bytes, mockLeftExpression.dataType)
+         .returning(mockPoint)
+         .anyNumberOfTimes()
+     (mockGeometryAPI.valueToGeometry _)
+         .expects("POINT EMPTY", mockRightExpression.dataType)
+         .returning(mockPoint)
+         .anyNumberOfTimes()
+     (mockGeometryAPI.ioCodeGen _).expects().returning(mockIO).anyNumberOfTimes()
+     (mockGeometryAPI.codeGenTryWrap _).expects(expectedCode).returning(expectedCode).anyNumberOfTimes()
+     // Generated readers for the two inputs.
+     (mockIO.fromWKB _)
+         .expects(mockCtx, "geom1", mockGeometryAPI)
+         .returning(("Geometry geom1 = Geometry(wkb1);", "geom1"))
+         .anyNumberOfTimes()
+     (mockIO.fromWKT _)
+         .expects(mockCtx, "geom2", mockGeometryAPI)
+         .returning(("Geometry geom2 = Geometry(wkt1);", "geom2"))
+         .anyNumberOfTimes()
+     // Abstract members of the expression itself.
+     (mockExpression.geometryTransform _).expects(*, *).returning(mockPoint).anyNumberOfTimes()
+     (mockExpression.serialise _).expects(mockPoint, true, BinaryType).returning(bytes).anyNumberOfTimes()
+     (mockExpression.mosaicGeometryRef _).expects("geom1").returning("MosaicGeometry(geom1)").anyNumberOfTimes()
+     (mockExpression.mosaicGeometryRef _).expects("geom2").returning("MosaicGeometry(geom2)").anyNumberOfTimes()
+     (mockExpression.geometryCodeGen _)
+         .expects(*, *, *)
+         .returning(("MosaicGeometry geom3 = MosaicGeometry(geom1).add(MosaicGeometry(geom2));", "geom3"))
+         .anyNumberOfTimes()
+     (mockExpression.serialiseCodegen _)
+         .expects(*, *, *, *)
+         .returning(("byte[] wkb3 = geom3.toWKB();", "wkb3"))
+         .anyNumberOfTimes()
+ }
+ test("BinaryVectorExpression should implement accessor methods") {
+     doMock()
+     // Children and geometry API come straight from the constructor arguments / config.
+     mockExpression.left should be (mockLeftExpression)
+     mockExpression.right should be (mockRightExpression)
+     mockExpression.geometryAPI should be (mockGeometryAPI)
+     // Both copy paths have to rebuild the template expression (Add) from the given children.
+     val expectedCopy = Add(mockLeftExpression, mockRightExpression)
+     mockExpression.makeCopy(Array(mockLeftExpression, mockRightExpression)) should be (expectedCopy)
+     mockExpression.withNewChildrenInternal(mockLeftExpression, mockRightExpression) should be (expectedCopy)
+ }
+ test("VectorExpression should evaluate") {
+     doMock()
+     // nullSafeEval takes a left and a right geometry row. The right row was missing from the call;
+     // "POINT EMPTY" is the value the stubbed valueToGeometry expects together with the right child's StringType.
+     val result = mockExpression.nullSafeEval(
+         bytes,
+         "POINT EMPTY"
+     )
+     // serialise is stubbed to hand back the same byte array.
+     result shouldBe bytes
+ }
+ test("VectorExpression should doGenCode") {
+     doMock()
+     // Output variable "eval1" is the assignment target of the generated code.
+     val target = ExprCode(null, VariableValue("eval1", null))
+     val generated = mockExpression.doGenCode(mockCtx, target)
+     generated.value.code should be (expectedCode)
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/RequiresCRSTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/RequiresCRSTest.scala
new file mode 100644
index 000000000..c3fbe4a18
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/RequiresCRSTest.scala
@@ -0,0 +1,17 @@
+package com.databricks.labs.mosaic.core.expressions.geometry
+import com.databricks.labs.mosaic.core.types.GeoJSONType
+import org.apache.spark.sql.types.StringType
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class RequiresCRSTest extends AnyFunSuite with MockFactory {
+ test("RequiresCRS should return correct encoding for each geometry type") {
+     // Minimal concrete carrier for the trait under test.
+     object TestObject extends RequiresCRS
+     // GeoJSONType passes the encoding check, StringType does not.
+     noException should be thrownBy { TestObject.checkEncoding(GeoJSONType) }
+     an[Exception] should be thrownBy { TestObject.checkEncoding(StringType) }
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector1ArgExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector1ArgExpressionTest.scala
new file mode 100644
index 000000000..065b1d7af
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector1ArgExpressionTest.scala
@@ -0,0 +1,113 @@
+package com.databricks.labs.mosaic.core.expressions.geometry
+import com.databricks.labs.mosaic.core.codegen.format.GeometryIOCodeGen
+import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode, VariableValue}
+import org.apache.spark.sql.catalyst.expressions.{Add, Expression}
+import org.apache.spark.sql.types.BinaryType
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class UnaryVector1ArgExpressionTest extends AnyFunSuite with MockFactory {
+ val mockLeftExpression: Expression = mock[Expression] // geometry child; doMock() stubs its dataType as BinaryType
+ val mockRightExpression: Expression = mock[Expression] // argument child; no expectations are set on it in doMock()
+ val mockGeometryAPI: GeometryAPI = mock[GeometryAPI] // returned by mockExpressionConfig.getGeometryAPI once doMock() has run
+ val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig] // config handed to the DummyExpr constructor
+ // Mocking doesn't work well with templates (type-parameterised classes), so a dummy subclass of the abstract class is mocked instead
+ // We are using Add as a template in order to test makeCopy which is linked to GenericExpressionFactory
+ abstract class DummyExpr extends UnaryVector1ArgExpression[Add](
+ mockLeftExpression, mockRightExpression, true, mockExpressionConfig
+ ) {
+ // For partial mocking, make methods that are testable final, scalamock will not mock final methods
+ // Each final override below delegates to super, so the tests exercise the inherited implementation
+ override final def nullSafeEval(leftGeometryRow: Any, arg1Row: Any): Any =
+ super.nullSafeEval(leftGeometryRow, arg1Row)
+ override final def left: Expression = super.left
+ override final def right: Expression = super.right
+ override final def geometryAPI: GeometryAPI = super.geometryAPI
+ override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs)
+ override final def withNewChildrenInternal(newFirst: Expression, newSecond: Expression): Expression =
+ super.withNewChildrenInternal(newFirst, newSecond)
+ override final def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = super.doGenCode(ctx, ev)
+ // We are making inherited nullSafeCodeGen final and passthrough so that we can test the nested behavior in doGenCode
+ override final def nullSafeCodeGen(ctx: CodegenContext, ev: ExprCode, f: (String, String) => String): ExprCode = {
+ ExprCode(null, VariableValue(f("geom1", "arg1"), null)) // calls f with fixed variable names and exposes the generated code as the value
+ }
+ }
+ val bytes: Array[Byte] = "POINT EMPTY".getBytes // bytes of the literal text; used as input row and as the stubbed serialise result
+ val mockExpression: DummyExpr = mock[DummyExpr] // partial mock: the final overrides of DummyExpr are not mocked
+ val mockIndexSystem: IndexSystem = mock[IndexSystem] // NOTE(review): not referenced by the visible tests of this suite - confirm it is needed
+ val mockPoint: MosaicGeometry = mock[MosaicGeometry] // geometry returned by the stubbed valueToGeometry / geometryTransform
+ val mockCtx: CodegenContext = mock[CodegenContext] // codegen context handed to doGenCode
+ val mockIO: GeometryIOCodeGen = mock[GeometryIOCodeGen] // IO code generator returned by mockGeometryAPI.ioCodeGen
+ val expectedCode: String = // text the doGenCode test compares against; concatenation of the stubbed snippets plus the final assignment
+ """
+ |Geometry geom1 = Geometry(wkb1);
+ |MosaicGeometry geom3 = MosaicGeometry(geom1).buffer(arg1);
+ |byte[] wkb3 = geom3.toWKB();
+ |eval1 = wkb3;
+ |""".stripMargin
+ /**
+   * Registers the stubbed behaviour shared by the tests of this suite.
+   * All expectations may be hit any number of times.
+   */
+ def doMock(): Unit = {
+     // Configuration and geometry child.
+     (mockExpressionConfig.getGeometryAPI _).expects(*).returning(mockGeometryAPI).anyNumberOfTimes()
+     (mockLeftExpression.dataType _).expects().returning(BinaryType).anyNumberOfTimes()
+     // Geometry API: the input deserialises to the mocked geometry; try-wrapping is an identity.
+     (mockGeometryAPI.valueToGeometry _)
+         .expects(bytes, mockLeftExpression.dataType)
+         .returning(mockPoint)
+         .anyNumberOfTimes()
+     (mockGeometryAPI.ioCodeGen _).expects().returning(mockIO).anyNumberOfTimes()
+     (mockGeometryAPI.codeGenTryWrap _).expects(expectedCode).returning(expectedCode).anyNumberOfTimes()
+     // Generated reader for the geometry input.
+     (mockIO.fromWKB _)
+         .expects(mockCtx, "geom1", mockGeometryAPI)
+         .returning(("Geometry geom1 = Geometry(wkb1);", "geom1"))
+         .anyNumberOfTimes()
+     // Abstract members of the expression itself.
+     (mockExpression.geometryTransform _).expects(*, *).returning(mockPoint).anyNumberOfTimes()
+     (mockExpression.serialise _).expects(mockPoint, true, BinaryType).returning(bytes).anyNumberOfTimes()
+     (mockExpression.mosaicGeometryRef _).expects("geom1").returning("MosaicGeometry(geom1)").anyNumberOfTimes()
+     (mockExpression.geometryCodeGen _)
+         .expects(*, *, *)
+         .returning(("MosaicGeometry geom3 = MosaicGeometry(geom1).buffer(arg1);", "geom3"))
+         .anyNumberOfTimes()
+     (mockExpression.serialiseCodegen _)
+         .expects(*, *, *, *)
+         .returning(("byte[] wkb3 = geom3.toWKB();", "wkb3"))
+         .anyNumberOfTimes()
+ }
+ // Title corrected: this suite exercises UnaryVector1ArgExpression, not BinaryVectorExpression.
+ test("UnaryVector1ArgExpression should implement accessor methods") {
+     doMock()
+     // Children and geometry API come straight from the constructor arguments / config.
+     mockExpression.left shouldBe mockLeftExpression
+     mockExpression.right shouldBe mockRightExpression
+     mockExpression.geometryAPI shouldBe mockGeometryAPI
+     // Both copy paths have to rebuild the template expression (Add) from the given children.
+     mockExpression.makeCopy(Array(mockLeftExpression, mockRightExpression)) shouldBe Add(mockLeftExpression, mockRightExpression)
+     mockExpression.withNewChildrenInternal(mockLeftExpression, mockRightExpression) shouldBe Add(mockLeftExpression, mockRightExpression)
+ }
+ test("VectorExpression should evaluate") {
+     doMock()
+     // Geometry row plus one extra argument; serialise is stubbed to hand back the same byte array.
+     val evaluated = mockExpression.nullSafeEval(bytes, 1)
+     evaluated should be (bytes)
+ }
+ test("VectorExpression should doGenCode") {
+     doMock()
+     // Output variable "eval1" is the assignment target of the generated code.
+     val target = ExprCode(null, VariableValue("eval1", null))
+     val generated = mockExpression.doGenCode(mockCtx, target)
+     generated.value.code should be (expectedCode)
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector2ArgExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector2ArgExpressionTest.scala
new file mode 100644
index 000000000..8eab70db5
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector2ArgExpressionTest.scala
@@ -0,0 +1,119 @@
+package com.databricks.labs.mosaic.core.expressions.geometry
+import com.databricks.labs.mosaic.core.codegen.format.GeometryIOCodeGen
+import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode, VariableValue}
+import org.apache.spark.sql.catalyst.expressions.{Conv, Expression}
+import org.apache.spark.sql.types.BinaryType
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class UnaryVector2ArgExpressionTest extends AnyFunSuite with MockFactory {
+ // Mocked children, geometry API and expression config passed to the expression under test.
+ val mockFirstExpression: Expression = mock[Expression]
+ val mockSecondExpression: Expression = mock[Expression]
+ val mockThirdExpression: Expression = mock[Expression]
+ val mockGeometryAPI: GeometryAPI = mock[GeometryAPI]
+ val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig]
+ // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class
+ // We are using Conv as a template in order to test makeCopy which is linked to GenericExpressionFactory
+ abstract class DummyExpr extends UnaryVector2ArgExpression[Conv](
+ mockFirstExpression, mockSecondExpression, mockThirdExpression, true, mockExpressionConfig
+ ) {
+ // For partial mocking, make methods that are testable final, scalamock will not mock final methods
+ // Each final override below simply forwards to the inherited implementation.
+ override final def nullSafeEval(leftGeometryRow: Any, arg1Row: Any, arg2Row: Any): Any =
+ super.nullSafeEval(leftGeometryRow, arg1Row, arg2Row)
+ override final def first: Expression = super.first
+ override final def second: Expression = super.second
+ override final def third: Expression = super.third
+ override final def geometryAPI: GeometryAPI = super.geometryAPI
+ override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs)
+ override final def withNewChildrenInternal(newFirst: Expression, newSecond: Expression, newThird: Expression): Expression =
+ super.withNewChildrenInternal(newFirst, newSecond, newThird)
+ override final def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = super.doGenCode(ctx, ev)
+ // We are making inherited nullSafeCodeGen final and passthrough so that we can test the nested behavior in doGenCode
+ // The callback is invoked with fixed variable names and its result is wrapped as the value; ctx and ev are ignored.
+ override final def nullSafeCodeGen(ctx: CodegenContext, ev: ExprCode, f: (String, String, String) => String): ExprCode = {
+ ExprCode(null, VariableValue(f("geom1", "arg1", "arg2"), null))
+ }
+ }
+ // Payload passed to nullSafeEval and returned by the stubbed serialise call in doMock().
+ val bytes: Array[Byte] = "POINT EMPTY".getBytes
+ // Mock of DummyExpr; the members DummyExpr declares final are the ones exercised by the tests.
+ val mockExpression: DummyExpr = mock[DummyExpr]
+ // NOTE(review): mockIndexSystem is not referenced by any test visible in this suite.
+ val mockIndexSystem: IndexSystem = mock[IndexSystem]
+ val mockPoint: MosaicGeometry = mock[MosaicGeometry]
+ val mockCtx: CodegenContext = mock[CodegenContext]
+ val mockIO: GeometryIOCodeGen = mock[GeometryIOCodeGen]
+ // Code expected from doGenCode: the three snippets stubbed in doMock() followed by the assignment to eval1.
+ val expectedCode: String =
+ """
+ |Geometry geom1 = Geometry(wkb1);
+ |MosaicGeometry geom3 = MosaicGeometry(geom1).buffer(arg1 + arg2);
+ |byte[] wkb3 = geom3.toWKB();
+ |eval1 = wkb3;
+ |""".stripMargin
+ /** Registers the lenient (any-number-of-times) expectations shared by every test in this suite. */
+ def doMock(): Unit = {
+   // Collaborators: expression config, first child, geometry API and its IO code generator.
+   (mockExpressionConfig.getGeometryAPI _).expects(*).returning(mockGeometryAPI).anyNumberOfTimes()
+   (mockFirstExpression.dataType _).expects().returning(BinaryType).anyNumberOfTimes()
+   (mockGeometryAPI.valueToGeometry _).expects(bytes, mockFirstExpression.dataType).returning(mockPoint).anyNumberOfTimes()
+   (mockGeometryAPI.ioCodeGen _).expects().returning(mockIO).anyNumberOfTimes()
+   (mockGeometryAPI.codeGenTryWrap _).expects(expectedCode).returning(expectedCode).anyNumberOfTimes()
+   (mockIO.fromWKB _).expects(mockCtx, "geom1", mockGeometryAPI)
+     .returning(("Geometry geom1 = Geometry(wkb1);", "geom1")).anyNumberOfTimes()
+   // Non-final members of the expression under test, stubbed with canned results.
+   (mockExpression.geometryTransform _).expects(*, *, *).returning(mockPoint).anyNumberOfTimes()
+   (mockExpression.serialise _).expects(mockPoint, true, BinaryType).returning(bytes).anyNumberOfTimes()
+   (mockExpression.mosaicGeometryRef _).expects("geom1").returning("MosaicGeometry(geom1)").anyNumberOfTimes()
+   (mockExpression.geometryCodeGen _).expects(*, *, *, *)
+     .returning(("MosaicGeometry geom3 = MosaicGeometry(geom1).buffer(arg1 + arg2);", "geom3")).anyNumberOfTimes()
+   (mockExpression.serialiseCodegen _).expects(*, *, *, *)
+     .returning(("byte[] wkb3 = geom3.toWKB();", "wkb3")).anyNumberOfTimes()
+ }
+ // Test name corrected: this suite exercises UnaryVector2ArgExpression, not BinaryVectorExpression.
+ test("UnaryVector2ArgExpression should implement accessor methods") {
+   doMock()
+   // Accessors overridden as final in DummyExpr should return the constructor arguments.
+   mockExpression.first shouldBe mockFirstExpression
+   mockExpression.second shouldBe mockSecondExpression
+   mockExpression.third shouldBe mockThirdExpression
+   mockExpression.geometryAPI shouldBe mockGeometryAPI
+   // Both copy paths are expected to yield a Conv over the same three children.
+   mockExpression.makeCopy(Array(mockFirstExpression, mockSecondExpression, mockThirdExpression)) shouldBe
+     Conv(mockFirstExpression, mockSecondExpression, mockThirdExpression)
+   mockExpression.withNewChildrenInternal(mockFirstExpression, mockSecondExpression, mockThirdExpression) shouldBe
+     Conv(mockFirstExpression, mockSecondExpression, mockThirdExpression)
+ }
+ test("VectorExpression should evaluate") {
+   doMock()
+   // doMock() stubs serialise to return `bytes`; the evaluation result is expected to be that array.
+   mockExpression.nullSafeEval(bytes, 1, 2) shouldBe bytes
+ }
+ test("VectorExpression should doGenCode") {
+   doMock()
+   // "eval1" is the target variable name that shows up in the last line of expectedCode.
+   val ev = ExprCode(null, VariableValue("eval1", null))
+   val generated = mockExpression.doGenCode(mockCtx, ev)
+   generated.value.code shouldBe expectedCode
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVectorExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVectorExpressionTest.scala
new file mode 100644
index 000000000..7e5dfae9e
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVectorExpressionTest.scala
@@ -0,0 +1,109 @@
+package com.databricks.labs.mosaic.core.expressions.geometry
+import com.databricks.labs.mosaic.core.codegen.format.GeometryIOCodeGen
+import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode, VariableValue}
+import org.apache.spark.sql.catalyst.expressions.{Abs, Expression}
+import org.apache.spark.sql.types.BinaryType
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class UnaryVectorExpressionTest extends AnyFunSuite with MockFactory {
+ // Mocked child, geometry API and expression config passed to the expression under test.
+ val mockLeftExpression: Expression = mock[Expression]
+ val mockGeometryAPI: GeometryAPI = mock[GeometryAPI]
+ val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig]
+ // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class
+ // We are using Abs as a template in order to test makeCopy which is linked to GenericExpressionFactory
+ abstract class DummyExpr extends UnaryVectorExpression[Abs](
+ mockLeftExpression, true, mockExpressionConfig
+ ) {
+ // For partial mocking, make methods that are testable final, scalamock will not mock final methods
+ // Each final override below simply forwards to the inherited implementation.
+ override final def nullSafeEval(leftGeometryRow: Any): Any =
+ super.nullSafeEval(leftGeometryRow)
+ override final def child: Expression = super.child
+ override final def geometryAPI: GeometryAPI = super.geometryAPI
+ override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs)
+ override final def withNewChildInternal(newFirst: Expression): Expression =
+ super.withNewChildInternal(newFirst)
+ override final def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = super.doGenCode(ctx, ev)
+ // We are making inherited nullSafeCodeGen final and passthrough so that we can test the nested behavior in doGenCode
+ // The callback is invoked with a fixed variable name and its result is wrapped as the value; ctx and ev are ignored.
+ override final def nullSafeCodeGen(ctx: CodegenContext, ev: ExprCode, f: String => String): ExprCode = {
+ ExprCode(null, VariableValue(f("geom1"), null))
+ }
+ }
+ // Payload passed to nullSafeEval and returned by the stubbed serialise call in doMock().
+ val bytes: Array[Byte] = "POINT EMPTY".getBytes
+ // Mock of DummyExpr; the members DummyExpr declares final are the ones exercised by the tests.
+ val mockExpression: DummyExpr = mock[DummyExpr]
+ // NOTE(review): mockIndexSystem is not referenced by any test visible in this suite.
+ val mockIndexSystem: IndexSystem = mock[IndexSystem]
+ val mockPoint: MosaicGeometry = mock[MosaicGeometry]
+ val mockCtx: CodegenContext = mock[CodegenContext]
+ val mockIO: GeometryIOCodeGen = mock[GeometryIOCodeGen]
+ // Code expected from doGenCode: the three snippets stubbed in doMock() followed by the assignment to eval1.
+ val expectedCode: String =
+ """
+ |Geometry geom1 = Geometry(wkb1);
+ |MosaicGeometry geom3 = MosaicGeometry(geom1);
+ |byte[] wkb3 = geom3.toWKB();
+ |eval1 = wkb3;
+ |""".stripMargin
+ /** Registers the lenient (any-number-of-times) expectations shared by every test in this suite. */
+ def doMock(): Unit = {
+   // Collaborators: expression config, child expression, geometry API and its IO code generator.
+   (mockExpressionConfig.getGeometryAPI _).expects(*).returning(mockGeometryAPI).anyNumberOfTimes()
+   (mockLeftExpression.dataType _).expects().returning(BinaryType).anyNumberOfTimes()
+   (mockGeometryAPI.valueToGeometry _).expects(bytes, mockLeftExpression.dataType).returning(mockPoint).anyNumberOfTimes()
+   (mockGeometryAPI.ioCodeGen _).expects().returning(mockIO).anyNumberOfTimes()
+   (mockGeometryAPI.codeGenTryWrap _).expects(expectedCode).returning(expectedCode).anyNumberOfTimes()
+   (mockIO.fromWKB _).expects(mockCtx, "geom1", mockGeometryAPI)
+     .returning(("Geometry geom1 = Geometry(wkb1);", "geom1")).anyNumberOfTimes()
+   // Non-final members of the expression under test, stubbed with canned results.
+   (mockExpression.geometryTransform _).expects(*).returning(mockPoint).anyNumberOfTimes()
+   (mockExpression.serialise _).expects(mockPoint, true, BinaryType).returning(bytes).anyNumberOfTimes()
+   (mockExpression.mosaicGeometryRef _).expects("geom1").returning("MosaicGeometry(geom1)").anyNumberOfTimes()
+   (mockExpression.geometryCodeGen _).expects(*, *)
+     .returning(("MosaicGeometry geom3 = MosaicGeometry(geom1);", "geom3")).anyNumberOfTimes()
+   (mockExpression.serialiseCodegen _).expects(*, *, *, *)
+     .returning(("byte[] wkb3 = geom3.toWKB();", "wkb3")).anyNumberOfTimes()
+ }
+ // Test name corrected: this suite exercises UnaryVectorExpression, not BinaryVectorExpression.
+ test("UnaryVectorExpression should implement accessor methods") {
+   doMock()
+   // Accessors overridden as final in DummyExpr should return the constructor arguments.
+   mockExpression.child shouldBe mockLeftExpression
+   mockExpression.geometryAPI shouldBe mockGeometryAPI
+   // Both copy paths are expected to yield an Abs over the same child.
+   mockExpression.makeCopy(Array(mockLeftExpression)) shouldBe Abs(mockLeftExpression)
+   mockExpression.withNewChildInternal(mockLeftExpression) shouldBe Abs(mockLeftExpression)
+ }
+ test("VectorExpression should evaluate") {
+   doMock()
+   // doMock() stubs serialise to return `bytes`; the evaluation result is expected to be that array.
+   mockExpression.nullSafeEval(bytes) shouldBe bytes
+ }
+ test("VectorExpression should doGenCode") {
+   doMock()
+   // "eval1" is the target variable name that shows up in the last line of expectedCode.
+   val ev = ExprCode(null, VariableValue("eval1", null))
+   val generated = mockExpression.doGenCode(mockCtx, ev)
+   generated.value.code shouldBe expectedCode
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/VectorExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/VectorExpressionTest.scala
new file mode 100644
index 000000000..7e13b36d9
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/VectorExpressionTest.scala
@@ -0,0 +1,106 @@
+package com.databricks.labs.mosaic.core.expressions.geometry
+import com.databricks.labs.mosaic.core.codegen.format.GeometryIOCodeGen
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
+import org.apache.spark.sql.types.{BinaryType, StringType}
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class VectorExpressionTest extends AnyFunSuite with MockFactory {
+ // Fully mocked VectorExpression; only used by the first test, which reads back the values stubbed in doMock().
+ val mockExpression: VectorExpression = mock[VectorExpression]
+ // Mocked collaborators that doMock() configures for the tests in this suite.
+ val mockIndexSystem: IndexSystem = mock[IndexSystem]
+ val mockGeometryAPI: GeometryAPI = mock[GeometryAPI]
+ val mockPoint: MosaicGeometry = mock[MosaicGeometry]
+ val mockCtx: CodegenContext = mock[CodegenContext]
+ val mockIO: GeometryIOCodeGen = mock[GeometryIOCodeGen]
+ /** Registers the lenient (any-number-of-times) expectations shared by every test in this suite. */
+ def doMock(): Unit = {
+   // Canned values for the fully mocked VectorExpression.
+   (mockExpression.geometryAPI _).expects().returning(mockGeometryAPI).anyNumberOfTimes()
+   (mockExpression.mosaicGeomClass _).expects().returning("M_GEOMETRY").anyNumberOfTimes()
+   (mockExpression.geomClass _).expects().returning("GEOMETRY").anyNumberOfTimes()
+   (mockExpression.CRSBoundsProviderClass _).expects().returning("CRSBoundsProvider").anyNumberOfTimes()
+   (mockExpression.geometryAPIClass _).expects().returning("GEOMETRY_API").anyNumberOfTimes()
+   // Index system and geometry API stubs.
+   (mockIndexSystem.name _).expects().returning("INDEX_SYSTEM").anyNumberOfTimes()
+   (mockGeometryAPI.name _).expects().returning("GEOMETRY_API").anyNumberOfTimes()
+   (mockGeometryAPI.serialize _).expects(mockPoint, StringType).returning("POINT EMPTY").anyNumberOfTimes()
+   (mockGeometryAPI.ioCodeGen _).expects().returning(mockIO).anyNumberOfTimes()
+   (mockGeometryAPI.geometryClass _).expects().returning("GEOMETRY").anyNumberOfTimes()
+   (mockGeometryAPI.mosaicGeometryClass _).expects().returning("M_GEOMETRY").anyNumberOfTimes()
+   (mockPoint.toWKT _).expects().returning("POINT EMPTY").anyNumberOfTimes()
+   // Codegen stubs: a fixed fresh name and the WKT snippet produced for it.
+   (mockCtx.freshName _).expects("baseGeometry").returning("baseGeometry1").anyNumberOfTimes()
+   (mockIO.toWKT _).expects(mockCtx, "baseGeometry1", mockGeometryAPI)
+     .returning(("String wkt1 = baseGeometry1.toWKT();", "wkt1")).anyNumberOfTimes()
+ }
+ test("VectorExpression should be correctly initialised from expressionConfig") {
+   doMock()
+   // Reads back the class-name strings stubbed on mockExpression by doMock().
+   mockExpression.mosaicGeomClass should be("M_GEOMETRY")
+   mockExpression.geomClass should be("GEOMETRY")
+   mockExpression.CRSBoundsProviderClass should be("CRSBoundsProvider")
+   mockExpression.geometryAPIClass should be("GEOMETRY_API")
+ }
+ test("VectorExpression should serialise a result or a geometry result") {
+   doMock()
+   // Minimal concrete VectorExpression backed by the mocked geometry API.
+   object TestObject extends VectorExpression {
+     override def geometryAPI: GeometryAPI = mockGeometryAPI
+   }
+   val wkt = "POINT EMPTY"
+   // Non-geometry results are expected back unchanged.
+   TestObject.serialise(wkt, returnsGeometry = false, StringType) shouldBe wkt
+   TestObject.serialise(wkt.getBytes, returnsGeometry = false, BinaryType) shouldBe wkt.getBytes
+   // Geometry result: doMock() stubs mockGeometryAPI.serialize(mockPoint, StringType) to return the same text.
+   TestObject.serialise(mockPoint, returnsGeometry = true, StringType) shouldBe wkt
+ }
+ test("VectorExpression should generate serialise code") {
+   doMock()
+   // Minimal concrete VectorExpression backed by the mocked geometry API.
+   object TestObject extends VectorExpression {
+     override def geometryAPI: GeometryAPI = mockGeometryAPI
+   }
+   // Expected snippet when the result is a geometry; names come from the stubs in doMock().
+   val geometryCode: String =
+     """
+       |GEOMETRY baseGeometry1 = geom1.getGeom();
+       |String wkt1 = baseGeometry1.toWKT();
+       |""".stripMargin
+   val forGeometry = TestObject.serialiseCodegen("geom1", returnsGeometry = true, StringType, mockCtx)
+   forGeometry shouldBe (geometryCode, "wkt1")
+   // Non-geometry results are expected to need no extra code and keep the input variable name.
+   val forPlainValue = TestObject.serialiseCodegen("geom1", returnsGeometry = false, StringType, mockCtx)
+   forPlainValue shouldBe ("", "geom1")
+ }
+ test("VectorExpression should return correct mosaicGeometryRef") {
+   doMock()
+   // Minimal concrete VectorExpression backed by the mocked geometry API.
+   object TestObject extends VectorExpression {
+     override def geometryAPI: GeometryAPI = mockGeometryAPI
+   }
+   // "M_GEOMETRY" is the mosaicGeometryClass value stubbed in doMock().
+   val reference = TestObject.mosaicGeometryRef("geom1")
+   reference shouldBe "M_GEOMETRY.apply(geom1)"
+ }
+ test("VectorExpression should implement accessor methods") {
+   doMock()
+   // Minimal concrete VectorExpression backed by the mocked geometry API.
+   object TestObject extends VectorExpression {
+     override def geometryAPI: GeometryAPI = mockGeometryAPI
+   }
+   // Smoke check only: none of the accessors may throw; returned values are not inspected.
+   noException should be thrownBy {
+     TestObject.mosaicGeomClass
+     TestObject.geomClass
+     TestObject.CRSBoundsProviderClass
+     TestObject.geometryAPIClass
+   }
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster1ArgExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster1ArgExpressionTest.scala
new file mode 100644
index 000000000..06284c56c
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster1ArgExpressionTest.scala
@@ -0,0 +1,86 @@
+package com.databricks.labs.mosaic.core.expressions.raster
+import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig
+import com.databricks.labs.mosaic.core.raster.{MosaicRaster, RasterAPI}
+import org.apache.spark.sql.catalyst.expressions.{Add, Expression}
+import org.apache.spark.sql.types.{BinaryType, DataType}
+import org.apache.spark.unsafe.types.UTF8String
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class Raster1ArgExpressionTest extends AnyFunSuite with MockFactory {
+ // Mocked children, raster API and expression config passed to the expression under test.
+ val mockLeftExpression: Expression = mock[Expression]
+ val mockRightExpression: Expression = mock[Expression]
+ val mockRasterAPI: RasterAPI = mock[RasterAPI]
+ val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig]
+ // These two expectations are registered in the suite body, before DummyExpr is mocked further down.
+ mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes()
+ mockRasterAPI.enable _ expects() returning mockRasterAPI anyNumberOfTimes()
+ val mockRaster: MosaicRaster = mock[MosaicRaster]
+ // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class
+ // We are using Add as a template in order to test makeCopy which is linked to GenericExpressionFactory
+ abstract class DummyExpr extends Raster1ArgExpression[Add](
+ mockLeftExpression, mockRightExpression, BinaryType, mockExpressionConfig
+ ) {
+ // For partial mocking, make methods that are testable final, scalamock will not mock final methods
+ // Each final def below simply forwards to the inherited implementation.
+ override final def nullSafeEval(leftGeometryRow: Any, arg1Row: Any): Any =
+ super.nullSafeEval(leftGeometryRow, arg1Row)
+ override final def left: Expression = super.left
+ override final def right: Expression = super.right
+ override final def dataType: DataType = super.dataType
+ // rasterAPI is pinned to the mock rather than forwarded to super.
+ override final val rasterAPI: RasterAPI = mockRasterAPI
+ override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs)
+ override final def withNewChildrenInternal(newFirst: Expression, newArg1: Expression): Expression =
+ super.withNewChildrenInternal(newFirst, newArg1)
+ }
+ // NOTE(review): `bytes` is not referenced by any test visible in this suite.
+ val bytes: Array[Byte] = "POINT EMPTY".getBytes
+ // Mock of DummyExpr; the members DummyExpr declares final are the ones exercised by the tests.
+ val mockExpression: DummyExpr = mock[DummyExpr]
+ /** Registers the lenient (any-number-of-times) expectations shared by every test in this suite. */
+ def doMock(): Unit = {
+   (mockExpressionConfig.getRasterAPI _).expects(*).returning(mockRasterAPI).anyNumberOfTimes()
+   // The transform is stubbed to hand the input raster straight back.
+   (mockExpression.rasterTransform _).expects(mockRaster, 1).returning(mockRaster).anyNumberOfTimes()
+   (mockRasterAPI.raster _).expects("path").returning(mockRaster).anyNumberOfTimes()
+   (mockRaster.cleanUp _).expects().returning(null).anyNumberOfTimes()
+   (mockLeftExpression.toString _).expects().returning("left").anyNumberOfTimes()
+   (mockRightExpression.toString _).expects().returning("right").anyNumberOfTimes()
+ }
+ test("Raster1ArgExpression should implement accessor methods") {
+   doMock()
+   // The final overrides in DummyExpr forward to the real accessors, checked against the constructor arguments.
+   mockExpression.left should be(mockLeftExpression)
+   mockExpression.right should be(mockRightExpression)
+   mockExpression.dataType should be(BinaryType)
+ }
+ test("Raster1ArgExpression should evaluate") {
+   doMock()
+   // "path" is the value mockRasterAPI.raster is stubbed for; rasterTransform is stubbed to return mockRaster.
+   mockExpression.nullSafeEval(UTF8String.fromString("path"), 1) shouldBe mockRaster
+ }
+ test("Raster1ArgExpression should make copy") {
+   doMock()
+   // Both copy paths are expected to build an instance of the template type, Add.
+   val children = Array[AnyRef](mockLeftExpression, mockRightExpression)
+   mockExpression.makeCopy(children) shouldBe a[Add]
+   mockExpression.withNewChildrenInternal(mockLeftExpression, mockRightExpression) shouldBe a[Add]
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster2ArgExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster2ArgExpressionTest.scala
new file mode 100644
index 000000000..ac218dadb
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster2ArgExpressionTest.scala
@@ -0,0 +1,91 @@
+package com.databricks.labs.mosaic.core.expressions.raster
+import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig
+import com.databricks.labs.mosaic.core.raster.{MosaicRaster, RasterAPI}
+import org.apache.spark.sql.catalyst.expressions.{Conv, Expression}
+import org.apache.spark.sql.types.{BinaryType, DataType}
+import org.apache.spark.unsafe.types.UTF8String
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers.{a, _}
+class Raster2ArgExpressionTest extends AnyFunSuite with MockFactory {
+ // Mocked children, raster API and expression config passed to the expression under test.
+ val mockFirstExpression: Expression = mock[Expression]
+ val mockSecondExpression: Expression = mock[Expression]
+ val mockThirdExpression: Expression = mock[Expression]
+ val mockRasterAPI: RasterAPI = mock[RasterAPI]
+ val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig]
+ // These two expectations are registered in the suite body, before DummyExpr is mocked further down.
+ mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes()
+ mockRasterAPI.enable _ expects() returning mockRasterAPI anyNumberOfTimes()
+ val mockRaster: MosaicRaster = mock[MosaicRaster]
+ // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class
+ // We are using Conv as a template in order to test makeCopy which is linked to GenericExpressionFactory
+ abstract class DummyExpr extends Raster2ArgExpression[Conv](
+ mockFirstExpression, mockSecondExpression, mockThirdExpression, BinaryType, mockExpressionConfig
+ ) {
+ // For partial mocking, make methods that are testable final, scalamock will not mock final methods
+ // Each final def below simply forwards to the inherited implementation.
+ override final def nullSafeEval(leftGeometryRow: Any, arg1Row: Any, arg2Row: Any): Any =
+ super.nullSafeEval(leftGeometryRow, arg1Row, arg2Row)
+ override final def first: Expression = super.first
+ override final def second: Expression = super.second
+ override final def third: Expression = super.third
+ override final def dataType: DataType = super.dataType
+ // rasterAPI is pinned to the mock rather than forwarded to super.
+ override final val rasterAPI: RasterAPI = mockRasterAPI
+ override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs)
+ override final def withNewChildrenInternal(newFirst: Expression, newArg1: Expression, newArg2: Expression): Expression =
+ super.withNewChildrenInternal(newFirst, newArg1, newArg2)
+ }
+ // NOTE(review): `bytes` is not referenced by any test visible in this suite.
+ val bytes: Array[Byte] = "POINT EMPTY".getBytes
+ // Mock of DummyExpr; the members DummyExpr declares final are the ones exercised by the tests.
+ val mockExpression: DummyExpr = mock[DummyExpr]
+ /** Registers the lenient (any-number-of-times) expectations shared by every test in this suite. */
+ def doMock(): Unit = {
+   (mockExpressionConfig.getRasterAPI _).expects(*).returning(mockRasterAPI).anyNumberOfTimes()
+   // The transform is stubbed to hand the input raster straight back.
+   (mockExpression.rasterTransform _).expects(mockRaster, 1, 2).returning(mockRaster).anyNumberOfTimes()
+   (mockRasterAPI.raster _).expects("path").returning(mockRaster).anyNumberOfTimes()
+   (mockRaster.cleanUp _).expects().returning(null).anyNumberOfTimes()
+   (mockFirstExpression.toString _).expects().returning("first").anyNumberOfTimes()
+   (mockSecondExpression.toString _).expects().returning("second").anyNumberOfTimes()
+   (mockThirdExpression.toString _).expects().returning("third").anyNumberOfTimes()
+ }
+ test("Raster2ArgExpression should implement accessor methods") {
+   doMock()
+   // The final overrides in DummyExpr forward to the real accessors, checked against the constructor arguments.
+   mockExpression.first should be(mockFirstExpression)
+   mockExpression.second should be(mockSecondExpression)
+   mockExpression.third should be(mockThirdExpression)
+   mockExpression.dataType should be(BinaryType)
+ }
+ test("Raster2ArgExpression should evaluate") {
+   doMock()
+   // "path" is the value mockRasterAPI.raster is stubbed for; rasterTransform is stubbed to return mockRaster.
+   mockExpression.nullSafeEval(UTF8String.fromString("path"), 1, 2) shouldBe mockRaster
+ }
+ test("Raster2ArgExpression should make copy") {
+   doMock()
+   // Both copy paths are expected to build an instance of the template type, Conv.
+   val children = Array[AnyRef](mockFirstExpression, mockSecondExpression, mockThirdExpression)
+   mockExpression.makeCopy(children) shouldBe a[Conv]
+   mockExpression.withNewChildrenInternal(mockFirstExpression, mockSecondExpression, mockThirdExpression) shouldBe a[Conv]
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterBandExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterBandExpressionTest.scala
new file mode 100644
index 000000000..b3564aa0c
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterBandExpressionTest.scala
@@ -0,0 +1,88 @@
+package com.databricks.labs.mosaic.core.expressions.raster
+import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig
+import com.databricks.labs.mosaic.core.raster.{MosaicRaster, MosaicRasterBand, RasterAPI}
+import org.apache.spark.sql.catalyst.expressions.{Add, Expression}
+import org.apache.spark.sql.types.{BinaryType, DataType}
+import org.apache.spark.unsafe.types.UTF8String
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers.{a, _}
+class RasterBandExpressionTest extends AnyFunSuite with MockFactory {
+ // Mocked children, raster API and expression config passed to the expression under test.
+ val mockLeftExpression: Expression = mock[Expression]
+ val mockRightExpression: Expression = mock[Expression]
+ val mockRasterAPI: RasterAPI = mock[RasterAPI]
+ val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig]
+ // These two expectations are registered in the suite body, before DummyExpr is mocked further down.
+ mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes()
+ mockRasterAPI.enable _ expects() returning mockRasterAPI anyNumberOfTimes()
+ val mockRaster: MosaicRaster = mock[MosaicRaster]
+ val mockBand: MosaicRasterBand = mock[MosaicRasterBand]
+ // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class
+ // We are using Add as a template in order to test makeCopy which is linked to GenericExpressionFactory
+ abstract class DummyExpr extends RasterBandExpression[Add](
+ mockLeftExpression, mockRightExpression, BinaryType, mockExpressionConfig
+ ) {
+ // For partial mocking, make methods that are testable final, scalamock will not mock final methods
+ // Each final def below simply forwards to the inherited implementation.
+ override final def nullSafeEval(leftRasterRow: Any, rightRasterRow: Any): Any =
+ super.nullSafeEval(leftRasterRow, rightRasterRow)
+ override final def left: Expression = super.left
+ override final def right: Expression = super.right
+ override final def dataType: DataType = super.dataType
+ // rasterAPI is pinned to the mock rather than forwarded to super.
+ override final val rasterAPI: RasterAPI = mockRasterAPI
+ override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs)
+ override final def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Expression =
+ super.withNewChildrenInternal(newLeft, newRight)
+ }
+ // NOTE(review): `bytes` is not referenced by any test visible in this suite.
+ val bytes: Array[Byte] = "POINT EMPTY".getBytes
+ // Mock of DummyExpr; the members DummyExpr declares final are the ones exercised by the tests.
+ val mockExpression: DummyExpr = mock[DummyExpr]
+ /** Registers the lenient (any-number-of-times) expectations shared by every test in this suite. */
+ def doMock(): Unit = {
+   (mockExpressionConfig.getRasterAPI _).expects(*).returning(mockRasterAPI).anyNumberOfTimes()
+   // The band transform is stubbed to hand the input raster straight back.
+   (mockExpression.bandTransform _).expects(mockRaster, mockBand).returning(mockRaster).anyNumberOfTimes()
+   (mockRasterAPI.raster _).expects("path").returning(mockRaster).anyNumberOfTimes()
+   (mockRaster.cleanUp _).expects().returning(null).anyNumberOfTimes()
+   // Band 1 is the index the evaluate test passes as the second argument.
+   (mockRaster.getBand _).expects(1).returning(mockBand).anyNumberOfTimes()
+   (mockLeftExpression.toString _).expects().returning("left").anyNumberOfTimes()
+   (mockRightExpression.toString _).expects().returning("right").anyNumberOfTimes()
+ }
+ test("RasterBandExpression should implement accessor methods") {
+   doMock()
+   // The final overrides in DummyExpr forward to the real accessors, checked against the constructor arguments.
+   mockExpression.left should be(mockLeftExpression)
+   mockExpression.right should be(mockRightExpression)
+   mockExpression.dataType should be(BinaryType)
+ }
+ test("RasterBandExpression should evaluate") {
+   doMock()
+   // "path" and band index 1 match the raster/getBand stubs; bandTransform is stubbed to return mockRaster.
+   mockExpression.nullSafeEval(UTF8String.fromString("path"), 1) shouldBe mockRaster
+ }
+ test("RasterBandExpression should make copy") {
+   doMock()
+   // Both copy paths are expected to build an instance of the template type, Add.
+   val children = Array[AnyRef](mockLeftExpression, mockRightExpression)
+   mockExpression.makeCopy(children) shouldBe a[Add]
+   mockExpression.withNewChildrenInternal(mockLeftExpression, mockRightExpression) shouldBe a[Add]
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterExpressionTest.scala
new file mode 100644
index 000000000..e22f7964f
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterExpressionTest.scala
@@ -0,0 +1,81 @@
+package com.databricks.labs.mosaic.core.expressions.raster
+import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig
+import com.databricks.labs.mosaic.core.raster.{MosaicRaster, RasterAPI}
+import org.apache.spark.sql.catalyst.expressions.{Abs, Expression}
+import org.apache.spark.sql.types.{BinaryType, DataType}
+import org.apache.spark.unsafe.types.UTF8String
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers.{a, _}
+class RasterExpressionTest extends AnyFunSuite with MockFactory {
+ // Mocked child, raster API and expression config passed to the expression under test.
+ val mockChildExpression: Expression = mock[Expression]
+ val mockRasterAPI: RasterAPI = mock[RasterAPI]
+ val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig]
+ // These two expectations are registered in the suite body, before DummyExpr is mocked further down.
+ mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes()
+ mockRasterAPI.enable _ expects() returning mockRasterAPI anyNumberOfTimes()
+ val mockRaster: MosaicRaster = mock[MosaicRaster]
+ // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class
+ // We are using Abs as a template in order to test makeCopy which is linked to GenericExpressionFactory
+ abstract class DummyExpr extends RasterExpression[Abs](
+ mockChildExpression, BinaryType, mockExpressionConfig
+ ) {
+ // For partial mocking, make methods that are testable final, scalamock will not mock final methods
+ // Each final def below simply forwards to the inherited implementation.
+ override final def nullSafeEval(childRasterRow: Any): Any =
+ super.nullSafeEval(childRasterRow)
+ override final def child: Expression = super.child
+ override final def dataType: DataType = super.dataType
+ // rasterAPI is pinned to the mock rather than forwarded to super.
+ override final val rasterAPI: RasterAPI = mockRasterAPI
+ override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs)
+ override final def withNewChildInternal(newChild: Expression): Expression =
+ super.withNewChildInternal(newChild)
+ }
+ // NOTE(review): `bytes` is not referenced by any test visible in this suite.
+ val bytes: Array[Byte] = "POINT EMPTY".getBytes
+ // Mock of DummyExpr; the members DummyExpr declares final are the ones exercised by the tests.
+ val mockExpression: DummyExpr = mock[DummyExpr]
+ /** Registers the lenient (any-number-of-times) expectations shared by every test in this suite. */
+ def doMock(): Unit = {
+   (mockExpressionConfig.getRasterAPI _).expects(*).returning(mockRasterAPI).anyNumberOfTimes()
+   (mockRasterAPI.raster _).expects("path").returning(mockRaster).anyNumberOfTimes()
+   (mockRaster.cleanUp _).expects().returning(null).anyNumberOfTimes()
+   (mockChildExpression.toString _).expects().returning("child").anyNumberOfTimes()
+   // The transform is stubbed to hand the input raster straight back.
+   (mockExpression.rasterTransform _).expects(mockRaster).returning(mockRaster).anyNumberOfTimes()
+ }
+ test("RasterExpression should implement accessor methods") {
+   doMock()
+   // The final overrides in DummyExpr forward to the real accessors, checked against the constructor arguments.
+   mockExpression.child should be(mockChildExpression)
+   mockExpression.dataType should be(BinaryType)
+ }
+ test("RasterExpression should evaluate") {
+   doMock()
+   // "path" is the value mockRasterAPI.raster is stubbed for; rasterTransform is stubbed to return mockRaster.
+   mockExpression.nullSafeEval(UTF8String.fromString("path")) shouldBe mockRaster
+ }
+ test("RasterExpression should make copy") {
+   doMock()
+   // Both copy paths are expected to build an instance of the template type, Abs.
+   val children = Array[AnyRef](mockChildExpression)
+   mockExpression.makeCopy(children) shouldBe a[Abs]
+   mockExpression.withNewChildInternal(mockChildExpression) shouldBe a[Abs]
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterGeneratorExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterGeneratorExpressionTest.scala
new file mode 100644
index 000000000..15f6db7a6
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterGeneratorExpressionTest.scala
@@ -0,0 +1,95 @@
+package com.databricks.labs.mosaic.core.expressions.raster
+import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig
+import com.databricks.labs.mosaic.core.raster.{MosaicRaster, RasterAPI}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{Abs, Expression}
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.unsafe.types
+import org.apache.spark.unsafe.types.UTF8String
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers.{a, _}
+/**
+ * Tests the behaviour RasterGeneratorExpression provides to its subclasses, using a mocked
+ * DummyExpr subclass so that no real raster backend is involved.
+ */
+class RasterGeneratorExpressionTest extends AnyFunSuite with MockFactory {
+    val mockChildExpression: Expression = mock[Expression]
+    val mockRasterAPI: RasterAPI = mock[RasterAPI]
+    val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig]
+    // Registered while the suite is being constructed, i.e. before mock[DummyExpr] is created below.
+    // NOTE(review): presumably the RasterGeneratorExpression constructor relies on these - confirm.
+    mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes()
+    mockRasterAPI.enable _ expects() returning mockRasterAPI anyNumberOfTimes()
+    val mockRaster: MosaicRaster = mock[MosaicRaster]
+    // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class
+    // We are using Abs as a template in order to test makeCopy which is linked to GenericExpressionFactory
+    abstract class DummyExpr extends RasterGeneratorExpression[Abs](
+        mockChildExpression, mockExpressionConfig
+    ) {
+        // For partial mocking, make methods that are testable final, scalamock will not mock final methods
+        override final def eval(childRasterRow: InternalRow): TraversableOnce[InternalRow] =
+            super.eval(childRasterRow)
+        override final def position: Boolean = super.position
+        override final def inline: Boolean = super.inline
+        override final def elementSchema: StructType = super.elementSchema
+        override final val rasterAPI: RasterAPI = mockRasterAPI
+        override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs)
+        override final def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression =
+            super.withNewChildrenInternal(newChildren)
+    }
+    val bytes: Array[Byte] = "POINT EMPTY".getBytes
+    // Value the mocked child expression evaluates to; doMock() maps the raster path "path" to mockRaster.
+    val utf8Str: UTF8String = types.UTF8String.fromString("path")
+    // Canned output of the mocked rasterGenerator: (Long, 4-Int tuple) pairs.
+    val tiles: Seq[(Long, (Int, Int, Int, Int))] =
+        Seq((1L, (1, 2, 3, 4)), (2L, (5, 6, 7, 8)), (3L, (9, 10, 11, 12)))
+    val mockExpression: DummyExpr = mock[DummyExpr]
+    /** Registers lenient (any-number-of-times) expectations on every collaborator the tests touch. */
+    def doMock(): Unit = {
+        mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes()
+        mockExpressionConfig.getRasterCheckpoint _ expects() returning "path" anyNumberOfTimes()
+        mockRasterAPI.raster _ expects "path" returning mockRaster anyNumberOfTimes()
+        mockRaster.cleanUp _ expects() returning null anyNumberOfTimes()
+        // One saveCheckpoint expectation per canned tile, matched on the tile's id and extent.
+        tiles.foreach(
+            tile => mockRaster.saveCheckpoint _ expects(*, tile._1, tile._2, "path") returning "path" anyNumberOfTimes()
+        )
+        mockChildExpression.toString _ expects() returning "child" anyNumberOfTimes()
+        mockChildExpression.eval _ expects * returning utf8Str anyNumberOfTimes()
+        mockExpression.rasterGenerator _ expects mockRaster returning tiles anyNumberOfTimes()
+        mockExpression.children _ expects() returning IndexedSeq(mockChildExpression) anyNumberOfTimes()
+    }
+    test("RasterGeneratorExpression should implement accessor methods") {
+        doMock()
+        mockExpression.position shouldBe false
+        mockExpression.inline shouldBe false
+        mockExpression.elementSchema shouldBe a[StructType]
+    }
+    test("RasterGeneratorExpression should evaluate") {
+        doMock()
+        val runtimePath = UTF8String.fromString("path")
+        val result = mockExpression.eval(InternalRow(runtimePath))
+        // Type arguments are erased at runtime, so only the collection class can be verified here.
+        // The wildcard states that honestly; eval is declared to yield InternalRow, not String.
+        result shouldBe a[List[_]]
+    }
+    test("RasterGeneratorExpression should make copy") {
+        doMock()
+        mockExpression.makeCopy(Array[AnyRef](mockChildExpression)) shouldBe a[Abs]
+        mockExpression.withNewChildrenInternal(Array(mockChildExpression)) shouldBe a[Abs]
+    }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterTest.scala
new file mode 100644
index 000000000..82d19018a
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterTest.scala
@@ -0,0 +1,18 @@
+package com.databricks.labs.mosaic.core.expressions.raster
+import org.apache.spark.sql.catalyst.util.ArrayBasedMapData
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers.{a, convertToAnyShouldWrapper}
+/** Smoke test for the map-building helpers exposed by the `raster` package object. */
+class RasterTest extends AnyFunSuite with MockFactory {
+    test("package object should implement buildMapString and buildMapDouble") {
+        import com.databricks.labs.mosaic.core.expressions.raster.{buildMapDouble, buildMapString}
+        val stringValued = Map("a" -> "b", "c" -> "d")
+        val doubleValued = Map("a" -> 1.0, "c" -> 2.0)
+        // Both helpers are expected to hand back Spark's array-backed map representation.
+        val builtFromStrings = buildMapString(stringValued)
+        builtFromStrings shouldBe a[ArrayBasedMapData]
+        val builtFromDoubles = buildMapDouble(doubleValued)
+        builtFromDoubles shouldBe a[ArrayBasedMapData]
+    }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterToGridExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterToGridExpressionTest.scala
new file mode 100644
index 000000000..0e6404b22
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterToGridExpressionTest.scala
@@ -0,0 +1,102 @@
+package com.databricks.labs.mosaic.core.expressions.raster
+import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import com.databricks.labs.mosaic.core.raster.{MosaicRaster, MosaicRasterBand, RasterAPI}
+import org.apache.spark.sql.catalyst.expressions.{Add, Expression}
+import org.apache.spark.sql.catalyst.util.GenericArrayData
+import org.apache.spark.sql.types.{BinaryType, LongType}
+import org.apache.spark.unsafe.types.UTF8String
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers.{a, _}
+/**
+ * Tests the pixel/band/raster transformation helpers of RasterToGridExpression through a mocked
+ * DummyExpr subclass; the raster, band, index system and configuration are all mocks.
+ */
+class RasterToGridExpressionTest extends AnyFunSuite with MockFactory {
+    val mockPathExpression: Expression = mock[Expression]
+    val mockResolutionExpression: Expression = mock[Expression]
+    val mockRasterAPI: RasterAPI = mock[RasterAPI]
+    val mockIndexSystem: IndexSystem = mock[IndexSystem]
+    val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig]
+    val mockRaster: MosaicRaster = mock[MosaicRaster]
+    val mockRasterBand: MosaicRasterBand = mock[MosaicRasterBand]
+    // Canned geo-transform returned by the mocked raster (six coefficients).
+    val geoTransform: Seq[Double] = Seq(1.0, 2.0, 3.0, 4.0, 5.0, 6.0)
+    // Canned transformBands output: six single-entry maps.
+    val cellMeasurePairs: Seq[Map[Long, Int]] = Seq(Map(1L -> 1), Map(2L -> 2), Map(3L -> 3), Map(4L -> 4), Map(5L -> 5), Map(6L -> 6))
+    /** Registers lenient (any-number-of-times) expectations on every collaborator the tests touch. */
+    def doMock(): Unit = {
+        mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes()
+        mockExpressionConfig.getGeometryAPI _ expects * returning null anyNumberOfTimes()
+        mockExpressionConfig.getIndexSystem _ expects * returning mockIndexSystem anyNumberOfTimes()
+        mockExpressionConfig.getCellIdType _ expects() returning LongType anyNumberOfTimes()
+        mockRasterAPI.raster _ expects "path" returning mockRaster anyNumberOfTimes()
+        mockRasterAPI.enable _ expects() returning mockRasterAPI anyNumberOfTimes()
+        mockRaster.cleanUp _ expects() returning null anyNumberOfTimes()
+        mockRaster.getGeoTransform _ expects() returning geoTransform anyNumberOfTimes()
+        mockRaster.transformBands[Map[Long, _]] _ expects * returning cellMeasurePairs anyNumberOfTimes()
+        (mockRasterBand.transformValues[(Long, Double)](_: (Int, Int, Double) => (Long, Double), _: (Long, Double))) expects(*, *) returning Seq(
+            Seq((1L, 1.0)), Seq((2L, 2.0)), Seq((3L, 3.0)), Seq((4L, 4.0)), Seq((5L, 5.0)), Seq((6L, 6.0))
+        ) anyNumberOfTimes()
+        mockPathExpression.toString _ expects() returning "left" anyNumberOfTimes()
+        mockResolutionExpression.toString _ expects() returning "right" anyNumberOfTimes()
+        mockIndexSystem.pointToIndex _ expects(*, *, *) returning 1L anyNumberOfTimes()
+        // Every cell id that appears in the canned band output serializes to the string "1".
+        cellMeasurePairs.foreach(
+            pairs => pairs.foreach {
+                case (cellId, _) => mockIndexSystem.serializeCellId _ expects cellId returning UTF8String.fromString("1") anyNumberOfTimes()
+            }
+        )
+    }
+    // DummyExpr needs mocks to be set up before it is instantiated, so we call doMock() before declaring the class
+    doMock()
+    // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class
+    // We are using Add as a template (two child expressions: path and resolution) for the expression factory
+    abstract class DummyExpr extends RasterToGridExpression[Add, Int](
+        mockPathExpression, mockResolutionExpression, BinaryType, mockExpressionConfig
+    ) {
+        // For partial mocking, make methods that are testable final, scalamock will not mock final methods
+        override final def rasterTransform(raster: MosaicRaster, arg1: Any): Any =
+            super.rasterTransform(raster, arg1)
+        override final def pixelTransformer(gt: Seq[Double], resolution: Int)(x: Int, y: Int, value: Double): (Long, Double) =
+            super.pixelTransformer(gt, resolution)(x, y, value)
+        override final def bandTransformer(band: MosaicRasterBand, resolution: Int, gt: Seq[Double]): Map[Long, Int] =
+            super.bandTransformer(band, resolution, gt)
+    }
+    val mockExpression: DummyExpr = mock[DummyExpr]
+    test("RasterToGridExpression should implement pixelTransformer") {
+        doMock()
+        val result = mockExpression.pixelTransformer(geoTransform, 1)(1, 1, 1.0)
+        // Type arguments are erased at runtime; the wildcard form checks what can actually be checked.
+        result shouldBe a[Tuple2[_, _]]
+        // Explicit tuple instead of relying on argument auto-tupling; the element values are asserted here.
+        result shouldBe ((1L, 1.0))
+    }
+    test("RasterToGridExpression should implement rasterTransform") {
+        doMock()
+        val result = mockExpression.rasterTransform(mockRaster, 1)
+        result shouldBe a[GenericArrayData]
+        // One output element per entry of the canned transformBands result.
+        result.asInstanceOf[GenericArrayData].array.length shouldBe 6
+    }
+    test("RasterToGridExpression should implement bandTransformer") {
+        doMock()
+        mockExpression.valuesCombiner _ expects * returning 1 anyNumberOfTimes()
+        val result = mockExpression.bandTransformer(mockRasterBand, 1, geoTransform)
+        // Key/value types are erased at runtime, so only the Map class itself is verified.
+        result shouldBe a[Map[_, _]]
+        result.keys.toSeq.length shouldBe 6
+    }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryTest.scala
new file mode 100644
index 000000000..79f63a2c1
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryTest.scala
@@ -0,0 +1,66 @@
+package com.databricks.labs.mosaic.core.geometry
+import com.databricks.labs.mosaic.core.crs.{CRSBounds, CRSBoundsProvider}
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.must.Matchers.be
+import org.scalatest.matchers.should.Matchers.{an, convertToAnyShouldWrapper}
+import scala.collection.immutable
+/** Exercises the concrete helpers of MosaicGeometry through a mocked subclass. */
+class MosaicGeometryTest extends AnyFunSuite with MockFactory {
+    // The three methods under test are re-declared final and simply delegate to the inherited bodies.
+    abstract class TestMosaicGeometry extends MosaicGeometry {
+        override final def transformCRSXY(sridTo: Int, sridFrom: Int): MosaicGeometry =
+            super.transformCRSXY(sridTo, sridFrom)
+        override final def minMaxCoord(coord: String, minMax: String): Double =
+            super.minMaxCoord(coord, minMax)
+        override final def hasValidCoords(
+            crsBoundsProvider: CRSBoundsProvider,
+            crsCode: String,
+            which: String
+        ): Boolean = super.hasValidCoords(crsBoundsProvider, crsCode, which)
+    }
+    class TestCRSBoundsProvider extends CRSBoundsProvider(null)
+    val mockMosaicGeometry: MosaicGeometry = mock[TestMosaicGeometry]
+    // A single ring of six mocked points; doMock() gives point i the coordinates (i, i, i).
+    val mockMosaicPoints: Seq[immutable.IndexedSeq[MosaicPoint]] = Seq((0 to 5).map(_ => mock[MosaicPoint]))
+    val mockCRSBoundsProvider: CRSBoundsProvider = mock[CRSBoundsProvider]
+    val mockBounds: CRSBounds = mock[CRSBounds]
+    /** Registers lenient expectations for the points, the geometry, the bounds provider and the bounds. */
+    def doMock(): Unit = {
+        for (ring <- mockMosaicPoints) {
+            ring.zipWithIndex.foreach { case (point, index) =>
+                (point.getX _).expects().returning(index.toDouble).anyNumberOfTimes()
+                (point.getY _).expects().returning(index.toDouble).anyNumberOfTimes()
+                (point.getZ _).expects().returning(index.toDouble).anyNumberOfTimes()
+            }
+        }
+        (mockMosaicGeometry.getShellPoints _).expects().returning(mockMosaicPoints).anyNumberOfTimes()
+        (mockMosaicGeometry.getHolePoints _).expects().returning(Seq.empty).anyNumberOfTimes()
+        (mockCRSBoundsProvider.bounds _).expects("EPSG", "4326".toInt).returning(mockBounds).anyNumberOfTimes()
+        (mockCRSBoundsProvider.reprojectedBounds _).expects("EPSG", "4326".toInt).returning(mockBounds).anyNumberOfTimes()
+        (mockBounds.getUpperX _).expects().returning(180).anyNumberOfTimes()
+        (mockBounds.getUpperY _).expects().returning(90).anyNumberOfTimes()
+        (mockBounds.getLowerX _).expects().returning(-180).anyNumberOfTimes()
+        (mockBounds.getLowerY _).expects().returning(-90).anyNumberOfTimes()
+    }
+    test("MosaicGeometry should return minMaxCoord") {
+        doMock()
+        // With point coordinates 0..5 on every axis, each axis has minimum 0.0 and maximum 5.0.
+        Seq("x", "y", "z").foreach { axis =>
+            mockMosaicGeometry.minMaxCoord(axis, "min") shouldBe 0.0
+            mockMosaicGeometry.minMaxCoord(axis, "max") shouldBe 5.0
+        }
+    }
+    test("MosaicGeometry should run hasValidCoords") {
+        doMock()
+        val withinBounds = mockMosaicGeometry.hasValidCoords(mockCRSBoundsProvider, "EPSG:4326", "bounds")
+        withinBounds shouldBe true
+        val withinReprojected = mockMosaicGeometry.hasValidCoords(mockCRSBoundsProvider, "EPSG:4326", "reprojected_bounds")
+        withinReprojected shouldBe true
+        // An unrecognised selector is expected to raise an Error.
+        an[Error] should be thrownBy mockMosaicGeometry.hasValidCoords(mockCRSBoundsProvider, "EPSG:4326", "invalid")
+    }
+    test("MosaicGeometry should fail for transformCRSXY") {
+        doMock()
+        an[Exception] should be thrownBy mockMosaicGeometry.transformCRSXY(4326, 4326)
+    }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPITest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPITest.scala
new file mode 100644
index 000000000..efe2b3339
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPITest.scala
@@ -0,0 +1,84 @@
+package com.databricks.labs.mosaic.core.geometry.api
+import com.databricks.labs.mosaic.core.geometry.{MosaicGeometry, MosaicPoint}
+import com.databricks.labs.mosaic.core.types.{GeoJSONType, GeometryTypeEnum, HexType}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.types.{BinaryType, DataType, DateType, StringType}
+import org.apache.spark.unsafe.types.UTF8String
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+/** Verifies the per-data-type dispatch implemented by GeometryAPI, with the GeometryReader mocked out. */
+class GeometryAPITest extends AnyFunSuite with MockFactory {
+    val mockReader: GeometryReader = mock[GeometryReader]
+    // The methods under test are re-declared final and simply delegate to the inherited bodies.
+    abstract class TestGeometryAPI extends GeometryAPI(mockReader) {
+        override final def pointsToGeometry(points: Seq[MosaicPoint], geomType: GeometryTypeEnum.Value): MosaicGeometry =
+            super.pointsToGeometry(points, geomType)
+        override final def rowToGeometry(inputData: InternalRow, dataType: DataType): MosaicGeometry =
+            super.rowToGeometry(inputData, dataType)
+        override final def valueToGeometry(inputData: Any, dataType: DataType): MosaicGeometry =
+            super.valueToGeometry(inputData, dataType)
+        override final def serialize(geometry: MosaicGeometry, dataType: DataType): Any =
+            super.serialize(geometry, dataType)
+    }
+    val mockApi: TestGeometryAPI = mock[TestGeometryAPI]
+    val mockRow: InternalRow = mock[InternalRow]
+    val mockPoint: MosaicPoint = mock[MosaicPoint]
+    val bytes: Array[Byte] = "POINT (1 1)".getBytes
+    // Single-field row wrapping the same text; used as the nested value for the HexType/GeoJSONType cases.
+    val subRow: InternalRow = InternalRow.fromSeq(Seq(UTF8String.fromString("POINT (1 1)")))
+    /** Registers lenient expectations on the row, the reader and the point mocks. */
+    def doMock(): Unit = {
+        // The same text is used as the canned payload for every encoding.
+        val pointText = "POINT (1 1)"
+        (mockRow.getString _).expects(0).returning(pointText).anyNumberOfTimes()
+        (mockRow.getBinary _).expects(0).returning(bytes).anyNumberOfTimes()
+        (mockRow.get _).expects(0, HexType).returning(subRow).anyNumberOfTimes()
+        (mockRow.get _).expects(0, GeoJSONType).returning(subRow).anyNumberOfTimes()
+        (mockReader.fromWKB _).expects(bytes).returning(mock[MosaicPoint]).anyNumberOfTimes()
+        (mockReader.fromWKT _).expects(pointText).returning(mock[MosaicPoint]).anyNumberOfTimes()
+        (mockReader.fromHEX _).expects(pointText).returning(mock[MosaicPoint]).anyNumberOfTimes()
+        (mockReader.fromJSON _).expects(pointText).returning(mock[MosaicPoint]).anyNumberOfTimes()
+        (mockReader.fromSeq _).expects(Seq(mockPoint), GeometryTypeEnum.POINT).returning(mock[MosaicPoint]).anyNumberOfTimes()
+        (mockPoint.toWKT _).expects().returning(pointText).anyNumberOfTimes()
+        (mockPoint.toWKB _).expects().returning(bytes).anyNumberOfTimes()
+        (mockPoint.toJSON _).expects().returning(pointText).anyNumberOfTimes()
+        (mockPoint.toHEX _).expects().returning(pointText).anyNumberOfTimes()
+    }
+    test("GeometryAPI should convert points to geometry") {
+        doMock()
+        val built = mockApi.pointsToGeometry(Seq(mockPoint), GeometryTypeEnum.POINT)
+        built shouldBe a[MosaicGeometry]
+    }
+    test("GeometryAPI should convert row to geometry") {
+        doMock()
+        Seq[DataType](BinaryType, StringType, GeoJSONType, HexType).foreach { supported =>
+            mockApi.rowToGeometry(mockRow, supported) shouldBe a[MosaicGeometry]
+        }
+        // DateType stands in for a data type the API does not handle.
+        an[Error] should be thrownBy mockApi.rowToGeometry(mockRow, DateType)
+    }
+    test("GeometryAPI should convert value to geometry") {
+        doMock()
+        val supportedInputs = Seq[(Any, DataType)](
+            (bytes, BinaryType),
+            (UTF8String.fromString("POINT (1 1)"), StringType),
+            (subRow, GeoJSONType),
+            (subRow, HexType)
+        )
+        supportedInputs.foreach { case (input, inputType) =>
+            mockApi.valueToGeometry(input, inputType) shouldBe a[MosaicGeometry]
+        }
+        an[Error] should be thrownBy mockApi.valueToGeometry("POINT (1 1)", DateType)
+    }
+    test("GeometryAPI should serialize geometry") {
+        doMock()
+        val asBinary = mockApi.serialize(mockPoint, BinaryType)
+        asBinary shouldBe a[Array[Byte]]
+        val asText = mockApi.serialize(mockPoint, StringType)
+        asText shouldBe a[UTF8String]
+        val asGeoJson = mockApi.serialize(mockPoint, GeoJSONType)
+        asGeoJson shouldBe a[InternalRow]
+        val asHex = mockApi.serialize(mockPoint, HexType)
+        asHex shouldBe a[InternalRow]
+        an[Error] should be thrownBy mockApi.serialize(mockPoint, DateType)
+    }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/index/IndexSystemTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/index/IndexSystemTest.scala
new file mode 100644
index 000000000..216610b41
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/index/IndexSystemTest.scala
@@ -0,0 +1,202 @@
+package com.databricks.labs.mosaic.core.index
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
+import com.databricks.labs.mosaic.core.types.{Coordinates, MosaicChip}
+import org.apache.spark.sql.types.{DataType, DateType, LongType, StringType}
+import org.apache.spark.unsafe.types.UTF8String
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+/**
+ * Tests the concrete (non-abstract) behaviour of IndexSystem through a mocked subclass: the
+ * methods under test are re-declared final in TestIndexSystem, everything else is mocked.
+ */
+class IndexSystemTest extends AnyFunSuite with MockFactory {
+    abstract class TestIndexSystem extends IndexSystem(LongType) {
+        // Override as final all the methods whose behavior we want to test, this will prevent the compiler from mocking those methods
+        override final def getCellIdDataType: DataType = super.getCellIdDataType
+        override final def setCellIdDataType(dataType: DataType): Unit = super.setCellIdDataType(dataType)
+        override final def formatCellId(cellId: Any, dt: DataType): Any = super.formatCellId(cellId, dt)
+        override final def formatCellId(cellId: Any): Any = super.formatCellId(cellId)
+        override final def serializeCellId(cellId: Any): Any = super.serializeCellId(cellId)
+        override final def kRing(index: String, n: Int): Seq[String] = super.kRing(index, n)
+        override final def kLoop(index: String, n: Int): Seq[String] = super.kLoop(index, n)
+        override final def getBorderChips(geometry: MosaicGeometry, borderIndices: Seq[Long], keepCoreGeom: Boolean, geometryAPI: GeometryAPI): Seq[MosaicChip] =
+            super.getBorderChips(geometry, borderIndices, keepCoreGeom, geometryAPI)
+        override final def getCoreChips(coreIndices: Seq[Long], keepCoreGeom: Boolean, geometryAPI: GeometryAPI): Seq[MosaicChip] =
+            super.getCoreChips(coreIndices, keepCoreGeom, geometryAPI)
+        override final def area(index: Long): Double = super.area(index)
+        override final def area(index: String): Double = super.area(index)
+        override final def indexToCenter(index: String): Coordinates = super.indexToCenter(index)
+        override final def indexToBoundary(index: String): Seq[Coordinates] = super.indexToBoundary(index)
+        override final def coerceChipGeometry(geometries: Seq[MosaicGeometry]): Seq[MosaicGeometry] = super.coerceChipGeometry(geometries)
+        override final def coerceChipGeometry(geom: MosaicGeometry, cell: Long, geometryAPI: GeometryAPI): MosaicGeometry = super.coerceChipGeometry(geom, cell, geometryAPI)
+    }
+    val mockIndexSystem: IndexSystem = mock[TestIndexSystem]
+    // Four geometry mocks; doMock() makes them report POINT, GEOMETRYCOLLECTION, POLYGON and LINESTRING.
+    val mockGeometry: MosaicGeometry = mock[MosaicGeometry]
+    val mockGeometry2: MosaicGeometry = mock[MosaicGeometry]
+    val mockGeometry3: MosaicGeometry = mock[MosaicGeometry]
+    val mockGeometry4: MosaicGeometry = mock[MosaicGeometry]
+    val mockGeometryAPI: GeometryAPI = mock[GeometryAPI]
+    /** Registers lenient (any-number-of-times) expectations on the index system and geometry mocks. */
+    def doMock(): Unit = {
+        mockIndexSystem.format _ expects 123456789L returning "123456789" anyNumberOfTimes()
+        mockIndexSystem.parse _ expects "123456789" returning 123456789L anyNumberOfTimes()
+        mockIndexSystem.parse _ expects "10000000001" returning 10000000001L anyNumberOfTimes()
+        (mockIndexSystem.kRing(_: Long, _: Int)) expects(123456789L, 1) returning Seq(123456789L) anyNumberOfTimes()
+        (mockIndexSystem.kLoop(_: Long, _: Int)) expects(123456789L, 1) returning Seq(123456789L) anyNumberOfTimes()
+        (mockIndexSystem.indexToGeometry(_: Long, _: GeometryAPI)) expects(10000000001L, mockGeometryAPI) returning mockGeometry anyNumberOfTimes()
+        (mockIndexSystem.indexToGeometry(_: Long, _: GeometryAPI)) expects(10000000002L, mockGeometryAPI) returning mockGeometry anyNumberOfTimes()
+        (mockIndexSystem.indexToGeometry(_: Long, _: GeometryAPI)) expects(10000000003L, mockGeometryAPI) returning mockGeometry2 anyNumberOfTimes()
+        (mockIndexSystem.indexToCenter(_: Long)) expects 10000000001L returning Coordinates(1.5, 1.5) anyNumberOfTimes()
+        (mockIndexSystem.indexToBoundary(_: Long)) expects 10000000001L returning
+            Seq(Coordinates(1, 1), Coordinates(1, 2), Coordinates(2, 2), Coordinates(1, 2), Coordinates(1, 1)) anyNumberOfTimes()
+        mockGeometry.intersection _ expects mockGeometry returning mockGeometry anyNumberOfTimes()
+        mockGeometry.intersection _ expects mockGeometry2 returning mockGeometry2 anyNumberOfTimes()
+        mockGeometry.getGeometryType _ expects() returning "POINT" anyNumberOfTimes()
+        mockGeometry2.getGeometryType _ expects() returning "GEOMETRYCOLLECTION" anyNumberOfTimes()
+        mockGeometry3.getGeometryType _ expects() returning "POLYGON" anyNumberOfTimes()
+        mockGeometry4.getGeometryType _ expects() returning "LINESTRING" anyNumberOfTimes()
+        mockGeometry.equals _ expects mockGeometry returning true anyNumberOfTimes()
+        mockGeometry2.equals _ expects mockGeometry returning false anyNumberOfTimes()
+        mockGeometry2.equals _ expects mockGeometry2 returning false anyNumberOfTimes()
+        mockGeometry.equals _ expects mockGeometry2 returning false anyNumberOfTimes()
+        mockGeometry.isEmpty _ expects() returning false anyNumberOfTimes()
+        mockGeometry2.isEmpty _ expects() returning false anyNumberOfTimes()
+        mockGeometry2.getBoundary _ expects() returning mockGeometry2 anyNumberOfTimes()
+        mockGeometry.difference _ expects mockGeometry2 returning mockGeometry anyNumberOfTimes()
+        mockGeometry2.difference _ expects mockGeometry2 returning mockGeometry anyNumberOfTimes()
+    }
+    test("IndexSystem should get and set cellID data type") {
+        mockIndexSystem.getCellIdDataType shouldBe LongType
+        noException should be thrownBy mockIndexSystem.setCellIdDataType(LongType)
+    }
+    test("IndexSystem should format cellID") {
+        doMock()
+        mockIndexSystem.formatCellId(123456789L, LongType) shouldBe 123456789L
+        mockIndexSystem.formatCellId("123456789", LongType) shouldBe 123456789L
+        mockIndexSystem.formatCellId(UTF8String.fromString("123456789"), LongType) shouldBe 123456789L
+        mockIndexSystem.formatCellId(123456789L, StringType) shouldBe "123456789"
+        mockIndexSystem.formatCellId("123456789", StringType) shouldBe "123456789"
+        mockIndexSystem.formatCellId(UTF8String.fromString("123456789"), StringType) shouldBe "123456789"
+        // DateType stands in for a cell id data type that is not supported.
+        an[Error] should be thrownBy mockIndexSystem.formatCellId(123456789L, DateType)
+        mockIndexSystem.formatCellId(123456789L) shouldBe 123456789L
+    }
+    test("IndexSystem should serializeCellId") {
+        doMock()
+        mockIndexSystem.setCellIdDataType(StringType)
+        mockIndexSystem.getCellIdDataType shouldBe StringType
+        mockIndexSystem.serializeCellId(123456789L) shouldBe UTF8String.fromString("123456789")
+        mockIndexSystem.serializeCellId("123456789") shouldBe UTF8String.fromString("123456789")
+        mockIndexSystem.serializeCellId(UTF8String.fromString("123456789")) shouldBe UTF8String.fromString("123456789")
+        // Switch back to LongType so the shared mock is left in its initial configuration.
+        mockIndexSystem.setCellIdDataType(LongType)
+        mockIndexSystem.getCellIdDataType shouldBe LongType
+        mockIndexSystem.serializeCellId(123456789L) shouldBe 123456789L
+        mockIndexSystem.serializeCellId("123456789") shouldBe 123456789L
+        mockIndexSystem.serializeCellId(UTF8String.fromString("123456789")) shouldBe 123456789L
+        an[Error] should be thrownBy mockIndexSystem.serializeCellId(1.0)
+    }
+    test("IndexSystem should implement kRing and kLoop for StringType") {
+        doMock()
+        mockIndexSystem.kRing("123456789", 1) shouldBe a[Seq[_]]
+        mockIndexSystem.kLoop("123456789", 1) shouldBe a[Seq[_]]
+    }
+    test("IndexSystem should implement getBorderChips") {
+        doMock()
+        val result = mockIndexSystem.getBorderChips(
+            mockGeometry,
+            Seq(10000000001L, 10000000002L, 10000000003L),
+            keepCoreGeom = true,
+            mockGeometryAPI
+        )
+        result shouldBe a[Seq[_]]
+        result.last shouldBe a[MosaicChip]
+        // Compare the whole Either: a Right would fail with a readable message instead of
+        // the NoSuchElementException thrown by `.left.get`.
+        result.last.index shouldBe Left(10000000003L)
+        val result2 = mockIndexSystem.getBorderChips(
+            mockGeometry,
+            Seq(10000000001L, 10000000002L, 10000000003L),
+            keepCoreGeom = false,
+            mockGeometryAPI
+        )
+        // Option(_) drops null geometries, so this counts the chips that still carry a geometry.
+        result2.map(_.geom).flatMap(Option(_)).length shouldBe 1
+    }
+    test("IndexSystem should implement getCoreChips") {
+        doMock()
+        val result = mockIndexSystem.getCoreChips(
+            Seq(10000000001L, 10000000002L, 10000000003L),
+            keepCoreGeom = true,
+            mockGeometryAPI
+        )
+        result shouldBe a[Seq[_]]
+        result.last shouldBe a[MosaicChip]
+        // Same whole-Either comparison as in the border-chip test.
+        result.last.index shouldBe Left(10000000003L)
+        val result2 = mockIndexSystem.getCoreChips(
+            Seq(10000000001L, 10000000002L, 10000000003L),
+            keepCoreGeom = false,
+            mockGeometryAPI
+        )
+        // With keepCoreGeom = false no chip is expected to carry a (non-null) geometry.
+        result2.map(_.geom).flatMap(Option(_)).length shouldBe 0
+    }
+    test("IndexSystem should implement area") {
+        doMock()
+        mockIndexSystem.area(10000000001L) shouldBe 12360.971936046964
+        mockIndexSystem.area("10000000001") shouldBe 12360.971936046964
+    }
+    test("IndexSystem should implement indexToCenter and indexToBoundary for string type") {
+        doMock()
+        // The only contract checked here is that the String overloads run without throwing.
+        noException should be thrownBy {
+            mockIndexSystem.indexToCenter("10000000001")
+            mockIndexSystem.indexToBoundary("10000000001")
+        }
+    }
+    test("IndexSystem should implement coerceChipGeometry") {
+        doMock()
+        mockIndexSystem.coerceChipGeometry(Seq(mockGeometry, mockGeometry2)) shouldBe Seq(mockGeometry)
+        mockIndexSystem.coerceChipGeometry(Seq(mockGeometry, mockGeometry3)) shouldBe Seq(mockGeometry3)
+        mockIndexSystem.coerceChipGeometry(Seq(mockGeometry, mockGeometry4)) shouldBe Seq(mockGeometry4)
+        mockIndexSystem.coerceChipGeometry(Nil) shouldBe Nil
+    }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterBandTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterBandTest.scala
new file mode 100644
index 000000000..944f1d5fd
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterBandTest.scala
@@ -0,0 +1,26 @@
+package com.databricks.labs.mosaic.core.raster
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+/** Checks that the no-argument `values` / `maskValues` accessors of MosaicRasterBand yield the windowed data. */
+class MosaicRasterBandTest extends AnyFunSuite with MockFactory {
+    // Only the two no-argument accessors are made final; the windowed overloads stay mockable.
+    abstract class TestMosaicRasterBand extends MosaicRasterBand {
+        override final def values: Array[Double] = super.values
+        override final def maskValues: Array[Double] = super.maskValues
+    }
+    test("MosaicRasterBand") {
+        val band = mock[TestMosaicRasterBand]
+        // A 1 x 1 band whose only window (0, 0, 1, 1) holds the single value 1.0.
+        (band.xSize _).expects().returning(1).anyNumberOfTimes()
+        (band.ySize _).expects().returning(1).anyNumberOfTimes()
+        (band.values(_: Int, _: Int, _: Int, _: Int)).expects(0, 0, 1, 1).returning(Array(1.0)).anyNumberOfTimes()
+        (band.maskValues(_: Int, _: Int, _: Int, _: Int)).expects(0, 0, 1, 1).returning(Array(1.0)).anyNumberOfTimes()
+        val allValues = band.values
+        allValues shouldBe Array(1.0)
+        val allMaskValues = band.maskValues
+        allMaskValues shouldBe Array(1.0)
+    }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterTest.scala
new file mode 100644
index 000000000..9e1994deb
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterTest.scala
@@ -0,0 +1,18 @@
+package com.databricks.labs.mosaic.core.raster
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+/** Checks that MosaicRaster reports the memory size it was constructed with. */
+class MosaicRasterTest extends AnyFunSuite with MockFactory {
+    // Constructed with path "path" and the value 1234; getMemSize is final so the real accessor runs.
+    abstract class TestMosaicRaster extends MosaicRaster("path", 1234) {
+        override final def getMemSize: Long = super.getMemSize
+    }
+    test("MosaicRaster") {
+        val raster = mock[TestMosaicRaster]
+        val reportedSize = raster.getMemSize
+        reportedSize shouldBe 1234
+    }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/RasterAPITest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/RasterAPITest.scala
new file mode 100644
index 000000000..e127449f8
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/RasterAPITest.scala
@@ -0,0 +1,40 @@
+package com.databricks.labs.mosaic.core.raster
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+/** Checks that RasterAPI returns what the RasterReader it was constructed with returns. */
+class RasterAPITest extends AnyFunSuite with MockFactory {
+    val mockRasterReader: RasterReader = mock[RasterReader]
+    val mockMosaicRaster: MosaicRaster = mock[MosaicRaster]
+    val mockMosaicRasterBand: MosaicRasterBand = mock[MosaicRasterBand]
+    // The four methods under test are re-declared final and simply delegate to the inherited bodies.
+    abstract class TestRasterAPI extends RasterAPI(mockRasterReader) {
+        override final def raster(path: String): MosaicRaster =
+            super.raster(path)
+        override final def band(path: String, bandIndex: Int): MosaicRasterBand =
+            super.band(path, bandIndex)
+        override final def toWorldCoord(gt: Seq[Double], x: Int, y: Int): (Double, Double) =
+            super.toWorldCoord(gt, x, y)
+        override final def fromWorldCoord(gt: Seq[Double], x: Double, y: Double): (Int, Int) =
+            super.fromWorldCoord(gt, x, y)
+    }
+    test("RasterAPI") {
+        val api = mock[TestRasterAPI]
+        // Canned inputs/outputs shared between the reader expectations and the assertions.
+        val geoTransform = Seq(1.0, 2.0, 3.0, 4.0, 5.0, 6.0)
+        val worldPoint = (3.0, 4.0)
+        val pixel = (1, 1)
+        (mockRasterReader.readRaster _).expects("path").returning(mockMosaicRaster).anyNumberOfTimes()
+        (mockRasterReader.readBand _).expects("path", 1).returning(mockMosaicRasterBand).anyNumberOfTimes()
+        (mockRasterReader.toWorldCoord _).expects(geoTransform, 1, 1).returning(worldPoint).anyNumberOfTimes()
+        (mockRasterReader.fromWorldCoord _).expects(geoTransform, 3.0, 4.0).returning(pixel).anyNumberOfTimes()
+        api.raster("path") shouldBe mockMosaicRaster
+        api.band("path", 1) shouldBe mockMosaicRasterBand
+        api.toWorldCoord(geoTransform, 1, 1) shouldBe worldPoint
+        api.fromWorldCoord(geoTransform, 3.0, 4.0) shouldBe pixel
+    }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/ChipTypeTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/ChipTypeTest.scala
new file mode 100644
index 000000000..2b0ba4a55
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/ChipTypeTest.scala
@@ -0,0 +1,15 @@
+package com.databricks.labs.mosaic.core.types
+import org.apache.spark.sql.types.LongType
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class ChipTypeTest extends AnyFunSuite with MockFactory {
+ test("ChipType") {
+   // A chip type built over a Long cell id must report these type names.
+   val chipType = ChipType(LongType)
+   chipType.typeName shouldBe "struct"
+   chipType.simpleString shouldBe "CHIP"
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/GeoJSONTypeTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/GeoJSONTypeTest.scala
new file mode 100644
index 000000000..745d1aa29
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/GeoJSONTypeTest.scala
@@ -0,0 +1,15 @@
+package com.databricks.labs.mosaic.core.types
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class GeoJSONTypeTest extends AnyFunSuite with MockFactory {
+ test("GeoJSONType") {
+   // Checks the type names reported by a freshly constructed GeoJSONType.
+   val underTest = new GeoJSONType()
+   underTest.typeName shouldBe "struct"
+   underTest.simpleString shouldBe "GEOJSON"
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/GeometryTypeEnumTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/GeometryTypeEnumTest.scala
new file mode 100644
index 000000000..abff36453
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/GeometryTypeEnumTest.scala
@@ -0,0 +1,50 @@
+package com.databricks.labs.mosaic.core.types
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+import com.databricks.labs.mosaic.core.types.GeometryTypeEnum._
+class GeometryTypeEnumTest extends AnyFunSuite with MockFactory {
+ // Named after the unit under test; the previous name ("HexType") was a copy-paste
+ // leftover that mislabelled failures of this suite in test reports.
+ test("GeometryTypeEnum") {
+   // fromString: each supported type name resolves to the enum value of the same name.
+   Seq(
+     "POINT" -> POINT,
+     "LINESTRING" -> LINESTRING,
+     "POLYGON" -> POLYGON,
+     "MULTIPOINT" -> MULTIPOINT,
+     "MULTILINESTRING" -> MULTILINESTRING,
+     "MULTIPOLYGON" -> MULTIPOLYGON,
+     "LINEARRING" -> LINEARRING
+   ).foreach { case (name, expected) =>
+     withClue(s"fromString($name): ") { GeometryTypeEnum.fromString(name) shouldBe expected }
+   }
+   an[Error] should be thrownBy GeometryTypeEnum.fromString("NOT A GEOM")
+   // fromId: ids 1 through 8 are defined; 9 is rejected.
+   Seq(
+     1 -> POINT,
+     2 -> MULTIPOINT,
+     3 -> LINESTRING,
+     4 -> MULTILINESTRING,
+     5 -> POLYGON,
+     6 -> MULTIPOLYGON,
+     7 -> LINEARRING,
+     8 -> GEOMETRYCOLLECTION
+   ).foreach { case (id, expected) =>
+     withClue(s"fromId($id): ") { GeometryTypeEnum.fromId(id) shouldBe expected }
+   }
+   an[Error] should be thrownBy GeometryTypeEnum.fromId(9)
+   // groupOf: a multi-geometry type belongs to the group of its single-geometry type.
+   Seq(
+     POINT -> POINT,
+     MULTIPOINT -> POINT,
+     LINESTRING -> LINESTRING,
+     MULTILINESTRING -> LINESTRING,
+     POLYGON -> POLYGON,
+     MULTIPOLYGON -> POLYGON
+   ).foreach { case (geomType, expectedGroup) =>
+     withClue(s"groupOf($geomType): ") { GeometryTypeEnum.groupOf(geomType) shouldBe expectedGroup }
+   }
+   // isFlat: true for single-geometry types, false for multi types and collections.
+   Seq(
+     POINT -> true,
+     MULTIPOINT -> false,
+     LINESTRING -> true,
+     MULTILINESTRING -> false,
+     POLYGON -> true,
+     MULTIPOLYGON -> false,
+     GEOMETRYCOLLECTION -> false
+   ).foreach { case (geomType, expectedFlat) =>
+     withClue(s"isFlat($geomType): ") { GeometryTypeEnum.isFlat(geomType) shouldBe expectedFlat }
+   }
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/HexTypeTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/HexTypeTest.scala
new file mode 100644
index 000000000..ad115490f
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/HexTypeTest.scala
@@ -0,0 +1,15 @@
+package com.databricks.labs.mosaic.core.types
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class HexTypeTest extends AnyFunSuite with MockFactory {
+ test("HexType") {
+   // Checks the type names reported by a freshly constructed HexType.
+   val underTest = new HexType()
+   underTest.typeName shouldBe "struct"
+   underTest.simpleString shouldBe "HEX"
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/MosaicChipTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/MosaicChipTest.scala
new file mode 100644
index 000000000..95781f2a3
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/MosaicChipTest.scala
@@ -0,0 +1,51 @@
+package com.databricks.labs.mosaic.core.types
+import com.databricks.labs.mosaic.core.geometry.MosaicGeometry
+import com.databricks.labs.mosaic.core.index.IndexSystem
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.types.{BinaryType, LongType, StringType}
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class MosaicChipTest extends AnyFunSuite with MockFactory {
+ // Named after the unit under test; the previous name ("ChipType") was copied from
+ // ChipTypeTest and mislabelled MosaicChip failures in test reports.
+ test("MosaicChip") {
+   val mockMosaicGeometry = mock[MosaicGeometry]
+   val mockIndexSystem = mock[IndexSystem]
+   // Long <-> String cell id conversion is delegated to the index system; stub both directions.
+   (mockIndexSystem.format _).expects(1L).returning("1").anyNumberOfTimes()
+   (mockIndexSystem.parse _).expects("1").returning(1L).anyNumberOfTimes()
+   (mockMosaicGeometry.toWKB _).expects().returning(Array[Byte](1, 2, 3)).anyNumberOfTimes()
+   // Non-core chips without geometry, one per cell id representation.
+   val longChip = MosaicChip(isCore = false, Left(1L), null)
+   val stringChip = MosaicChip(isCore = false, Right("1"), null)
+   // isEmpty: a non-core chip with null geometry is empty, a core chip is not.
+   longChip.isEmpty shouldBe true
+   MosaicChip(isCore = true, Left(1L), null).isEmpty shouldBe false
+   // formatCellId: the id is converted to whatever type the index system reports.
+   // Each expectation is registered immediately before the call that consumes it.
+   (mockIndexSystem.getCellIdDataType _).expects().returning(LongType).once()
+   longChip.formatCellId(mockIndexSystem) shouldBe longChip
+   (mockIndexSystem.getCellIdDataType _).expects().returning(StringType).once()
+   longChip.formatCellId(mockIndexSystem) shouldBe stringChip
+   (mockIndexSystem.getCellIdDataType _).expects().returning(LongType).once()
+   stringChip.formatCellId(mockIndexSystem) shouldBe longChip
+   (mockIndexSystem.getCellIdDataType _).expects().returning(StringType).once()
+   stringChip.formatCellId(mockIndexSystem) shouldBe stringChip
+   // Any other cell id data type is rejected.
+   (mockIndexSystem.getCellIdDataType _).expects().returning(BinaryType).once()
+   an[IllegalArgumentException] should be thrownBy longChip.formatCellId(mockIndexSystem)
+   // cellIdAsLong / cellIdAsStr give the same answer for either representation.
+   longChip.cellIdAsLong(mockIndexSystem) shouldBe 1L
+   stringChip.cellIdAsLong(mockIndexSystem) shouldBe 1L
+   longChip.cellIdAsStr(mockIndexSystem) shouldBe "1"
+   stringChip.cellIdAsStr(mockIndexSystem) shouldBe "1"
+   // serialize yields an InternalRow with or without a geometry attached.
+   longChip.serialize shouldBe a[InternalRow]
+   stringChip.serialize shouldBe a[InternalRow]
+   MosaicChip(isCore = false, Left(1L), mockMosaicGeometry).serialize shouldBe a[InternalRow]
+ }
diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/MosaicTypeTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/MosaicTypeTest.scala
new file mode 100644
index 000000000..a6b7a0ca6
--- /dev/null
+++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/MosaicTypeTest.scala
@@ -0,0 +1,16 @@
+package com.databricks.labs.mosaic.core.types
+import org.apache.spark.sql.types.LongType
+import org.scalamock.scalatest.MockFactory
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers._
+class MosaicTypeTest extends AnyFunSuite with MockFactory {
+ test("MosaicType") {
+   // A mosaic type built over a Long cell id must report these type names.
+   val underTest = MosaicType(LongType)
+   underTest.typeName shouldBe "struct"
+   underTest.simpleString shouldBe "MOSAIC"
+ }
diff --git a/mosaic-index/.gitignore b/mosaic-index/.gitignore
new file mode 100644
index 000000000..5ff6309b7
--- /dev/null
+++ b/mosaic-index/.gitignore
@@ -0,0 +1,38 @@
+### IntelliJ IDEA ###
+### Eclipse ###
+### NetBeans ###
+### VS Code ###
+### Mac OS ###
\ No newline at end of file
diff --git a/mosaic-index/pom.xml b/mosaic-index/pom.xml
new file mode 100644
index 000000000..296c9aff8
--- /dev/null
+++ b/mosaic-index/pom.xml
@@ -0,0 +1,13 @@
+ 4.0.0
+ com.databricks.labs
+ mosaic
+ 0.4.1
+ mosaic-index
+ mosaic-index
diff --git a/mosaic-raster/pom.xml b/mosaic-raster/pom.xml
new file mode 100644
index 000000000..3fa7f639e
--- /dev/null
+++ b/mosaic-raster/pom.xml
@@ -0,0 +1,28 @@
+ com.databricks.labs
+ mosaic
+ 0.4.1
+ 4.0.0
+ mosaic-raster
+ mosaic-raster
+ jar
+ org.scala-lang
+ scala-library
diff --git a/mosaic-vector/pom.xml b/mosaic-vector/pom.xml
new file mode 100644
index 000000000..0ded74ad0
--- /dev/null
+++ b/mosaic-vector/pom.xml
@@ -0,0 +1,76 @@
+ com.databricks.labs
+ mosaic
+ 0.4.1
+ 4.0.0
+ mosaic-vector
+ mosaic-vector
+ jar
+ org.scala-lang
+ scala-library
+ ${project.groupId}
+ mosaic-core
+ ${project.version}
+ junit
+ junit
+ test
+ org.scalatest
+ scalatest_${scala.compat.version}
+ org.scalamock
+ scalamock_${scala.compat.version}
+ org.apache.spark
+ spark-core_${scala.compat.version}
+ test-jar
+ test
+ org.apache.spark
+ spark-sql_${scala.compat.version}
+ test-jar
+ test
+ org.apache.spark
+ spark-catalyst_${scala.compat.version}
+ test-jar
+ test
+ org.apache.spark
+ spark-core_${scala.compat.version}
+ provided
+ org.apache.spark
+ spark-sql_${scala.compat.version}
+ provided
diff --git a/pom.xml b/pom.xml
index 1d5344c09..6369c0a95 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,279 +1,131 @@
- ${mosaic.version}
+ pom
+ 0.4.1
+ mosaic-parent
+ 3.0
- 1.11
- 1.11
+ 1.8
+ 1.8
- org.scala-lang
- scala-library
- ${scala.version}
- junit
- junit
- 4.13.2
- test
- org.scalatest
- scalatest_${scala.compat.version}
- 3.2.14
- test
- org.scalamock
- scalamock_${scala.compat.version}
- 5.2.0
- test
- org.apache.spark
- spark-core_${scala.compat.version}
- ${spark.version}
- test-jar
- test
- org.apache.spark
- spark-sql_${scala.compat.version}
- ${spark.version}
- test-jar
- test
- org.apache.spark
- spark-catalyst_${scala.compat.version}
- ${spark.version}
- test-jar
- test
- org.apache.spark
- spark-core_${scala.compat.version}
- ${spark.version}
- provided
- org.apache.spark
- spark-sql_${scala.compat.version}
- ${spark.version}
- provided
- org.apache.spark
- spark-mllib_${scala.compat.version}
- ${spark.version}
- provided
- io.delta
- delta-core_${scala.compat.version}
- 2.1.0
- provided
- com.uber
- h3
- 3.7.0
- org.locationtech.jts
- jts-core
- 1.19.0
- org.locationtech.jts.io
- jts-io-common
- 1.19.0
- com.esri.geometry
- esri-geometry-api
- 2.2.4
- com.fasterxml.jackson.core
- jackson-core
- org.locationtech.proj4j
- proj4j-epsg
- 1.2.2
- org.locationtech.proj4j
- proj4j
- 1.2.2
- org.gdal
- gdal
- 3.4.0
+ org.scala-lang
+ scala-library
+ ${scala.version}
+ junit
+ junit
+ 4.13.2
+ test
+ org.scalatest
+ scalatest_${scala.compat.version}
+ 3.2.14
+ test
+ org.scalamock
+ scalamock_${scala.compat.version}
+ 5.2.0
+ test
+ org.apache.spark
+ spark-core_${scala.compat.version}
+ ${spark.version}
+ test-jar
+ test
+ org.apache.spark
+ spark-sql_${scala.compat.version}
+ ${spark.version}
+ test-jar
+ test
+ org.apache.spark
+ spark-catalyst_${scala.compat.version}
+ ${spark.version}
+ test-jar
+ test
+ org.apache.spark
+ spark-core_${scala.compat.version}
+ ${spark.version}
+ provided
+ org.apache.spark
+ spark-sql_${scala.compat.version}
+ ${spark.version}
+ provided
+ com.uber
+ h3
+ 3.7.0
+ org.locationtech.jts
+ jts-core
+ 1.19.0
+ org.locationtech.jts.io
+ jts-io-common
+ 1.19.0
+ org.locationtech.proj4j
+ proj4j-epsg
+ 1.2.2
+ org.locationtech.proj4j
+ proj4j
+ 1.2.2
+ org.gdal
+ gdal
+ 3.4.0
+ github
+ GitHub Packages
+ https://maven.pkg.github.com/databrickslabs/mosaic
- src/main/scala
- src/test/scala
- org.apache.maven.plugins
- maven-assembly-plugin
- 3.6.0
- jar-with-dependencies
- assemble-all
- package
- single
- net.alchim31.maven
- scala-maven-plugin
- 4.7.1
- compile
- testCompile
- -dependencyfile
- ${project.build.directory}/.scala_dependencies
- org.apache.maven.plugins
- maven-surefire-plugin
- 3.1.0
- true
- org.scalatest
- scalatest-maven-plugin
- 2.0.0
- ${project.build.directory}/test-reports
- test
- test
- org.scalastyle
- scalastyle-maven-plugin
- 1.0.0
- true
- true
- false
- false
- ${basedir}/src/main/scala
- ${basedir}/src/test/scala
- scalastyle-config.xml
- ${basedir}/target/scalastyle-output.xml
- ${encoding}
- ${encoding}
- check
- org.scoverage
- scoverage-maven-plugin
- 1.4.11
- scoverage-report
- package
- check
- report-only
- ${minimum.coverage}
- true
- ${scala.version}
- skipTests=false
- org.apache.maven.plugins
- maven-resources-plugin
- 3.0.2
- copy-files-on-build
- package
- copy-resources
- ${basedir}/python/mosaic/lib
- ${basedir}/target/
- *.jar
- false
@@ -288,15 +140,16 @@
- 0.3.11
+ ${version}
- github
- GitHub Packages
- https://maven.pkg.github.com/databrickslabs/mosaic
+ mosaic-core
+ mosaic-vector
+ mosaic-raster
+ mosaic-index