diff --git a/mosaic-core/pom.xml b/mosaic-core/pom.xml new file mode 100644 index 000000000..306930672 --- /dev/null +++ b/mosaic-core/pom.xml @@ -0,0 +1,193 @@ + + + + com.databricks.labs + mosaic + 0.4.1 + + 4.0.0 + + mosaic-core + mosaic-core + jar + + + + org.scala-lang + scala-library + + + + junit + junit + test + + + org.scalatest + scalatest_${scala.compat.version} + + + org.scalamock + scalamock_${scala.compat.version} + + + + org.apache.spark + spark-sql_${scala.compat.version} + + + org.apache.spark + spark-catalyst_${scala.compat.version} + test-jar + + + org.apache.spark + spark-core_${scala.compat.version} + test-jar + + + + + src/main/scala + src/test/scala + + + org.apache.maven.plugins + maven-assembly-plugin + 3.6.0 + + + jar-with-dependencies + + + + + assemble-all + package + + single + + + + + + + net.alchim31.maven + scala-maven-plugin + 4.7.1 + + + + + compile + testCompile + + + + -dependencyfile + ${project.build.directory}/.scala_dependencies + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.1.0 + + + true + + + + org.scalatest + scalatest-maven-plugin + 2.0.0 + + ${project.build.directory}/test-reports + + + + test + + test + + + + + + org.scalastyle + scalastyle-maven-plugin + 1.0.0 + + true + true + false + false + ${basedir}/src/main/scala + ${basedir}/src/test/scala + scalastyle-config.xml + ${basedir}/target/scalastyle-output.xml + ${encoding} + ${encoding} + + + + + check + + + + + + org.scoverage + scoverage-maven-plugin + 1.4.11 + + + scoverage-report + package + + check + report-only + + + + + 0 + true + ${scala.version} + skipTests=false + + + + org.apache.maven.plugins + maven-resources-plugin + 3.0.2 + + + copy-files-on-build + package + + copy-resources + + + ${basedir}/python/mosaic/lib + + + ${basedir}/target/ + *.jar + false + + + + + + + + + + + diff --git a/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.GeometryAPIRegister 
b/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.GeometryAPIRegister new file mode 100644 index 000000000..bbf455b40 --- /dev/null +++ b/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.GeometryAPIRegister @@ -0,0 +1 @@ +com.databricks.labs.mosaic.geometry.JTS \ No newline at end of file diff --git a/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.IndexSystemRegister b/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.IndexSystemRegister new file mode 100644 index 000000000..3a06aba56 --- /dev/null +++ b/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.IndexSystemRegister @@ -0,0 +1,3 @@ +com.databricks.labs.mosaic.index.H3 +com.databricks.labs.mosaic.index.BNG +com.databricks.labs.mosaic.index.CUSTOM \ No newline at end of file diff --git a/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.RasterAPIRegister b/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.RasterAPIRegister new file mode 100644 index 000000000..2585377bc --- /dev/null +++ b/mosaic-core/src/main/resources/META-INF/services/com.databricks.labs.mosaic.RasterAPIRegister @@ -0,0 +1 @@ +com.databricks.labs.mosaic.raster.GDAL \ No newline at end of file diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/GenericServiceFactory.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/GenericServiceFactory.scala new file mode 100644 index 000000000..bf65a6577 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/GenericServiceFactory.scala @@ -0,0 +1,69 @@ +package com.databricks.labs.mosaic.core + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.core.raster.RasterAPI +import com.databricks.labs.mosaic.core.util.ResourceUtils + +import scala.util.Try + +/** + 
* Generic service factory for loading implementations of [[com.databricks.labs.mosaic.core.geometry.api.GeometryAPI]],
 * [[com.databricks.labs.mosaic.core.index.IndexSystem]] and [[com.databricks.labs.mosaic.core.raster.RasterAPI]].
 * This class implements the interaction with the META-INF/services directory.
 * All the implementations are provided via the META-INF/services directory and are loaded at runtime.
 *
 * @param registerName
 *   Name of the register resource under `/META-INF/services/` that lists the
 *   class names of the available implementations.
 * @tparam T
 *   The service type produced by the factory.
 */
abstract class GenericServiceFactory[T](registerName: String) {

  /**
   * Reads the register resource and resolves every listed class name.
   * Names for which `Class.forName` fails are skipped.
   */
  private def fetchClasses: Seq[Class[_]] = {
    ResourceUtils.readResourceLines(s"/META-INF/services/$registerName")
      .map(className => Try(Class.forName(className)))
      .filter(_.isSuccess)
      .map(_.get)
      .toSeq
  }

  /**
   * Instantiates the first registered class that exposes a public constructor
   * matching the runtime classes of `params`.
   *
   * NOTE(review): `name` is only used in the error message; it does not take
   * part in selecting the implementation. With several registered classes the
   * first one that can be constructed wins - confirm this is intended.
   *
   * @param name
   *   Name of the requested service (reported in the error message).
   * @param params
   *   Constructor arguments; their runtime classes select the constructor.
   * @return
   *   The first successfully constructed instance.
   * @throws IllegalArgumentException
   *   If none of the registered classes can be constructed with `params`.
   */
  def getService(name: String, params: Array[Object] = Array.empty): T = {
    // Iterating lazily stops at the first class that can be constructed.
    // A strict collection would build an instance of every registered
    // implementation (running all their constructors) and drop all but one.
    val instance = fetchClasses.iterator
      .map(clazz =>
        Try(clazz.getConstructor(params.map(_.getClass): _*))
          .map(_.newInstance(params: _*).asInstanceOf[T])
      )
      .find(_.isSuccess)
      .map(_.get)

    instance.getOrElse(
      throw new IllegalArgumentException(s"Unable to find service with name $name")
    )
  }

}

/**
 * This object contains the actual factory instances for [[com.databricks.labs.mosaic.core.geometry.api.GeometryAPI]],
 * [[com.databricks.labs.mosaic.core.index.IndexSystem]] and [[com.databricks.labs.mosaic.core.raster.RasterAPI]].
*/
object GenericServiceFactory {

  /** Factory backed by the `com.databricks.labs.mosaic.GeometryAPIRegister` services resource. */
  object GeometryAPIFactory
      extends GenericServiceFactory[GeometryAPI](registerName = "com.databricks.labs.mosaic.GeometryAPIRegister") {

    /** Delegates to `getService` with the same arguments. */
    def getGeometryAPI(name: String, params: Array[Object] = Array.empty): GeometryAPI =
      getService(name, params)
  }

  /** Factory backed by the `com.databricks.labs.mosaic.IndexSystemRegister` services resource. */
  object IndexSystemFactory
      extends GenericServiceFactory[IndexSystem](registerName = "com.databricks.labs.mosaic.IndexSystemRegister") {

    /** Delegates to `getService` with the same arguments. */
    def getIndexSystem(name: String, params: Array[Object] = Array.empty): IndexSystem =
      getService(name, params)
  }

  /** Factory backed by the `com.databricks.labs.mosaic.RasterAPIRegister` services resource. */
  object RasterAPIFactory
      extends GenericServiceFactory[RasterAPI](registerName = "com.databricks.labs.mosaic.RasterAPIRegister") {

    /** Delegates to `getService` with the same arguments. */
    def getRasterAPI(name: String, params: Array[Object] = Array.empty): RasterAPI =
      getService(name, params)
  }

}
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/Mosaic.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/Mosaic.scala new file mode 100644 index 000000000..41e7d25ab --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/Mosaic.scala @@ -0,0 +1,286 @@
package com.databricks.labs.mosaic.core

import com.databricks.labs.mosaic.core.geometry._
import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
import com.databricks.labs.mosaic.core.index.IndexSystem
import com.databricks.labs.mosaic.core.types.GeometryTypeEnum._
import com.databricks.labs.mosaic.core.types._

import scala.annotation.tailrec

/**
 * Single abstracted logic for mosaic fill via [[IndexSystem]]. [[IndexSystem]]
 * is in charge of implementing the individual steps of the logic.
 */
object Mosaic {

  /**
   * This method is used to fill a geometry with a given resolution.
   *
   * @param geometry The geometry to fill with chips.
   * @param resolution The resolution to fill the geometry with.
   * @param keepCoreGeom Whether or not to keep the core geometry.
* @param indexSystem The index system to use for filling the geometry.
   * @param geometryAPI The geometry API to use for manipulating the geometry.
   * @return A sequence of [[MosaicChip]]s.
   */
  def getChips(
      geometry: MosaicGeometry,
      resolution: Int,
      keepCoreGeom: Boolean,
      indexSystem: IndexSystem,
      geometryAPI: GeometryAPI
  ): Seq[MosaicChip] = {
    val geometryType = GeometryTypeEnum.fromString(geometry.getGeometryType)
    // Points and lines have dedicated strategies; every other type goes through mosaicFill.
    geometryType match {
      case POINT                        => pointChip(geometry, resolution, keepCoreGeom, indexSystem)
      case MULTIPOINT                   => multiPointChips(geometry, resolution, keepCoreGeom, indexSystem)
      case LINESTRING | MULTILINESTRING => lineFill(geometry, resolution, indexSystem, geometryAPI)
      case _                            => mosaicFill(geometry, resolution, keepCoreGeom, indexSystem, geometryAPI)
    }
  }

  /**
   * Produces one chip per member point of a [[MosaicMultiPoint]] by applying
   * [[pointChip]] to each of them.
   *
   * @param geometry The multi point geometry; it is cast to [[MosaicMultiPoint]].
   * @param resolution The resolution to fill the geometry with.
   * @param keepCoreGeom Whether or not to keep the point geometry on the chip.
   * @param indexSystem The index system to use for filling the geometry.
   * @return A sequence of [[MosaicChip]]s.
   */
  def multiPointChips(
      geometry: MosaicGeometry,
      resolution: Int,
      keepCoreGeom: Boolean,
      indexSystem: IndexSystem
  ): Seq[MosaicChip] =
    geometry
      .asInstanceOf[MosaicMultiPoint]
      .asSeq
      .flatMap(member => pointChip(member, resolution, keepCoreGeom, indexSystem))

  /**
   * This method is used to fill a geometry with a given resolution.
   * This method is designed to be used with a [[MosaicPoint]].
   *
   * @param geometry The geometry to fill with chips.
   * @param resolution The resolution to fill the geometry with.
   * @param keepCoreGeom Whether or not to keep the core geometry.
   * @param indexSystem The index system to use for filling the geometry.
* @return A sequence of [[MosaicChip]]s.
   */
  def pointChip(
      geometry: MosaicGeometry,
      resolution: Int,
      keepCoreGeom: Boolean,
      indexSystem: IndexSystem
  ): Seq[MosaicChip] = {
    val point = geometry.asInstanceOf[MosaicPoint]
    val cellId = indexSystem.pointToIndex(point.getX, point.getY, resolution)
    // The chip carries the point itself only when the caller asked for it.
    val chipGeom = if (keepCoreGeom) point else null
    Seq(MosaicChip(isCore = false, Left(cellId), chipGeom).formatCellId(indexSystem))
  }

  /**
   * This method is used to fill a geometry with a given resolution.
   * This method is designed to be used with a [[MosaicPolygon]], [[MosaicMultiPolygon]] and [[MosaicGeometryCollection]].
   *
   * The geometry is shrunk by the buffer radius to find the core cells and its
   * boundary (or the geometry itself when shrinking leaves nothing) is
   * expanded to find the border cells.
   *
   * @param geometry The geometry to fill with chips.
   * @param resolution The resolution to fill the geometry with.
   * @param keepCoreGeom Forwarded to the index system when building core and border chips.
   * @param indexSystem The index system to use for filling the geometry.
   * @param geometryAPI The geometry API to use for manipulating the geometry.
   * @return A sequence of [[MosaicChip]]s, core chips first.
   */
  def mosaicFill(
      geometry: MosaicGeometry,
      resolution: Int,
      keepCoreGeom: Boolean,
      indexSystem: IndexSystem,
      geometryAPI: GeometryAPI
  ): Seq[MosaicChip] = {

    val radius = indexSystem.getBufferRadius(geometry, resolution, geometryAPI)
    // add 1% to the radius to ensure union of carved and border geometries does not have holes inside the original geometry areas
    val borderBuffer = radius * 1.01
    val simplifyTolerance = 0.01 * radius

    // do not modify the radius
    val carvedGeometry = geometry.buffer(-radius)
    val borderGeometry =
      if (carvedGeometry.isEmpty) geometry.buffer(borderBuffer).simplify(simplifyTolerance)
      else geometry.boundary.buffer(borderBuffer).simplify(simplifyTolerance)

    val coreIndices = indexSystem.polyfill(carvedGeometry, resolution, Some(geometryAPI))
    val borderIndices = indexSystem.polyfill(borderGeometry, resolution, Some(geometryAPI)).diff(coreIndices)

    indexSystem.getCoreChips(coreIndices, keepCoreGeom, geometryAPI) ++
      indexSystem.getBorderChips(geometry, borderIndices, keepCoreGeom, geometryAPI)
  }

  /**
   * This method is used to decompose a [[MosaicLineString]] or [[MosaicMultiLineString]] into a sequence of [[MosaicChip]]s.
   *
   * @param geometry The line to decompose.
   * @param resolution The resolution to decompose the line with.
   * @param indexSystem The index system to use for decomposing the line.
   * @param geometryAPI The geometry API to use for manipulating the geometry.
   * @return A sequence of [[MosaicChip]]s.
   */
  def lineFill(geometry: MosaicGeometry, resolution: Int, indexSystem: IndexSystem, geometryAPI: GeometryAPI): Seq[MosaicChip] = {
    def decompose(line: MosaicLineString): Seq[MosaicChip] = lineDecompose(line, resolution, indexSystem, geometryAPI)

    GeometryTypeEnum.fromString(geometry.getGeometryType) match {
      case LINESTRING      => decompose(geometry.asInstanceOf[MosaicLineString])
      case MULTILINESTRING =>
        // Each member line is decomposed independently and the chips are concatenated.
        geometry
          .asInstanceOf[MosaicMultiLineString]
          .flatten
          .flatMap(member => decompose(member.asInstanceOf[MosaicLineString]))
      case gt              => throw new Error(s"$gt not supported for line fill/decompose operation.")
    }
  }

  /**
   * Returns the core cells of the geometry together with the k ring of each
   * of its border cells.
   *
   * @param geometry
   *   Geometry to get k ring cells for.
   * @param resolution
   *   Resolution of the cells to get.
   * @param k
   *   The k passed to the index system's kRing for every border cell.
   * @param indexSystem
   *   Index system to use.
   * @param geometryAPI
   *   Geometry API to use.
   * @return
   *   A set of k ring cells for the geometry.
*/
  //noinspection DuplicatedCode
  def geometryKRing(geometry: MosaicGeometry, resolution: Int, k: Int, indexSystem: IndexSystem, geometryAPI: GeometryAPI): Set[Long] = {
    val (coreCells, borderCells) = getCellSets(geometry, resolution, indexSystem, geometryAPI)
    // Only border cells are expanded; the core cells are added as they are.
    coreCells ++ borderCells.flatMap(indexSystem.kRing(_, k))
  }

  /**
   * Returns the cells of the k loop of the border cells that are neither core
   * cells nor part of the (k - 1) ring of the border cells.
   *
   * @param geometry
   *   Geometry to get k loop around
   * @param resolution
   *   Resolution of the cells
   * @param k
   *   The k passed to the index system's kLoop; k - 1 is used for the inner ring.
   * @param indexSystem
   *   Index system to use
   * @param geometryAPI
   *   Geometry API to use
   * @return
   *   Set of cells that form a k loop around geometry
   */
  //noinspection DuplicatedCode
  def geometryKLoop(geometry: MosaicGeometry, resolution: Int, k: Int, indexSystem: IndexSystem, geometryAPI: GeometryAPI): Set[Long] = {
    // This would be much more efficient if we could use the
    // pre-computed tessellation of the geometry for repeated calls.
    val (coreCells, borderCells) = getCellSets(geometry, resolution, indexSystem, geometryAPI)

    // Everything already covered by the core cells and the (k - 1) ring of the border cells.
    val innerRing = coreCells ++ borderCells.flatMap(indexSystem.kRing(_, k - 1))

    borderCells.flatMap(indexSystem.kLoop(_, k)) -- innerRing
  }

  /**
   * This method is used to decompose a [[MosaicLineString]] into a sequence of [[MosaicChip]]s.
   * Note that this method only works on [[MosaicLineString]]s.
   * For [[MosaicMultiLineString]]s, use [[lineFill]].
   *
   * @param line The line to decompose.
   * @param resolution The resolution to decompose the line with.
   * @param indexSystem The index system to use for decomposing the line.
   * @param geometryAPI The geometry API to use for manipulating the geometry.
   * @return A sequence of [[MosaicChip]]s.
*/
  private def lineDecompose(
      line: MosaicLineString,
      resolution: Int,
      indexSystem: IndexSystem,
      geometryAPI: GeometryAPI
  ): Seq[MosaicChip] = {
    val firstPoint = line.getShells.head.asSeq.head
    val seedCell = indexSystem.pointToIndex(firstPoint.getX, firstPoint.getY, resolution)

    // Breadth-first walk over the cells: every cell that intersects the line
    // yields a chip and queues its not yet seen neighbours for the next round.
    @tailrec
    def traverseLine(
        line: MosaicLineString,
        queue: Seq[Long],
        traversed: Set[Long],
        chips: Seq[MosaicChip]
    ): Seq[MosaicChip] = {
      val visited = traversed ++ queue
      val (nextQueue, nextChips) = queue.foldLeft((Seq.empty[Long], chips)) {
        case ((pending, collected), cell) =>
          val cellGeom = indexSystem.indexToGeometry(cell, geometryAPI)
          val segment = line.intersection(cellGeom)
          if (!segment.isEmpty) {
            val chip = MosaicChip(isCore = false, Left(cell), segment)
            val neighbours = indexSystem.kRing(cell, 1)

            // Ignore already processed chips and those which are already in the
            // queue to be processed
            val unseen = neighbours.filterNot((visited ++ pending).contains)
            (pending ++ unseen, collected ++ Seq(chip))
          } else if (visited.size == 1) {
            // The line segment intersection was empty, but we only intersected the first point
            // with a single cell.
            // We need to run an intersection with a first ring because the starting point might be laying
            // exactly on the cell boundary.
            val neighbours = indexSystem.kRing(cell, 1)
            (neighbours.filterNot(visited.contains), collected)
          } else {
            (pending, collected)
          }
      }

      if (nextQueue.isEmpty) nextChips
      else traverseLine(line, nextQueue, visited, nextChips)
    }

    traverseLine(line, Seq(seedCell), Set.empty[Long], Seq.empty[MosaicChip])
  }

  /**
   * Returns core cells and border cells as sets of Longs. The
   * implementation currently depends on [[getChips()]] method.
   *
   * @param geometry
   *   Geometry to fill with cells.
* @param resolution
   *   Resolution of the cells.
   * @param indexSystem
   *   Index system to use.
   * @param geometryAPI
   *   Geometry API to use.
   * @return
   *   Tuple of core cells and border cells.
   */
  private def getCellSets(
      geometry: MosaicGeometry,
      resolution: Int,
      indexSystem: IndexSystem,
      geometryAPI: GeometryAPI
  ): (Set[Long], Set[Long]) = {
    // Chip geometries are not needed here, only the cell ids.
    val chips = Mosaic.getChips(geometry, resolution, keepCoreGeom = false, indexSystem, geometryAPI)
    val (coreChips, borderChips) = chips.partition(_.isCore)

    val coreCells = coreChips.map(_.cellIdAsLong(indexSystem)).toSet
    val borderCells = borderChips.map(_.cellIdAsLong(indexSystem)).toSet
    (coreCells, borderCells)
  }

}
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/MosaicException.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/MosaicException.scala new file mode 100644 index 000000000..f16a87bcc --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/MosaicException.scala @@ -0,0 +1,12 @@
package com.databricks.labs.mosaic.core

/** Constructors for the exceptions raised by Mosaic. */
object MosaicException {

  /**
   * Builds the exception reported when a geometry is supplied in an encoding
   * the expression does not support.
   *
   * @param supportedGeometryEncodings
   *   Encodings accepted by the expression; listed comma separated in the message.
   * @param suppliedGeometryEncoding
   *   The encoding that was actually supplied.
   * @return
   *   The exception; it is returned, not thrown.
   */
  def GeometryEncodingNotSupported(supportedGeometryEncodings: Seq[String], suppliedGeometryEncoding: String): Exception =
    new Exception(
      // The separating space keeps the two sentences apart; without it the
      // message read "...WKB,WKT.HEX was supplied as input."
      s"This expression only supports geometries encoded as ${supportedGeometryEncodings.mkString(",")}. " +
        s"$suppliedGeometryEncoding was supplied as input."
    )

}
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/ConvertToCodeGen.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/ConvertToCodeGen.scala new file mode 100644 index 000000000..02988c428 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/ConvertToCodeGen.scala @@ -0,0 +1,132 @@
package com.databricks.labs.mosaic.core.codegen.format

import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
import com.databricks.labs.mosaic.core.types._
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.types._

/**
 * This class is used to generate CodeGen for converting between different geometry formats.
 */
object ConvertToCodeGen {

  /**
   * This method generates code to construct a geometry from the input format.
   * Then the method generates code to write the geometry to the output format.
   * There is currently no support for conversion without constructing a geometry.
   *
   * @param ctx CodegenContext used for code generation.
   * @param ev ExprCode that will store the reference to the output.
   * @param eval Reference to the input.
   * @param inputDataType DataType of the input.
   * @param outputDataTypeName Name of the output DataType.
   * @param geometryAPI GeometryAPI used to manipulate the geometry.
   * @return Code to construct the geometry from the input format and write the geometry to the output format.
*/
  def fromEval(
      ctx: CodegenContext,
      ev: ExprCode,
      eval: String,
      inputDataType: DataType,
      outputDataTypeName: String,
      geometryAPI: GeometryAPI
  ): String = {
    val sameFormat = inputDataType.simpleString == outputDataTypeName
    if (sameFormat) {
      // Nothing to convert: the input reference is assigned to the output as is.
      s"""
         |${ev.value} = $eval;
         |""".stripMargin
    } else {
      // Read the input into a geometry, then write that geometry in the output format.
      val (readCode, geometryRef) = readGeometryCode(ctx, eval, inputDataType, geometryAPI)
      val (writeCode, outputRef) = writeGeometryCode(ctx, geometryRef, outputDataTypeName, geometryAPI)
      val conversionCode =
        s"""
           |$readCode
           |$writeCode
           |${ev.value} = $outputRef;
           |""".stripMargin
      geometryAPI.codeGenTryWrap(conversionCode)
    }
  }

  /**
   * Runs the code generation by handing [[fromEval]] to the supplied
   * null safe code generator.
   * The indirection exists to allow for testing through scalamock.
   *
   * @param ctx CodegenContext used for code generation.
   * @param ev ExprCode that will store the reference to the output.
   * @param nullSafeCodeGen Function that receives the context, the ExprCode and the code producing function.
   * @param inputDataType DataType of the input.
   * @param outputDataTypeName Name of the output DataType.
   * @param geometryAPI GeometryAPI used to manipulate the geometry.
   * @return The ExprCode returned by `nullSafeCodeGen`.
   */
  def doCodeGen(
      ctx: CodegenContext,
      ev: ExprCode,
      nullSafeCodeGen: (CodegenContext, ExprCode, String => String) => ExprCode,
      inputDataType: DataType,
      outputDataTypeName: String,
      geometryAPI: GeometryAPI
  ): ExprCode = {
    val generate: String => String = eval => fromEval(ctx, ev, eval, inputDataType, outputDataTypeName, geometryAPI)
    nullSafeCodeGen(ctx, ev, generate)
  }

  /**
   * This method generates code to read the geometry from the input format.
   *
   * @param ctx CodegenContext used for code generation.
   * @param eval Reference to the input.
   * @param inputDataType DataType of the input.
   * @param geometryAPI GeometryAPI used to manipulate the geometry.
   * @return Code to construct the geometry from the input format.
*/
  def readGeometryCode(ctx: CodegenContext, eval: String, inputDataType: DataType, geometryAPI: GeometryAPI): (String, String) = {
    val io = geometryAPI.ioCodeGen
    inputDataType match {
      case BinaryType  => io.fromWKB(ctx, eval, geometryAPI)
      case StringType  => io.fromWKT(ctx, eval, geometryAPI)
      case HexType     => io.fromHex(ctx, eval, geometryAPI)
      case GeoJSONType => io.fromGeoJSON(ctx, eval, geometryAPI)
      // NOTE(review): the message says "Geometry API" but it reports the unsupported input data type.
      case _           => throw new Error(s"Geometry API unsupported: ${inputDataType.typeName}.")
    }
  }

  /**
   * Generates code to write the geometry in the default format of the given
   * output data type, as resolved by [[GeometryFormat.getDefaultFormat]].
   *
   * @param ctx CodegenContext used for code generation.
   * @param eval Reference to the input.
   * @param outputDataType DataType of the output.
   * @param geometryAPI GeometryAPI used to manipulate the geometry.
   * @return Code to write the geometry to the output format.
   */
  def writeGeometryCode(ctx: CodegenContext, eval: String, outputDataType: DataType, geometryAPI: GeometryAPI): (String, String) =
    writeGeometryCode(ctx, eval, GeometryFormat.getDefaultFormat(outputDataType), geometryAPI)

  /**
   * This method generates code to write the geometry to the output format.
   *
   * @param ctx CodegenContext used for code generation.
   * @param eval Reference to the input.
   * @param outputDataFormatName Name of the output format.
   * @param geometryAPI GeometryAPI used to manipulate the geometry.
   * @return Code to write the geometry to the output format.
*/
  def writeGeometryCode(ctx: CodegenContext, eval: String, outputDataFormatName: String, geometryAPI: GeometryAPI): (String, String) = {
    val io = geometryAPI.ioCodeGen

    // Format names are matched exactly (upper case).
    outputDataFormatName match {
      case "WKB"     => io.toWKB(ctx, eval, geometryAPI)
      case "WKT"     => io.toWKT(ctx, eval, geometryAPI)
      case "HEX"     => io.toHEX(ctx, eval, geometryAPI)
      case "GEOJSON" => io.toGeoJSON(ctx, eval, geometryAPI)
      case _         => throw new Error(s"Data type unsupported: $outputDataFormatName.")
    }
  }

}
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryFormat.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryFormat.scala new file mode 100644 index 000000000..2437daf90 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryFormat.scala @@ -0,0 +1,24 @@
package com.databricks.labs.mosaic.core.codegen.format

import com.databricks.labs.mosaic.core.types._
import org.apache.spark.sql.types._

/** Utility object for handling default formats for data types */
object GeometryFormat {

  /**
   * Get the default format for a given data type.
   *
   * @param outputDataType The data type to get the default format for.
   * @return The default format for the given data type.
*/
  def getDefaultFormat(outputDataType: DataType): String =
    outputDataType match {
      case BinaryType  => "WKB"
      case StringType  => "WKT"
      case HexType     => "HEX"
      case GeoJSONType => "GEOJSON"
      // Any other data type has no default geometry format.
      case _           => throw new Error(s"Unsupported data type ${outputDataType.typeName}.")
    }
}
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryIOCodeGen.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryIOCodeGen.scala new file mode 100644 index 000000000..54324e591 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryIOCodeGen.scala @@ -0,0 +1,29 @@
package com.databricks.labs.mosaic.core.codegen.format

import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext

/**
 * GeometryIOCodeGen is a trait that defines the interface for generating code for the various geometry formats.
 * To support a new format toFormat and fromFormat methods need to be added to this trait.
 * This is the IO CodeGen contract for all Geometry implementations.
*/
trait GeometryIOCodeGen {

  // Readers: generate code that builds a geometry from the encoded input `eval`.

  /** Code generation for reading a geometry from WKT. */
  def fromWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)

  /** Code generation for reading a geometry from WKB. */
  def fromWKB(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)

  /** Code generation for reading a geometry from GeoJSON. */
  def fromGeoJSON(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)

  /** Code generation for reading a geometry from HEX. */
  def fromHex(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)

  // Writers: generate code that encodes the geometry referenced by `eval`.

  /** Code generation for writing a geometry as WKT. */
  def toWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)

  /** Code generation for writing a geometry as WKB. */
  def toWKB(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)

  /** Code generation for writing a geometry as GeoJSON. */
  def toGeoJSON(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)

  /** Code generation for writing a geometry as HEX. */
  def toHEX(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)

}
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/crs/CRSBounds.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/crs/CRSBounds.scala new file mode 100644 index 000000000..b91688b2b --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/crs/CRSBounds.scala @@ -0,0 +1,47 @@
package com.databricks.labs.mosaic.core.crs

import com.databricks.labs.mosaic.core.geometry.MosaicPoint
import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI

/**
 * CRSBounds captures lower left and upper right extreme points for a given
 * CRS. Extreme points are provided as MosaicPoints. The CRSBounds instances
 * are constructed via geometry API.
 *
 * @param lowerLeft
 *   Lower left extreme point (xmin, ymin).
 * @param upperRight
 *   Upper right extreme point (xmax, ymax).
 */
case class CRSBounds(lowerLeft: MosaicPoint, upperRight: MosaicPoint) {

  /** X coordinate of the lower left point. */
  def getLowerX: Double = lowerLeft.getX

  /** Y coordinate of the lower left point. */
  def getLowerY: Double = lowerLeft.getY

  /** X coordinate of the upper right point. */
  def getUpperX: Double = upperRight.getX

  /** Y coordinate of the upper right point. */
  def getUpperY: Double = upperRight.getY
}

object CRSBounds {

  /**
   * Construct CRSBounds instance for given extreme coordinate values.
* Construction is bound for the selected geometry API at runtime.
   *
   * @param geometryAPI
   *   Geometry API attached to Mosaic Context.
   * @param x1
   *   Minimum x coordinate value.
   * @param y1
   *   Minimum y coordinate value.
   * @param x2
   *   Maximum x coordinate value.
   * @param y2
   *   Maximum y coordinate value.
   * @return
   *   CRSBounds whose lower left point is (x1, y1) and upper right point is (x2, y2).
   */
  def apply(geometryAPI: GeometryAPI, x1: Double, y1: Double, x2: Double, y2: Double): CRSBounds = {
    val lowerLeft = geometryAPI.fromCoords(Seq(x1, y1))
    val upperRight = geometryAPI.fromCoords(Seq(x2, y2))
    CRSBounds(lowerLeft, upperRight)
  }
}
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/crs/CRSBoundsProvider.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/crs/CRSBoundsProvider.scala new file mode 100644 index 000000000..e78983c82 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/crs/CRSBoundsProvider.scala @@ -0,0 +1,87 @@
package com.databricks.labs.mosaic.core.crs

import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
import java.io.InputStream

/**
 * CRSBoundsProvider provides APIs to get bounds extreme points based on CRS
 * dataset name (ie. EPSG) and CRS id (ie. 4326). The lookup is not exhaustive
 * and it is generated based on a resource file. Resource file is sourced based
 * on spatial reference.org. Not all CRSs available at spatial reference.org have
 * bounds specified. Those are skipped in the resource file.
 *
 * @see
 *   https://spatialreference.org/
 * @param lookup
 *   A map of (crs_dataset, id) -> (bounds, reprojected_bounds) pairs.
 */
case class CRSBoundsProvider(private val lookup: Map[(String, Int), (CRSBounds, CRSBounds)]) {

  /**
   * Returns bounds for provided CRS dataset and ID pair. Bounds are provided
   * as (longitude, latitude) points. Only lower left and upper right points
   * are supplied since they correspond to xmin, ymin and xmax and ymax
   * extremes.
   *
   * @param dataset
   *   CRS dataset, e.g. EPSG.
* @param id
   *   CRS id within the CRS dataset, e.g. 4326.
   * @return
   *   an instance of [[CRSBounds]] corresponding to supplied (crs_dataset,
   *   id) pair.
   */
  def bounds(dataset: String, id: Int): CRSBounds = {
    val key = (dataset, id)
    // Fails with an IllegalArgumentException when the pair is not in the lookup.
    require(lookup.contains(key), s"Requested CRS does not have boundaries defined: $key")
    val (crsBounds, _) = lookup(key)
    crsBounds
  }

  /**
   * Returns reprojected bounds for provided CRS dataset and ID pair. Bounds
   * are provided as (longitude, latitude) points equivalents. Only lower
   * left and upper right points are supplied since they correspond to xmin,
   * ymin and xmax and ymax extremes.
   *
   * @param dataset
   *   CRS dataset, e.g. EPSG.
   * @param id
   *   CRS id within the CRS dataset, e.g. 27700.
   * @return
   *   an instance of [[CRSBounds]] corresponding to supplied (crs_dataset,
   *   id) pair.
   */
  def reprojectedBounds(dataset: String, id: Int): CRSBounds = {
    val key = (dataset, id)
    // Fails with an IllegalArgumentException when the pair is not in the lookup.
    require(lookup.contains(key), s"Requested CRS does not have boundaries defined: $key")
    val (_, reprojected) = lookup(key)
    reprojected
  }

}

object CRSBoundsProvider {

  /**
   * Creates an instance of [[CRSBoundsProvider]] based on a resource file
   * containing the bounds' lower left and upper right extreme points. The
   * lookup contains longitude and latitude bounds and reprojected equivalent
   * values. The bounds values have been sourced from spatial reference.org.
*
   * The first line of the resource is skipped as a header. Every other
   * non-blank line is split on "," into a "dataset:id" name followed by
   * eight numbers: x1, y1, x2, y2 of the bounds and x3, y3, x4, y4 of the
   * reprojected bounds.
   *
   * @see
   *   https://spatialreference.org/
   * @param geometryAPI
   *   Geometry API used to construct the [[CRSBounds]] points.
   * @return
   *   A provider holding one lookup entry per parsed line.
   * @throws IllegalArgumentException
   *   If the `/CRSBounds.csv` resource cannot be found.
   */
  def apply(geometryAPI: GeometryAPI): CRSBoundsProvider = {
    val stream: InputStream = getClass.getResourceAsStream("/CRSBounds.csv")
    // getResourceAsStream returns null for a missing resource; fail with a clear message instead of an NPE.
    require(stream != null, "Resource /CRSBounds.csv was not found on the classpath.")

    val source = scala.io.Source.fromInputStream(stream)
    // getLines() is lazy, so the rows are materialised before the source (and
    // with it the underlying stream) is closed.
    val lines: List[String] =
      try source.getLines().toList.drop(1).filter(_.trim.nonEmpty)
      finally source.close()

    val lookupItems = lines
      .map(line => {
        val lineItems = line.split(",")
        val nameItems = lineItems(0).split(":")
        val (crsDataset, id) = (nameItems(0), nameItems(1).toInt)
        val (x1, y1, x2, y2) = (lineItems(1).toDouble, lineItems(2).toDouble, lineItems(3).toDouble, lineItems(4).toDouble)
        val (x3, y3, x4, y4) = (lineItems(5).toDouble, lineItems(6).toDouble, lineItems(7).toDouble, lineItems(8).toDouble)
        (crsDataset, id) -> (CRSBounds(geometryAPI, x1, y1, x2, y2), CRSBounds(geometryAPI, x3, y3, x4, y4))
      })
    CRSBoundsProvider(lookupItems.toMap)
  }

}
diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/GenericExpressionFactory.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/GenericExpressionFactory.scala new file mode 100644 index 000000000..0f7479cc8 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/expressions/GenericExpressionFactory.scala @@ -0,0 +1,94 @@
package com.databricks.labs.mosaic.core.expressions

import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
import org.apache.spark.sql.catalyst.expressions.Expression

import scala.reflect.ClassTag
import scala.util.Try

/**
 * GenericExpressionFactory is a factory that creates a function builder for a
 * given expression. It handles the boilerplate code needed to create a
 * function builder for a given expression. The functions reflect the types and
 * identify the correct constructors to be used.
 */
object GenericExpressionFactory {

  /**
   * Implements the makeCopy in a generic way so we don't need to repeat the
   * same code over and over again.
/**
  * Implements makeCopy in a generic way so the same code does not need to be
  * repeated in every expression.
  *
  * @param toCopy
  *   The expression to copy; its tags are copied onto the new instance.
  * @param newArgs
  *   The new arguments for the expression.
  * @param nChildren
  *   The number of children expressions the expression has in the logical
  *   tree. Only the first nChildren entries of newArgs are used.
  * @param expressionConfig
  *   Additional arguments for the expression (expressionConfigs).
  * @tparam T
  *   The type of the expression.
  * @return
  *   A copy of the expression.
  */
def makeCopyImpl[T <: Expression: ClassTag](
    toCopy: Expression,
    newArgs: Array[AnyRef],
    nChildren: Int,
    expressionConfig: MosaicExpressionConfig
): Expression = {
    val newInstance = construct[T](newArgs.take(nChildren).map(_.asInstanceOf[Expression]), expressionConfig)
    newInstance.copyTagsFrom(toCopy)
    newInstance
}

/**
  * Constructs an expression with the given arguments. The public
  * constructors of T are tried in the order reported by reflection and the
  * first one that can be invoked with the child expressions followed by the
  * expression config wins.
  *
  * @param args
  *   The arguments for the expression.
  * @param expressionConfig
  *   Additional arguments for the expression (expressionConfigs).
  * @tparam T
  *   The type of the expression.
  * @return
  *   An instance of the expression.
  * @throws NoSuchElementException
  *   If none of the public constructors can be invoked with the arguments.
  */
def construct[T <: Expression: ClassTag](args: Array[_ <: Expression], expressionConfig: MosaicExpressionConfig): Expression = {
    val clazz = implicitly[ClassTag[T]].runtimeClass
    val allArgs = args ++ Seq(expressionConfig)

    // Iterate lazily: instantiation stops at the first constructor that
    // succeeds instead of invoking every compatible constructor.
    clazz.getConstructors.iterator
        .map(constructor =>
            Try {
                val argClasses = constructor.getParameterTypes
                val castedArgs = allArgs
                    .take(argClasses.length)
                    .zip(argClasses)
                    .map { case (arg, tpe) => tpe.cast(arg) }
                    .toSeq
                    .asInstanceOf[Seq[AnyRef]]
                constructor.newInstance(castedArgs: _*)
            }
        )
        .collectFirst { case scala.util.Success(instance) => instance.asInstanceOf[Expression] }
        .getOrElse(
          throw new NoSuchElementException(
            s"No public constructor of ${clazz.getName} could be instantiated from " +
                s"${args.length} expression argument(s) and the expression config."
          )
        )
}
/**
  * Creates a function builder for a given expression. The builder keeps the
  * first nChildren child expressions and delegates to `construct` to pick
  * the constructor.
  *
  * @param nChildren
  *   The number of leading child expressions passed to the constructor.
  * @param expressionConfig
  *   Additional arguments for the expression (expressionConfigs).
  * @tparam T
  *   The type of the expression.
  * @return
  *   A function builder for the expression.
  */
def getBaseBuilder[T <: Expression: ClassTag](nChildren: Int, expressionConfig: MosaicExpressionConfig): FunctionBuilder = {
    (children: Seq[Expression]) =>
        val constructorChildren = children.take(nChildren).toArray
        GenericExpressionFactory.construct[T](constructorChildren, expressionConfig)
}
/**
  * Mosaic Expression Config holds the configuration for Mosaic expressions.
  * Singleton objects are not accessible outside the JVM, so this is the
  * mechanism to allow for shared context. It is used to select the Mosaic
  * runtime APIs and the raster checkpoint location.
  *
  * Instances are immutable: every setter returns a new config.
  *
  * @param configs
  *   The configuration map for the Mosaic Expression.
  */
case class MosaicExpressionConfig(configs: Map[String, String]) {

    /**
      * Writes every entry of this config into the runtime configuration of
      * the active Spark session.
      */
    def updateSparkConf(): Unit = {
        val spark = SparkSession.builder().getOrCreate()
        // SparkContext.getConf returns a copy, so values set on it are lost.
        // The session runtime conf is where the companion `apply` reads from.
        configs.foreach { case (k, v) => spark.conf.set(k, v) }
    }

    /** Resolves the geometry API registered under the configured name. */
    def getGeometryAPI(params: Array[Object] = Array.empty): GeometryAPI = GeometryAPIFactory.getGeometryAPI(configs(MOSAIC_GEOMETRY_API), params)

    /** Resolves the index system registered under the configured name. */
    def getIndexSystem(params: Array[Object] = Array.empty): IndexSystem = IndexSystemFactory.getIndexSystem(configs(MOSAIC_INDEX_SYSTEM), params)

    /** Resolves the raster API registered under the configured name. */
    def getRasterAPI(params: Array[Object] = Array.empty): RasterAPI = RasterAPIFactory.getRasterAPI(configs(MOSAIC_RASTER_API), params)

    /** Returns the raster checkpoint location, or the default when unset. */
    def getRasterCheckpoint: String = configs.getOrElse(MOSAIC_RASTER_CHECKPOINT, MOSAIC_RASTER_CHECKPOINT_DEFAULT)

    /** Returns the cell id data type of the configured index system. */
    def getCellIdType: DataType = getIndexSystem().cellIdType

    /** Returns a copy of this config with the geometry API name replaced. */
    def setGeometryAPI(api: String): MosaicExpressionConfig = setConfig(MOSAIC_GEOMETRY_API, api)

    /** Returns a copy of this config with the index system name replaced. */
    def setIndexSystem(system: String): MosaicExpressionConfig = setConfig(MOSAIC_INDEX_SYSTEM, system)

    /** Returns a copy of this config with the raster API name replaced. */
    def setRasterAPI(api: String): MosaicExpressionConfig = setConfig(MOSAIC_RASTER_API, api)

    /** Returns a copy of this config with the raster checkpoint replaced. */
    def setRasterCheckpoint(checkpoint: String): MosaicExpressionConfig = setConfig(MOSAIC_RASTER_CHECKPOINT, checkpoint)

    /** Returns a copy of this config with `key` set to `value`. */
    def setConfig(key: String, value: String): MosaicExpressionConfig = {
        MosaicExpressionConfig(configs + (key -> value))
    }

}
/**
  * Companion object for the Mosaic Expression Config. Provides a constructor
  * that reads the settings from a spark session configuration.
  */
object MosaicExpressionConfig {

    /**
      * Builds a config from the runtime configuration of the given session.
      * The geometry API, index system and raster API keys are read without a
      * default; the raster checkpoint falls back to its default value.
      *
      * @param spark
      *   The session whose runtime configuration is read.
      * @return
      *   A config holding the four Mosaic settings.
      */
    def apply(spark: SparkSession): MosaicExpressionConfig = {
        val geometryAPIName = spark.conf.get(MOSAIC_GEOMETRY_API)
        val indexSystemName = spark.conf.get(MOSAIC_INDEX_SYSTEM)
        val rasterAPIName = spark.conf.get(MOSAIC_RASTER_API)
        val checkpoint = spark.conf.get(MOSAIC_RASTER_CHECKPOINT, MOSAIC_RASTER_CHECKPOINT_DEFAULT)

        val emptyConfig = new MosaicExpressionConfig(Map.empty[String, String])
        emptyConfig
            .setGeometryAPI(geometryAPIName)
            .setIndexSystem(indexSystemName)
            .setRasterAPI(rasterAPIName)
            .setRasterCheckpoint(checkpoint)
    }

}
/**
  * Returns the expression info for the expression based on the expression's
  * type. Reduces the boilerplate needed to add an expression to spark SQL.
  *
  * @param database
  *   Optional database name; when empty, this trait's `database` member is
  *   used, and null is passed on if that is empty too.
  * @tparam T
  *   The expression class the info is created for.
  * @return
  *   An ExpressionInfo object.
  */
final def getExpressionInfo[T <: Expression: ClassTag](database: Option[String] = None): ExpressionInfo = {
    val expressionClass = implicitly[ClassTag[T]].runtimeClass
    val className = expressionClass.getCanonicalName
    val databaseName: String = database.getOrElse(this.database.orNull)
    // The empty strings and the "1.0" / "built-in" literals are fixed values
    // passed positionally to the ExpressionInfo constructor.
    new ExpressionInfo(className, databaseName, name, usage, "", example, "", group, "1.0", "", "built-in")
}
/**
  * Base class for all binary geometry expressions, i.e. expressions that
  * take two geometries as input. It provides the evaluation, copy and
  * codegen boilerplate so a new expression needs very little code.
  *
  * @param leftGeometryExpr
  *   The expression for the left/first geometry.
  * @param rightGeometryExpr
  *   The expression for the right/second geometry.
  * @param returnsGeometry
  *   Whether the expression returns a geometry or not.
  * @param expressionConfig
  *   Additional arguments for the expression (expressionConfigs).
  * @tparam T
  *   The type of the extending class.
  */
abstract class BinaryVectorExpression[T <: Expression: ClassTag](
    leftGeometryExpr: Expression,
    rightGeometryExpr: Expression,
    returnsGeometry: Boolean,
    expressionConfig: MosaicExpressionConfig
) extends BinaryExpression
      with VectorExpression
      with NullIntolerant
      with Serializable {

    override def left: Expression = leftGeometryExpr

    override def right: Expression = rightGeometryExpr

    override def geometryAPI: GeometryAPI = expressionConfig.getGeometryAPI()

    /**
      * To be implemented by the extending class. Called when the expression
      * is evaluated, with both inputs already converted to geometries.
      *
      * @param leftGeometry
      *   The left/first geometry.
      * @param rightGeometry
      *   The right/second geometry.
      * @return
      *   A result of the expression.
      */
    def geometryTransform(leftGeometry: MosaicGeometry, rightGeometry: MosaicGeometry): Any

    /**
      * Evaluates the expression: converts both input values to geometries,
      * applies [[geometryTransform]] and serialises the result using the data
      * type of the left input.
      *
      * @param leftGeometryRow
      *   The row containing the left/first geometry.
      * @param rightGeometryRow
      *   The row containing the right/second geometry.
      * @return
      *   The result of the expression.
      */
    //noinspection DuplicatedCode
    override def nullSafeEval(leftGeometryRow: Any, rightGeometryRow: Any): Any = {
        val transformed = geometryTransform(
          geometryAPI.valueToGeometry(leftGeometryRow, leftGeometryExpr.dataType),
          geometryAPI.valueToGeometry(rightGeometryRow, rightGeometryExpr.dataType)
        )
        serialise(transformed, returnsGeometry, leftGeometryExpr.dataType)
    }

    /**
      * To be implemented by the extending class. Called when the codegen of
      * the expression is produced; reading and writing of the geometries is
      * handled by the caller.
      *
      * @param leftMosaicGeometryRef
      *   The left/first mosaic geometry reference.
      * @param rightMosaicGeometryRef
      *   The right/second mosaic geometry reference.
      * @param ctx
      *   The codegen context.
      * @return
      *   A tuple containing the code and the reference to the result.
      */
    def geometryCodeGen(leftMosaicGeometryRef: String, rightMosaicGeometryRef: String, ctx: CodegenContext): (String, String)

    override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 2, expressionConfig)

    override def withNewChildrenInternal(
        newFirst: Expression,
        newSecond: Expression
    ): Expression = makeCopy(Array(newFirst, newSecond))

    /**
      * The actual codegen implementation. Emits the code that reads both
      * geometries, runs the expression code, serialises the result and
      * assigns it to the output value. The extending class does not need to
      * override this method.
      *
      * @param ctx
      *   The codegen context.
      * @param ev
      *   The expression code.
      * @return
      *   The result of the expression.
      */
    //noinspection DuplicatedCode
    override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
        nullSafeCodeGen(
          ctx,
          ev,
          (leftEval, rightEval) => {
              val (readLeftCode, leftRef) = ConvertToCodeGen.readGeometryCode(ctx, leftEval, leftGeometryExpr.dataType, geometryAPI)
              val (readRightCode, rightRef) = ConvertToCodeGen.readGeometryCode(ctx, rightEval, rightGeometryExpr.dataType, geometryAPI)
              val (transformCode, resultRef) = geometryCodeGen(mosaicGeometryRef(leftRef), mosaicGeometryRef(rightRef), ctx)
              val (writeCode, outputRef) = serialiseCodegen(resultRef, returnsGeometry, leftGeometryExpr.dataType, ctx)
              val generated = s"""
                                 |$readLeftCode
                                 |$readRightCode
                                 |$transformCode
                                 |$writeCode
                                 |${ev.value} = $outputRef;
                                 |""".stripMargin
              geometryAPI.codeGenTryWrap(generated)
          }
        )

}
/**
  * Mixin that validates the geometry encoding of an input data type. Only
  * the encodings listed in `encodings` (currently just GEOJSON) are accepted.
  */
trait RequiresCRS {

    /** The geometry encodings accepted by [[checkEncoding]]. */
    val encodings: immutable.Seq[String] = List("GEOJSON")

    /**
      * Checks that the default geometry format of the data type is one of
      * the supported encodings.
      *
      * @param dataType
      *   The data type of the input geometry.
      * @throws MosaicException.GeometryEncodingNotSupported
      *   If the encoding is not listed in `encodings`.
      */
    def checkEncoding(dataType: DataType): Unit =
        GeometryFormat.getDefaultFormat(dataType) match {
            case supported if encodings.contains(supported) => ()
            case unsupported                                => throw MosaicException.GeometryEncodingNotSupported(encodings, unsupported)
        }

}
/**
  * Base class for all unary geometry expressions that require 1 additional
  * argument. It provides the evaluation, copy and codegen boilerplate so a
  * new expression needs very little code. The term unary refers to the
  * number of input geometries; by convention the number of additional
  * arguments is encoded in the class name.
  *
  * @param geometryExpr
  *   The expression for the geometry.
  * @param argExpr
  *   The expression for the argument.
  * @param returnsGeometry
  *   Whether the expression returns a geometry or not.
  * @param expressionConfig
  *   Additional arguments for the expression (expressionConfigs).
  * @tparam T
  *   The type of the extending class.
  */
abstract class UnaryVector1ArgExpression[T <: Expression: ClassTag](
    geometryExpr: Expression,
    argExpr: Expression,
    returnsGeometry: Boolean,
    expressionConfig: MosaicExpressionConfig
) extends BinaryExpression
      with VectorExpression
      with NullIntolerant
      with Serializable {

    override def left: Expression = geometryExpr

    override def right: Expression = argExpr

    override def geometryAPI: GeometryAPI = expressionConfig.getGeometryAPI()

    /**
      * To be implemented by the extending class. Called when the expression
      * is evaluated, with the input already converted to a geometry.
      *
      * @param geometry
      *   The geometry.
      * @param arg
      *   The argument.
      * @return
      *   A result of the expression.
      */
    def geometryTransform(geometry: MosaicGeometry, arg: Any): Any

    /**
      * Evaluates the expression: converts the input value to a geometry,
      * applies [[geometryTransform]] and serialises the result using the data
      * type of the geometry input.
      *
      * @param geometryRow
      *   The row containing the geometry.
      * @param arg
      *   The evaluated additional argument, passed through unchanged.
      * @return
      *   The result of the expression.
      */
    //noinspection DuplicatedCode
    override def nullSafeEval(geometryRow: Any, arg: Any): Any = {
        val transformed = geometryTransform(geometryAPI.valueToGeometry(geometryRow, geometryExpr.dataType), arg)
        serialise(transformed, returnsGeometry, geometryExpr.dataType)
    }

    /**
      * To be implemented by the extending class. Called when the codegen of
      * the expression is produced; reading and writing of the geometry is
      * handled by the caller.
      *
      * @param geometryRef
      *   The geometry reference.
      * @param argRef
      *   The argument reference.
      * @param ctx
      *   The codegen context.
      * @return
      *   A tuple containing the code and the reference to the result.
      */
    def geometryCodeGen(geometryRef: String, argRef: String, ctx: CodegenContext): (String, String)

    override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 2, expressionConfig)

    override def withNewChildrenInternal(
        newFirst: Expression,
        newSecond: Expression
    ): Expression = makeCopy(Array(newFirst, newSecond))

    /**
      * The actual codegen implementation. Emits the code that reads the
      * geometry, runs the expression code, serialises the result and assigns
      * it to the output value. The extending class does not need to override
      * this method.
      *
      * @param ctx
      *   The codegen context.
      * @param ev
      *   The expression code.
      * @return
      *   The result of the expression.
      */
    override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
        nullSafeCodeGen(
          ctx,
          ev,
          (leftEval, rightEval) => {
              val (readCode, geometryInRef) = ConvertToCodeGen.readGeometryCode(ctx, leftEval, geometryExpr.dataType, geometryAPI)
              val (transformCode, resultRef) = geometryCodeGen(mosaicGeometryRef(geometryInRef), rightEval, ctx)
              val (writeCode, outputRef) = serialiseCodegen(resultRef, returnsGeometry, geometryExpr.dataType, ctx)
              val generated = s"""
                                 |$readCode
                                 |$transformCode
                                 |$writeCode
                                 |${ev.value} = $outputRef;
                                 |""".stripMargin
              geometryAPI.codeGenTryWrap(generated)
          }
        )

}
/**
  * Base class for all unary geometry expressions that require 2 additional
  * arguments. It provides the evaluation, copy and codegen boilerplate so a
  * new expression needs very little code. The term unary refers to the
  * number of input geometries; by convention the number of additional
  * arguments is encoded in the class name.
  *
  * @param geometryExpr
  *   The expression for the geometry.
  * @param arg1Expr
  *   The expression for the first argument.
  * @param arg2Expr
  *   The expression for the second argument.
  * @param returnsGeometry
  *   Whether the expression returns a geometry or not.
  * @param expressionConfig
  *   Additional arguments for the expression (expressionConfigs).
  * @tparam T
  *   The type of the extending class.
  */
abstract class UnaryVector2ArgExpression[T <: Expression: ClassTag](
    geometryExpr: Expression,
    arg1Expr: Expression,
    arg2Expr: Expression,
    returnsGeometry: Boolean,
    expressionConfig: MosaicExpressionConfig
) extends TernaryExpression
      with VectorExpression
      with NullIntolerant
      with Serializable {

    override def first: Expression = geometryExpr

    override def second: Expression = arg1Expr

    override def third: Expression = arg2Expr

    override def geometryAPI: GeometryAPI = expressionConfig.getGeometryAPI()

    /**
      * To be implemented by the extending class. Called when the expression
      * is evaluated, with the input already converted to a geometry.
      *
      * @param geometry
      *   The geometry.
      * @param arg1
      *   The first argument.
      * @param arg2
      *   The second argument.
      * @return
      *   A result of the expression.
      */
    def geometryTransform(geometry: MosaicGeometry, arg1: Any, arg2: Any): Any

    /**
      * Evaluates the expression: converts the input value to a geometry,
      * applies [[geometryTransform]] and serialises the result using the data
      * type of the geometry input.
      *
      * @param geometryRow
      *   The row containing the geometry.
      * @param arg1
      *   The first argument.
      * @param arg2
      *   The second argument.
      * @return
      *   The result of the expression.
      */
    override def nullSafeEval(geometryRow: Any, arg1: Any, arg2: Any): Any = {
        val transformed = geometryTransform(geometryAPI.valueToGeometry(geometryRow, geometryExpr.dataType), arg1, arg2)
        serialise(transformed, returnsGeometry, geometryExpr.dataType)
    }

    /**
      * To be implemented by the extending class. Called when the codegen of
      * the expression is produced; reading and writing of the geometry is
      * handled by the caller.
      *
      * @param mosaicGeometryRef
      *   The reference to mosaic geometry.
      * @param arg1Ref
      *   The first argument reference.
      * @param arg2Ref
      *   The second argument reference.
      * @param ctx
      *   The codegen context.
      * @return
      *   A tuple containing the code and the reference to the result.
      */
    def geometryCodeGen(mosaicGeometryRef: String, arg1Ref: String, arg2Ref: String, ctx: CodegenContext): (String, String)

    override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 3, expressionConfig)

    override def withNewChildrenInternal(
        newFirst: Expression,
        newSecond: Expression,
        newThird: Expression
    ): Expression = makeCopy(Array(newFirst, newSecond, newThird))

    /**
      * The actual codegen implementation. Emits the code that reads the
      * geometry, runs the expression code, serialises the result and assigns
      * it to the output value. The extending class does not need to override
      * this method.
      *
      * @param ctx
      *   The codegen context.
      * @param ev
      *   The expression code.
      * @return
      *   The result of the expression.
      */
    //noinspection DuplicatedCode
    override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
        nullSafeCodeGen(
          ctx,
          ev,
          (geomEval, arg1Eval, arg2Eval) => {
              val (readCode, geometryInRef) = ConvertToCodeGen.readGeometryCode(ctx, geomEval, geometryExpr.dataType, geometryAPI)
              val (transformCode, resultRef) = geometryCodeGen(mosaicGeometryRef(geometryInRef), arg1Eval, arg2Eval, ctx)
              val (writeCode, outputRef) = serialiseCodegen(resultRef, returnsGeometry, geometryExpr.dataType, ctx)
              val generated = s"""
                                 |$readCode
                                 |$transformCode
                                 |$writeCode
                                 |${ev.value} = $outputRef;
                                 |""".stripMargin
              geometryAPI.codeGenTryWrap(generated)
          }
        )

}
/**
  * Base class for all unary geometry expressions, i.e. expressions that take
  * a single geometry as input. It provides the evaluation, copy and codegen
  * boilerplate so a new expression needs very little code.
  *
  * @param geometryExpr
  *   The expression for the geometry.
  * @param returnsGeometry
  *   Whether the expression returns a geometry or not.
  * @param expressionConfig
  *   Additional arguments for the expression (expressionConfigs).
  * @tparam T
  *   The type of the extending class.
  */
abstract class UnaryVectorExpression[T <: Expression: ClassTag](
    geometryExpr: Expression,
    returnsGeometry: Boolean,
    expressionConfig: MosaicExpressionConfig
) extends UnaryExpression
      with VectorExpression
      with NullIntolerant
      with Serializable {

    override def child: Expression = geometryExpr

    override def geometryAPI: GeometryAPI = expressionConfig.getGeometryAPI()

    /**
      * To be implemented by the extending class. Called when the expression
      * is evaluated, with the input already converted to a geometry.
      *
      * @param geometry
      *   The geometry.
      * @return
      *   A result of the expression.
      */
    def geometryTransform(geometry: MosaicGeometry): Any

    /**
      * Evaluates the expression: converts the input value to a geometry,
      * applies [[geometryTransform]] and serialises the result using the data
      * type of the geometry input.
      *
      * @param geometryRow
      *   The row containing the geometry.
      * @return
      *   The result of the expression.
      */
    override def nullSafeEval(geometryRow: Any): Any = {
        val transformed = geometryTransform(geometryAPI.valueToGeometry(geometryRow, geometryExpr.dataType))
        serialise(transformed, returnsGeometry, geometryExpr.dataType)
    }

    /**
      * To be implemented by the extending class. Called when the codegen of
      * the expression is produced; reading and writing of the geometry is
      * handled by the caller.
      *
      * @param mosaicGeometryRef
      *   The reference to mosaic geometry.
      * @param ctx
      *   The codegen context.
      * @return
      *   A tuple containing the code and the reference to the result.
      */
    def geometryCodeGen(mosaicGeometryRef: String, ctx: CodegenContext): (String, String)

    override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 1, expressionConfig)

    override def withNewChildInternal(
        newFirst: Expression
    ): Expression = makeCopy(Array(newFirst))

    /**
      * The actual codegen implementation. Emits the code that reads the
      * geometry, runs the expression code, serialises the result and assigns
      * it to the output value. The extending class does not need to override
      * this method.
      *
      * @param ctx
      *   The codegen context.
      * @param ev
      *   The expression code.
      * @return
      *   The result of the expression.
      */
    //noinspection DuplicatedCode
    override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
        nullSafeCodeGen(
          ctx,
          ev,
          eval => {
              val (readCode, geometryInRef) = ConvertToCodeGen.readGeometryCode(ctx, eval, geometryExpr.dataType, geometryAPI)
              val (transformCode, resultRef) = geometryCodeGen(mosaicGeometryRef(geometryInRef), ctx)
              val (writeCode, outputRef) = serialiseCodegen(resultRef, returnsGeometry, geometryExpr.dataType, ctx)
              val generated = s"""
                                 |$readCode
                                 |$transformCode
                                 |$writeCode
                                 |${ev.value} = $outputRef;
                                 |""".stripMargin
              geometryAPI.codeGenTryWrap(generated)
          }
        )

}
/**
  * Base trait for all vector expressions. It holds the serialisation and
  * codegen helpers shared by the geometry expression base classes, which
  * minimises the code needed to create a new expression.
  */
trait VectorExpression {

    /** The geometry API used to read and write geometries. */
    def geometryAPI: GeometryAPI

    /** Mosaic geometry class name as reported by the geometry API. */
    def mosaicGeomClass: String = geometryAPI.mosaicGeometryClass

    /** Geometry class name as reported by the geometry API. */
    def geomClass: String = geometryAPI.geometryClass

    /** Fully qualified class name of [[CRSBoundsProvider]]. */
    def CRSBoundsProviderClass: String = classOf[CRSBoundsProvider].getName

    /** Fully qualified class name of [[GeometryAPI]]. */
    def geometryAPIClass: String = classOf[GeometryAPI].getName

    /**
      * Generic serialisation of the expression result. A geometry result is
      * serialised through the geometry API; any other result is passed
      * through untouched.
      *
      * @param result
      *   The result of the expression.
      * @param returnsGeometry
      *   Whether the expression returns a geometry.
      * @param dataType
      *   The data type of the result.
      * @return
      *   The serialised result.
      */
    def serialise(result: Any, returnsGeometry: Boolean, dataType: DataType): Any =
        if (!returnsGeometry) result
        else geometryAPI.serialize(result.asInstanceOf[MosaicGeometry], dataType)

    /**
      * Generic serialisation codegen of the expression result. For a geometry
      * result it emits code that unwraps the geometry and writes it out; for
      * any other result it emits no code and returns the reference as is.
      *
      * @param resultRef
      *   The reference to the result of the expression.
      * @param returnsGeometry
      *   Whether the expression returns a geometry.
      * @param dataType
      *   The data type of the result.
      * @param ctx
      *   The codegen context.
      * @return
      *   A tuple containing the code and the reference to the serialised
      *   result.
      */
    def serialiseCodegen(resultRef: String, returnsGeometry: Boolean, dataType: DataType, ctx: CodegenContext): (String, String) =
        if (!returnsGeometry) {
            ("", resultRef) // noop code
        } else {
            val baseGeometryRef = ctx.freshName("baseGeometry")
            val (writeCode, outputRef) = ConvertToCodeGen.writeGeometryCode(ctx, baseGeometryRef, dataType, geometryAPI)
            val unwrapAndWriteCode =
                s"""
                   |$geomClass $baseGeometryRef = $resultRef.getGeom();
                   |$writeCode
                   |""".stripMargin
            (unwrapAndWriteCode, outputRef)
        }

    /**
      * Builds the codegen snippet that wraps a geometry variable into a
      * mosaic geometry.
      *
      * @param geometryRef
      *   The geometry variable reference.
      * @return
      *   The mosaic geometry instance in codegen.
      */
    def mosaicGeometryRef(geometryRef: String): String = {
        val wrapperClass = geometryAPI.mosaicGeometryClass
        s"${wrapperClass}.apply($geometryRef)"
    }

}
/**
 * Base class for all raster expressions that take one argument in addition
 * to the raster path. It provides the boilerplate code needed to create a
 * function builder for a given expression. It minimises amount of code needed
 * to create a new expression.
 *
 * @param pathExpr
 *   The expression for the raster path.
 * @param arg1Expr
 *   The expression for the first argument.
 * @param outputType
 *   The output type of the result.
 * @param expressionConfig
 *   Additional arguments for the expression (expressionConfigs).
 * @tparam T
 *   The type of the extending class.
 */
abstract class Raster1ArgExpression[T <: Expression: ClassTag](
    pathExpr: Expression,
    arg1Expr: Expression,
    outputType: DataType,
    expressionConfig: MosaicExpressionConfig
) extends BinaryExpression
      with NullIntolerant
      with Serializable {

    /**
     * The raster API to be used. Enabling is attempted here so that
     * subclasses don't need to worry about it.
     */
    protected val rasterAPI: RasterAPI = expressionConfig.getRasterAPI()
    // Best effort: the outcome of enable() is wrapped in a Try and discarded.
    Try {
        rasterAPI.enable()
    }

    override def left: Expression = pathExpr

    override def right: Expression = arg1Expr

    /** Output Data Type */
    override def dataType: DataType = outputType

    /**
     * The function to be overridden by the extending class. It is called when
     * the expression is evaluated. It provides the raster and the argument to
     * the expression. It abstracts spark serialization from the caller.
     *
     * @param raster
     *   The raster to be used.
     * @param arg1
     *   The first argument.
     * @return
     *   A result of the expression.
     */
    def rasterTransform(raster: MosaicRaster, arg1: Any): Any

    /**
     * Evaluation of the expression. It loads the raster from the evaluated
     * path, applies [[rasterTransform]] and cleans the raster up before
     * returning. The clean up also runs when the transform throws, so a
     * failing row does not leave the raster open.
     *
     * @param inputPath
     *   The path to the raster. It is a UTF8String.
     * @param arg1
     *   The first argument.
     * @return
     *   The result of the expression.
     */
    override def nullSafeEval(inputPath: Any, arg1: Any): Any = {
        val path = inputPath.asInstanceOf[UTF8String].toString
        val raster = rasterAPI.raster(path)
        try {
            rasterTransform(raster, arg1)
        } finally {
            raster.cleanUp()
        }
    }

    override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 2, expressionConfig)

    override def withNewChildrenInternal(
        newFirst: Expression,
        newArg1: Expression
    ): Expression = makeCopy(Array[AnyRef](newFirst, newArg1))

}
/**
 * Base class for all raster expressions that take two arguments in addition
 * to the raster path. It provides the boilerplate code needed to create a
 * function builder for a given expression. It minimises amount of code needed
 * to create a new expression.
 *
 * @param pathExpr
 *   The expression for the raster path.
 * @param arg1Expr
 *   The expression for the first argument.
 * @param arg2Expr
 *   The expression for the second argument.
 * @param outputType
 *   The output type of the result.
 * @param expressionConfig
 *   Additional arguments for the expression (expressionConfigs).
 * @tparam T
 *   The type of the extending class.
 */
abstract class Raster2ArgExpression[T <: Expression: ClassTag](
    pathExpr: Expression,
    arg1Expr: Expression,
    arg2Expr: Expression,
    outputType: DataType,
    expressionConfig: MosaicExpressionConfig
) extends TernaryExpression
      with NullIntolerant
      with Serializable {

    /**
     * The raster API to be used. Enabling is attempted here so that
     * subclasses don't need to worry about it.
     */
    protected val rasterAPI: RasterAPI = expressionConfig.getRasterAPI()
    // Best effort: the outcome of enable() is wrapped in a Try and discarded.
    Try {
        rasterAPI.enable()
    }

    override def first: Expression = pathExpr

    override def second: Expression = arg1Expr

    override def third: Expression = arg2Expr

    /** Output Data Type */
    override def dataType: DataType = outputType

    /**
     * The function to be overridden by the extending class. It is called when
     * the expression is evaluated. It provides the raster and the arguments to
     * the expression. It abstracts spark serialization from the caller.
     *
     * @param raster
     *   The raster to be used.
     * @param arg1
     *   The first argument.
     * @param arg2
     *   The second argument.
     * @return
     *   A result of the expression.
     */
    def rasterTransform(raster: MosaicRaster, arg1: Any, arg2: Any): Any

    /**
     * Evaluation of the expression. It loads the raster from the evaluated
     * path, applies [[rasterTransform]] and cleans the raster up before
     * returning. The clean up also runs when the transform throws, so a
     * failing row does not leave the raster open.
     *
     * @param inputPath
     *   The path to the raster. It is a UTF8String.
     * @param arg1
     *   The first argument.
     * @param arg2
     *   The second argument.
     * @return
     *   The result of the expression.
     */
    override def nullSafeEval(inputPath: Any, arg1: Any, arg2: Any): Any = {
        val path = inputPath.asInstanceOf[UTF8String].toString
        val raster = rasterAPI.raster(path)
        try {
            rasterTransform(raster, arg1, arg2)
        } finally {
            raster.cleanUp()
        }
    }

    override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 3, expressionConfig)

    override def withNewChildrenInternal(
        newFirst: Expression,
        newArg1: Expression,
        newArg2: Expression
    ): Expression = makeCopy(Array[AnyRef](newFirst, newArg1, newArg2))

}
/**
 * Base class for all raster band expressions that take no arguments besides
 * the raster path and the band index. It provides the boilerplate code needed
 * to create a function builder for a given expression. It minimises amount of
 * code needed to create a new expression.
 *
 * @param pathExpr
 *   The expression for the raster path.
 * @param bandExpr
 *   The expression for the band index.
 * @param outputType
 *   The output type of the result.
 * @param expressionConfig
 *   Additional arguments for the expression (expressionConfigs).
 * @tparam T
 *   The type of the extending class.
 */
abstract class RasterBandExpression[T <: Expression: ClassTag](
    pathExpr: Expression,
    bandExpr: Expression,
    outputType: DataType,
    expressionConfig: MosaicExpressionConfig
) extends BinaryExpression
      with NullIntolerant
      with Serializable {

    /**
     * The raster API to be used. Enabling is attempted here so that
     * subclasses don't need to worry about it.
     */
    protected val rasterAPI: RasterAPI = expressionConfig.getRasterAPI()
    // Best effort: the outcome of enable() is wrapped in a Try and discarded.
    Try {
        rasterAPI.enable()
    }

    override def left: Expression = pathExpr

    override def right: Expression = bandExpr

    /** Output Data Type */
    override def dataType: DataType = outputType

    /**
     * The function to be overridden by the extending class. It is called when
     * the expression is evaluated. It provides the raster and the selected
     * band to the expression. It abstracts spark serialization from the
     * caller.
     *
     * @param raster
     *   The raster to be used.
     * @param band
     *   The band to be used.
     * @return
     *   The result of the expression.
     */
    def bandTransform(raster: MosaicRaster, band: MosaicRasterBand): Any

    /**
     * Evaluation of the expression. It loads the raster from the evaluated
     * path, fetches the band with the evaluated index, applies
     * [[bandTransform]] and cleans the raster up before returning. Band
     * lookup and transform both run inside the guarded section, so the clean
     * up happens even when either of them throws.
     *
     * @param inputPath
     *   The path to the raster. It is a UTF8String.
     * @param inputBand
     *   The band index to be used. It is an Int.
     * @return
     *   The result of the expression.
     */
    override def nullSafeEval(inputPath: Any, inputBand: Any): Any = {
        val path = inputPath.asInstanceOf[UTF8String].toString
        val bandIndex = inputBand.asInstanceOf[Int]

        val raster = rasterAPI.raster(path)
        try {
            val band = raster.getBand(bandIndex)
            bandTransform(raster, band)
        } finally {
            raster.cleanUp()
        }
    }

    override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 2, expressionConfig)

    override def withNewChildrenInternal(newFirst: Expression, newSecond: Expression): Expression =
        makeCopy(Array[AnyRef](newFirst, newSecond))

}
/**
 * Base class for all raster expressions that take no arguments besides the
 * raster path. It provides the boilerplate code needed to create a function
 * builder for a given expression. It minimises amount of code needed to
 * create a new expression.
 *
 * @param pathExpr
 *   The expression for the raster path.
 * @param outputType
 *   The output type of the result.
 * @param expressionConfig
 *   Additional arguments for the expression (expressionConfigs).
 * @tparam T
 *   The type of the extending class.
 */
abstract class RasterExpression[T <: Expression: ClassTag](
    pathExpr: Expression,
    outputType: DataType,
    expressionConfig: MosaicExpressionConfig
) extends UnaryExpression
      with NullIntolerant
      with Serializable {

    /**
     * The raster API to be used. Enabling is attempted here so that
     * subclasses don't need to worry about it.
     */
    protected val rasterAPI: RasterAPI = expressionConfig.getRasterAPI()
    // Best effort: the outcome of enable() is wrapped in a Try and discarded.
    Try {
        rasterAPI.enable()
    }

    override def child: Expression = pathExpr

    /** Output Data Type */
    override def dataType: DataType = outputType

    /**
     * The function to be overridden by the extending class. It is called when
     * the expression is evaluated. It provides the raster to the expression.
     * It abstracts spark serialization from the caller.
     *
     * @param raster
     *   The raster to be used.
     * @return
     *   The result of the expression.
     */
    def rasterTransform(raster: MosaicRaster): Any

    /**
     * Evaluation of the expression. It loads the raster from the evaluated
     * path, applies [[rasterTransform]] and cleans the raster up before
     * returning. The clean up also runs when the transform throws, so a
     * failing row does not leave the raster open.
     *
     * @param inputPath
     *   The path to the raster. It is a UTF8String.
     * @return
     *   The result of the expression.
     */
    override def nullSafeEval(inputPath: Any): Any = {
        val path = inputPath.asInstanceOf[UTF8String].toString
        val raster = rasterAPI.raster(path)
        try {
            rasterTransform(raster)
        } finally {
            raster.cleanUp()
        }
    }

    override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 1, expressionConfig)

    override def withNewChildInternal(newFirst: Expression): Expression = makeCopy(Array[AnyRef](newFirst))

}
/**
 * Base class for all raster generator expressions that take no arguments
 * besides the raster path. It provides the boilerplate code needed to create
 * a function builder for a given expression. These expressions generate a
 * collection of new rasters based on the input raster; each new raster is
 * saved through `saveCheckpoint` under the configured checkpoint path and the
 * resulting path is emitted as a row.
 *
 * @param inPathExpr
 *   The expression for the raster path.
 * @param expressionConfig
 *   Additional arguments for the expression (expressionConfigs).
 * @tparam T
 *   The type of the extending class.
 */
abstract class RasterGeneratorExpression[T <: Expression: ClassTag](
    inPathExpr: Expression,
    expressionConfig: MosaicExpressionConfig
) extends CollectionGenerator
      with NullIntolerant
      with Serializable {

    /** Random identifier (dashes replaced by underscores) passed to every checkpoint save. */
    val uuid: String = java.util.UUID.randomUUID().toString.replace("-", "_")

    /**
     * The raster API to be used. Enabling is attempted here so that
     * subclasses don't need to worry about it.
     */
    protected val rasterAPI: RasterAPI = expressionConfig.getRasterAPI()
    // Best effort: the outcome of enable() is wrapped in a Try and discarded.
    Try {
        rasterAPI.enable()
    }

    override def position: Boolean = false

    override def inline: Boolean = false

    /**
     * Generator expressions require an abstraction for element type. It
     * always needs to be wrapped in a StructType. The actual type is that of
     * the struct's element: a single string column named "path".
     */
    override def elementSchema: StructType = StructType(Array(StructField("path", StringType)))

    /**
     * The function to be overridden by the extending class. It is called when
     * the expression is evaluated. It provides the raster to the expression.
     * It abstracts spark serialization from the caller.
     *
     * @param raster
     *   The raster to be used.
     * @return
     *   Sequence of (id, extent) pairs, one per sub-raster to be written. The
     *   extent is a tuple of four Ints that is forwarded unchanged to
     *   `saveCheckpoint`.
     */
    def rasterGenerator(raster: MosaicRaster): Seq[(Long, (Int, Int, Int, Int))]

    /**
     * Loads the input raster, asks [[rasterGenerator]] for the sub-rasters,
     * saves each of them as a checkpoint and returns one row per saved path.
     * The input raster is cleaned up before returning, including when
     * generation or saving throws, consistent with the other raster
     * expressions in this package.
     *
     * @param input
     *   The input row.
     * @return
     *   One row per generated raster, each holding the checkpoint path.
     */
    override def eval(input: InternalRow): TraversableOnce[InternalRow] = {
        val inPath = inPathExpr.eval(input).asInstanceOf[UTF8String].toString
        val checkpointPath = expressionConfig.getRasterCheckpoint

        val raster = rasterAPI.raster(inPath)
        try {
            rasterGenerator(raster)
                .map { case (id, extent) =>
                    val outPath = raster.saveCheckpoint(uuid, id, extent, checkpointPath)
                    InternalRow.fromSeq(Seq(UTF8String.fromString(outPath)))
                }
                // Force every checkpoint to be written before the raster is released,
                // in case the generator handed back a lazy sequence.
                .toList
        } finally {
            // NOTE(review): assumes cleanUp() only releases the input raster and does
            // not touch the files written by saveCheckpoint - confirm.
            raster.cleanUp()
        }
    }

    override def makeCopy(newArgs: Array[AnyRef]): Expression =
        GenericExpressionFactory.makeCopyImpl[T](this, newArgs, children.length, expressionConfig)

    override def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = makeCopy(newChildren.toArray)

}
/**
 * Base class for all raster to grid expressions. It provides the boilerplate
 * code needed to create a function builder for a given expression. These
 * expressions project rasters to the grid index system of Mosaic: each pixel
 * is projected to spatial coordinates and then to a grid cell, the pixels are
 * grouped by cell id and then combined to form a cell -> measure collection
 * per band of the raster.
 *
 * @param pathExpr
 *   The expression for the raster path.
 * @param resolution
 *   The expression for the grid resolution. Its evaluated value is cast to
 *   Int.
 * @param measureType
 *   The output type of the measure.
 * @param expressionConfig
 *   Additional arguments for the expression (expressionConfigs).
 * @tparam T
 *   The type of the extending class.
 * @tparam P
 *   The type of the combined measure produced by [[valuesCombiner]].
 */
abstract class RasterToGridExpression[T <: Expression: ClassTag, P](
    pathExpr: Expression,
    resolution: Expression,
    measureType: DataType,
    expressionConfig: MosaicExpressionConfig
) extends Raster1ArgExpression[T](pathExpr, resolution, RasterToGridType(expressionConfig.getCellIdType, measureType), expressionConfig)
      with NullIntolerant
      with Serializable {

    /** The index system to be used. */
    val indexSystem: IndexSystem = expressionConfig.getIndexSystem()

    /** The geometry API to be used. */
    val geometryAPI: GeometryAPI = expressionConfig.getGeometryAPI()

    /**
     * It projects the pixels to the grid and groups by the results so that
     * the result is a sequence of (cellId, measure) for each band of the
     * raster. It applies the values combiner on the measures of each cell.
     *
     * @param raster
     *   The raster to be used.
     * @param arg1
     *   The grid resolution. It is cast to Int.
     * @return
     *   Serialized (cellId, measure) collection per band of the raster.
     */
    override def rasterTransform(raster: MosaicRaster, arg1: Any): Any = {
        val gt = raster.getGeoTransform
        val gridResolution = arg1.asInstanceOf[Int]
        val bandTransform = bandTransformer(_, gridResolution, gt)
        val transformed = raster.transformBands(bandTransform)
        serialize(transformed)
    }

    /**
     * The method to be overridden to specify how the pixel values are
     * combined within a cell.
     *
     * @param values
     *   The values to be combined.
     * @return
     *   The combined value/values.
     */
    def valuesCombiner(values: Seq[Double]): P

    /**
     * Maps one pixel to the grid cell that contains its centre.
     *
     * @param gt
     *   The geo transform coefficients; indices 0 to 5 are read.
     * @param resolution
     *   The grid resolution.
     * @param x
     *   The pixel x coordinate.
     * @param y
     *   The pixel y coordinate.
     * @param value
     *   The pixel value, passed through unchanged.
     * @return
     *   The (cellId, value) pair.
     */
    //noinspection ZeroIndexToHead
    protected def pixelTransformer(gt: Seq[Double], resolution: Int)(x: Int, y: Int, value: Double): (Long, Double) = {
        val offset = 0.5 // This centers the point to the pixel centroid
        val xOffset = offset + x
        val yOffset = offset + y
        val xGeo = gt(0) + xOffset * gt(1) + yOffset * gt(2)
        val yGeo = gt(3) + xOffset * gt(4) + yOffset * gt(5)
        val cellID = indexSystem.pointToIndex(xGeo, yGeo, resolution)
        (cellID, value)
    }

    /**
     * Projects every pixel of a band to a grid cell and combines the values
     * that fall into the same cell.
     *
     * @param band
     *   The band to be transformed.
     * @param resolution
     *   The grid resolution.
     * @param gt
     *   The geo transform coefficients.
     * @return
     *   The combined measure keyed by cell id.
     */
    protected def bandTransformer(band: MosaicRasterBand, resolution: Int, gt: Seq[Double]): Map[Long, P] = {
        val results = band.transformValues[(Long, Double)](pixelTransformer(gt, resolution), (0L, -1.0))
        results
            // Filter out default cells. We don't want to return them since they are masked in original raster.
            // We use 0L as a dummy cell ID for default cells.
            .map(row => row.filter(_._1 != 0L))
            .filterNot(_.isEmpty)
            .flatten
            .groupBy(_._1) // Group by cell ID.
            // Strict map rather than mapValues: mapValues hands back a lazy view that
            // re-runs the combiner on every access and is not serializable.
            .map { case (cellID, values) => cellID -> valuesCombiner(values.map(_._2)) }
    }

    /**
     * Serializes the result of the raster transform to the desired output
     * type.
     *
     * @param cellsWithMeasure
     *   The result of the raster transform to be serialized to spark internal
     *   types.
     * @return
     *   The serialized result: an array (per band) of arrays of
     *   (serialized cell id, measure) rows.
     */
    private def serialize(cellsWithMeasure: Traversable[Traversable[(Any, P)]]): ArrayData = {
        ArrayData.toArrayData(
          cellsWithMeasure.map(result =>
              ArrayData.toArrayData(
                result.map { case (cellID, value) => InternalRow.fromSeq(Seq(indexSystem.serializeCellId(cellID), value)) }
              )
          )
        )
    }

}
/**
 * Converts a scala Map[String, String] into a spark map with string keys and
 * string values.
 *
 * @param metaData
 *   The metadata to be used.
 * @return
 *   Serialized map.
 */
def buildMapString(metaData: Map[String, String]): ArrayBasedMapData = {
    // Materialise the entries once so that keys and values stay aligned.
    val entries = metaData.toArray
    val keyData = ArrayData.toArrayData(entries.map(entry => UTF8String.fromString(entry._1)))
    val valueData = ArrayData.toArrayData(entries.map(entry => UTF8String.fromString(entry._2)))
    val builder = new ArrayBasedMapBuilder(StringType, StringType)
    builder.putAll(keyData, valueData)
    builder.build()
}

/**
 * Converts a scala Map[String, Double] into a spark map with string keys and
 * double values.
 *
 * @param metaData
 *   The metadata to be used.
 * @return
 *   Serialized map.
 */
def buildMapDouble(metaData: Map[String, Double]): ArrayBasedMapData = {
    // Materialise the entries once so that keys and values stay aligned.
    val entries = metaData.toArray
    val keyData = ArrayData.toArrayData(entries.map(entry => UTF8String.fromString(entry._1)))
    val measures: Array[Double] = entries.map(entry => entry._2)
    val valueData = ArrayData.toArrayData(measures)
    val builder = new ArrayBasedMapBuilder(StringType, DoubleType)
    builder.putAll(keyData, valueData)
    builder.build()
}
+ * Each sequence of points represents a shell. + * + * @return Returns the shells of this geometry as a sequence of sequences of points. + */ + def getShellPoints: Seq[Seq[MosaicPoint]] + + /** + * Returns the holes of this geometry as a sequence of sequences of sequences of points. + * Each sequence of points represents a hole. + * Each sequence of holes is related to a single shell. + * If the geometry has 3 shells, 2 holes in first shell, 0 holes in second shell and + * 1 hole in third shell, the returned sequence will be as follows: + * Seq( Seq( Seq( hole1, hole2 ) ), Seq( Seq() ), Seq( Seq( hole3 ) ) ) + * where hole1, hole2 and hole3 are sequences of points. + * + * @return Returns the holes of this geometry as a sequence of sequences of sequences of points. + */ + def getHolePoints: Seq[Seq[Seq[MosaicPoint]]] + + /** + * @return Returns the number of points in this geometry. + */ + def numPoints: Int + + /** + * Translates this geometry by given x and y distances. + * + * @param xd x distance + * @param yd y distance + * @return Returns the translated geometry. + */ + def translate(xd: Double, yd: Double): MosaicGeometry + + /** + * Scales this geometry by given x and y distances. + * + * @param xd x distance + * @param yd y distance + * @return Returns the scaled geometry. + */ + def scale(xd: Double, yd: Double): MosaicGeometry + + /** + * Rotates this geometry by given angle. + * + * @param td angle in degrees + * @return Returns the rotated geometry. + */ + def rotate(td: Double): MosaicGeometry + + /** + * @return Returns the length of this geometry. + */ + def getLength: Double + + /** + * @param geom2 geometry to calculate distance to. + * @return Returns the distance between this geometry and given geometry. + */ + def distance(geom2: MosaicGeometry): Double + + /** + * @param geom geometry to calculate difference to. + * @return Returns the difference between this geometry and given geometry. 
+ */ + def difference(geom: MosaicGeometry): MosaicGeometry + + /** + * @return Returns the validity of this geometry. + */ + def isValid: Boolean + + /** + * @return Returns the geometry type of this geometry. + */ + def getGeometryType: String + + /** + * @return Returns the area of this geometry. + */ + def getArea: Double + + /** + * @return Returns the centroid of this geometry. + */ + def getCentroid: MosaicPoint + + /** + * @return Returns the flag indicating if this geometry is empty. + */ + def isEmpty: Boolean + + /** + * @return Returns the boundary of this geometry. The boundary is also a geometry. + */ + def getBoundary: MosaicGeometry + + /** + * @return Returns shells of the geometry as a sequence of LineStrings. Each LineString is a shell. + */ + def getShells: Seq[MosaicLineString] + + /** + * Returns holes of the geometry as a sequence of sequences of LineStrings. Each LineString is a hole. + * Each inner sequence corresponds to a single shell. + * + * @return Returns holes of the geometry as a sequence of sequences of LineStrings. + */ + def getHoles: Seq[Seq[MosaicLineString]] + + /** + * Applies given function to each point of this geometry. + * + * @param f function to apply + * @return Returns the geometry with points transformed by given function. + */ + def mapXY(f: (Double, Double) => (Double, Double)): MosaicGeometry + + /** + * @return Returns the boundary of this geometry. + */ + def boundary: MosaicGeometry + + /** + * Buffer this geometry by provided distance. + * + * @param distance distance to buffer + * @return Returns the buffer of this geometry. + */ + def buffer(distance: Double): MosaicGeometry + + /** + * Simplifies this geometry with given tolerance. + * + * @param tolerance tolerance to use + * @return Returns the simplified geometry. + */ + def simplify(tolerance: Double): MosaicGeometry + + /** + * Computes intersection of this geometry with given geometry. 
+ * + * @param other geometry to intersect with + * @return Returns the intersection of this geometry with given geometry. + */ + def intersection(other: MosaicGeometry): MosaicGeometry + + /** + * Computes the intersects flag of this geometry with given geometry. + * + * @param other geometry to test for intersection with + * @return Returns the intersects flag of this geometry with given geometry. + */ + def intersects(other: MosaicGeometry): Boolean + + /** + * @return Returns the envelope of this geometry. + */ + def envelope: MosaicGeometry + + /** + * Computes union of this geometry with given geometry. + * + * @param other geometry to union with + * @return Returns the union of this geometry with given geometry. + */ + def union(other: MosaicGeometry): MosaicGeometry + + /** + * @return Returns the unary union of this geometry. + */ + def unaryUnion: MosaicGeometry + + /** + * Computes the contains flag of this geometry with given geometry. + * + * @param other geometry to test for containment + * @return Returns the contains flag of this geometry with given geometry. + */ + def contains(other: MosaicGeometry): Boolean + + /** + * Flattens this geometry into a collection of geometries. + * + * @return Returns the flattened geometry sequence. + */ + def flatten: Seq[MosaicGeometry] + + /** + * @return Returns the equality flag of this geometry with given geometry. + */ + def equals(other: MosaicGeometry): Boolean + + /** + * @return Returns the equality flag of this geometry with given geometry. + */ + def equals(other: java.lang.Object): Boolean + + /** + * @return Returns the equality flag of this geometry with given geometry. + */ + def equalsTopo(other: MosaicGeometry): Boolean + + /** + * @return Returns the hash code of this geometry. + */ + def hashCode: Int + + /** + * @return Returns the convex hull of this geometry. + */ + def convexHull: MosaicGeometry + + /** + * Computes MIN or MAX coordinate of this geometry. + * The coordinate is selected by given dimension.
/**
 * Computes MIN or MAX coordinate of this geometry over all of its shell
 * points. The coordinate is selected by given dimension. The function is
 * selected by given func. Both selectors are resolved once, before any point
 * is visited, so an unsupported selector is reported with a descriptive
 * message instead of a bare MatchError.
 *
 * @param dimension dimension to select coordinate from
 *                  (X, Y or Z)
 *                  (case insensitive)
 * @param func      function to select coordinate by
 *                  (MIN or MAX)
 *                  (case insensitive)
 * @return Returns the MIN or MAX coordinate of this geometry.
 * @throws IllegalArgumentException      if dimension or func is not supported.
 * @throws UnsupportedOperationException if the geometry has no shell points.
 */
def minMaxCoord(dimension: String, func: String): Double = {
    val coordinateOf: MosaicPoint => Double = dimension.toUpperCase(Locale.ROOT) match {
        case "X" => (point: MosaicPoint) => point.getX
        case "Y" => (point: MosaicPoint) => point.getY
        case "Z" => (point: MosaicPoint) => point.getZ
        case _   => throw new IllegalArgumentException(s"Unsupported dimension '$dimension'; expected X, Y or Z.")
    }
    val reduce: Seq[Double] => Double = func.toUpperCase(Locale.ROOT) match {
        case "MIN" => (values: Seq[Double]) => values.min
        case "MAX" => (values: Seq[Double]) => values.max
        case _     => throw new IllegalArgumentException(s"Unsupported function '$func'; expected MIN or MAX.")
    }
    // The extreme over all shells equals the extreme of the per-shell extremes,
    // so the shells can be flattened and reduced in one go.
    reduce(this.getShellPoints.flatten.map(coordinateOf))
}
EPSG:4326) + * @param which which bounds to check (bounds or reprojected_bounds) + * @return Returns true if all shell and hole points lie within the selected bounds, false otherwise. + */ + def hasValidCoords(crsBoundsProvider: CRSBoundsProvider, crsCode: String, which: String): Boolean = { + val crsCodeIn = crsCode.split(":") + val crsBounds = which.toLowerCase(Locale.ROOT) match { + case "bounds" => crsBoundsProvider.bounds(crsCodeIn(0), crsCodeIn(1).toInt) + case "reprojected_bounds" => crsBoundsProvider.reprojectedBounds(crsCodeIn(0), crsCodeIn(1).toInt) + case _ => throw new Error("Only bounds and reprojected_bounds supported for which argument.") + } + (Seq(getShellPoints) ++ getHolePoints).flatten.flatten.forall(point => + crsBounds.getLowerX <= point.getX && point.getX <= crsBounds.getUpperX && + crsBounds.getLowerY <= point.getY && point.getY <= crsBounds.getUpperY + ) + } + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryCollection.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryCollection.scala new file mode 100644 index 000000000..58f905a97 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryCollection.scala @@ -0,0 +1,17 @@ +package com.databricks.labs.mosaic.core.geometry + +/** + * A trait that adds GeometryCollection functionality to MosaicGeometry. 
+ */ +//noinspection DuplicatedCode +trait MosaicGeometryCollection extends MosaicGeometry { + + def asSeq: Seq[MosaicGeometry] + + override def flatten: Seq[MosaicGeometry] + + override def getShellPoints: Seq[Seq[MosaicPoint]] + + override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]] + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicLineString.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicLineString.scala new file mode 100644 index 000000000..be93bac8e --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicLineString.scala @@ -0,0 +1,18 @@ +package com.databricks.labs.mosaic.core.geometry + +/** + * A trait that adds LineString functionality to MosaicGeometry. + */ +trait MosaicLineString extends MosaicGeometry { + + def asSeq: Seq[MosaicPoint] + + override def getHoles: Seq[Seq[MosaicLineString]] + + override def getShells: Seq[MosaicLineString] + + override def flatten: Seq[MosaicGeometry] + + override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]] + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiLineString.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiLineString.scala new file mode 100644 index 000000000..cc1d80ea8 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiLineString.scala @@ -0,0 +1,18 @@ +package com.databricks.labs.mosaic.core.geometry + +/** + * A trait that adds MultiLineString functionality to MosaicGeometry. 
+ */ +trait MosaicMultiLineString extends MosaicGeometry { + + def asSeq: Seq[MosaicLineString] + + override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]] + + override def getShellPoints: Seq[Seq[MosaicPoint]] + + override def getHoles: Seq[Seq[MosaicLineString]] + + override def flatten: Seq[MosaicGeometry] + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiPoint.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiPoint.scala new file mode 100644 index 000000000..1334a3b60 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiPoint.scala @@ -0,0 +1,20 @@ +package com.databricks.labs.mosaic.core.geometry + +/** + * A trait that adds MultiPoint functionality to MosaicGeometry. + */ +trait MosaicMultiPoint extends MosaicGeometry { + + def asSeq: Seq[MosaicPoint] + + override def getHoles: Seq[Seq[MosaicLineString]] + + override def flatten: Seq[MosaicGeometry] + + override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]] + + override def getShellPoints: Seq[Seq[MosaicPoint]] + + override def getShells: Seq[MosaicLineString] + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiPolygon.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiPolygon.scala new file mode 100644 index 000000000..d2f294334 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicMultiPolygon.scala @@ -0,0 +1,17 @@ +package com.databricks.labs.mosaic.core.geometry + +/** + * A trait that adds MultiPolygon functionality to MosaicGeometry. 
+ */ +//noinspection DuplicatedCode +trait MosaicMultiPolygon extends MosaicGeometry { + + def asSeq: Seq[MosaicGeometry] + + override def flatten: Seq[MosaicGeometry] + + override def getShellPoints: Seq[Seq[MosaicPoint]] + + override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]] + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicPoint.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicPoint.scala new file mode 100644 index 000000000..39af7b84d --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicPoint.scala @@ -0,0 +1,30 @@ +package com.databricks.labs.mosaic.core.geometry + +import com.databricks.labs.mosaic.core.types.Coordinates + +/** + * A trait that adds Point functionality to MosaicGeometry. + */ +trait MosaicPoint extends MosaicGeometry { + + def getX: Double + + def getY: Double + + def getZ: Double + + def geoCoord: Coordinates + + def asSeq: Seq[Double] + + override def flatten: Seq[MosaicGeometry] + + override def getShellPoints: Seq[Seq[MosaicPoint]] + + override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]] + + override def getShells: Seq[MosaicLineString] + + override def getHoles: Seq[Seq[MosaicLineString]] + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicPolygon.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicPolygon.scala new file mode 100644 index 000000000..5f8cdeccd --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/MosaicPolygon.scala @@ -0,0 +1,16 @@ +package com.databricks.labs.mosaic.core.geometry + +/** + * A trait that adds Polygon functionality to MosaicGeometry. 
+ */ +trait MosaicPolygon extends MosaicGeometry { + + override def getHolePoints: Seq[Seq[Seq[MosaicPoint]]] + + override def flatten: Seq[MosaicGeometry] + + override def getShellPoints: Seq[Seq[MosaicPoint]] + + def asSeq: Seq[MosaicLineString] + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPI.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPI.scala new file mode 100644 index 000000000..a90a8c6df --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPI.scala @@ -0,0 +1,140 @@ +package com.databricks.labs.mosaic.core.geometry.api + +import com.databricks.labs.mosaic.core.codegen.format.GeometryIOCodeGen +import com.databricks.labs.mosaic.core.geometry._ +import com.databricks.labs.mosaic.core.types._ +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String + +/** + * An abstract class that defines the API for the geometry frameworks. + * In order to integrate a new geometry framework, this class must be extended. + * The fully qualified name of the class must be added to the META-INF/services/com.databricks.labs.mosaic.core.geometry.api.GeometryAPI file. + * This is where [[com.databricks.labs.mosaic.core.GenericServiceFactory.GeometryAPIFactory]] will look for the available geometry frameworks. + * + * @param reader An instance of [[GeometryReader]]. + */ +abstract class GeometryAPI( + reader: GeometryReader + ) extends Serializable { + + def name: String + + /** + * Constructs an instance of [[MosaicGeometry]] based on a collection of + * [[MosaicPoint]]. + * + * @param points A collection of [[MosaicPoint]]. + * @param geomType The geometry type. + * @return An instance of [[MosaicGeometry]]. 
+ */ + def pointsToGeometry(points: Seq[MosaicPoint], geomType: GeometryTypeEnum.Value): MosaicGeometry = reader.fromSeq(points, geomType) + + /** + * Constructs an instance of [[MosaicGeometry]] based on an instance of + * spark internal data. + * + * @param inputData + * An instance of [[InternalRow]]. + * @param dataType + * A data type of the geometry. + * @return + * An instance of [[MosaicGeometry]]. + */ + def rowToGeometry(inputData: InternalRow, dataType: DataType): MosaicGeometry = { + dataType match { + case _: BinaryType => reader.fromWKB(inputData.getBinary(0)) + case _: StringType => reader.fromWKT(inputData.getString(0)) + case _: HexType => reader.fromHEX(inputData.get(0, HexType).asInstanceOf[InternalRow].getString(0)) + case _: GeoJSONType => reader.fromJSON(inputData.get(0, GeoJSONType).asInstanceOf[InternalRow].getString(0)) + case _ => throw new Error(s"$dataType not supported.") + } + } + + /** + * Constructs an instance of [[MosaicGeometry]] based on Any instance + * coming from spark nullSafeEval method. + * + * @param inputData + * A byte array, UTF8String or [[InternalRow]], matching dataType. + * @param dataType + * A data type of the geometry. + * @return + * An instance of [[MosaicGeometry]]. + */ + def valueToGeometry(inputData: Any, dataType: DataType): MosaicGeometry = { + dataType match { + case _: BinaryType => reader.fromWKB(inputData.asInstanceOf[Array[Byte]]) + case _: StringType => reader.fromWKT(inputData.asInstanceOf[UTF8String].toString) + case _: HexType => reader.fromHEX(inputData.asInstanceOf[InternalRow].getString(0)) + case _: GeoJSONType => reader.fromJSON(inputData.asInstanceOf[InternalRow].getString(0)) + case _ => throw new Error(s"$dataType not supported.") + } + } + + /** + * Serializes an instance of [[MosaicGeometry]] to a spark internal data. + * The format is selected based on the data type. + * + * @param geometry An instance of [[MosaicGeometry]]. + * @param dataType A data type representing the format. + * @return A spark internal data. 
+ */ + def serialize(geometry: MosaicGeometry, dataType: DataType): Any = { + dataType match { + case _: BinaryType => geometry.toWKB + case _: StringType => UTF8String.fromString(geometry.toWKT) + case _: HexType => InternalRow.fromSeq(Seq(UTF8String.fromString(geometry.toHEX))) + case _: GeoJSONType => InternalRow.fromSeq(Seq(UTF8String.fromString(geometry.toJSON))) + case _ => throw new Error(s"$dataType not supported.") + } + } + + /** + * Constructs an instance of [[MosaicPoint]] based on an instance of [[Coordinates]]. + * + * @param point An instance of [[Coordinates]]. + * @return An instance of [[MosaicPoint]]. + */ + def fromGeoCoord(point: Coordinates): MosaicPoint + + /** + * Constructs an instance of [[MosaicPoint]] based on a collection of [[Double]]. + * + * @param coords A collection of [[Double]]. + * @return An instance of [[MosaicPoint]]. + */ + def fromCoords(coords: Seq[Double]): MosaicPoint + + /** + * Accessor for the [[GeometryIOCodeGen]]. + * + * @return An instance of [[GeometryIOCodeGen]]. + */ + def ioCodeGen: GeometryIOCodeGen + + /** + * Generates a try catch block around the code if required by the geometry framework. + * Not all geometry frameworks require this, so it is up to the implementation to decide. + * + * @param code The code to wrap. + * @return The wrapped code. + */ + def codeGenTryWrap(code: String): String + + /** + * The fully qualified class name of the geometry. + * + * @return The class name. + */ + def geometryClass: String + + /** + * The fully qualified class name of the mosaic geometry. + * + * @return The class name. 
+ */ + def mosaicGeometryClass: String + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryReader.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryReader.scala new file mode 100644 index 000000000..9157faf70 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryReader.scala @@ -0,0 +1,24 @@ +package com.databricks.labs.mosaic.core.geometry.api + +import com.databricks.labs.mosaic.core.geometry.MosaicGeometry +import com.databricks.labs.mosaic.core.types.GeometryTypeEnum + +/** + * A trait that defines the methods for reading geometry data. + * If a new format requires support, fromFormat method should be added to this trait. + */ +trait GeometryReader { + + val defaultSpatialReferenceId: Int = 4326 + + def fromWKB(wkb: Array[Byte]): MosaicGeometry + + def fromWKT(wkt: String): MosaicGeometry + + def fromJSON(geoJson: String): MosaicGeometry + + def fromHEX(hex: String): MosaicGeometry + + def fromSeq[T <: MosaicGeometry](geomSeq: Seq[T], geomType: GeometryTypeEnum.Value): MosaicGeometry + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryWriter.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryWriter.scala new file mode 100644 index 000000000..95f1b6244 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryWriter.scala @@ -0,0 +1,17 @@ +package com.databricks.labs.mosaic.core.geometry.api + +/** + * A trait that defines the methods for writing geometry data. + * If a new format requires support, toFormat method should be added to this trait. 
+ */ +trait GeometryWriter { + + def toWKB: Array[Byte] + + def toWKT: String + + def toJSON: String + + def toHEX: String + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/index/IndexSystem.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/index/IndexSystem.scala new file mode 100644 index 000000000..1c7d8b83d --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/index/IndexSystem.scala @@ -0,0 +1,385 @@ +package com.databricks.labs.mosaic.core.index + +import com.databricks.labs.mosaic.core.geometry.MosaicGeometry +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.types.GeometryTypeEnum.{GEOMETRYCOLLECTION, LINESTRING, MULTILINESTRING, MULTIPOINT, MULTIPOLYGON, POINT, POLYGON} +import com.databricks.labs.mosaic.core.types.{Coordinates, GeometryTypeEnum, MosaicChip} +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String + +/** + * Defines the API that all index systems need to respect for Mosaic to support + * them. + */ +abstract class IndexSystem(var cellIdType: DataType) extends Serializable { + + /** + * Computes the distance between two cell IDs in the given index system. + * + * @param cellId Cell ID 1 + * @param cellId2 Cell ID 2 + * @return Distance between the two cell IDs measured in the index system's unit (number of cells). + */ + def distance(cellId: Long, cellId2: Long): Long + + /** + * @return Returns the data type of the cell IDs. + */ + def getCellIdDataType: DataType = cellIdType + + /** + * Sets the data type of the cell IDs. + * + * @param dataType Data type of the cell IDs. + */ + def setCellIdDataType(dataType: DataType): Unit = { + cellIdType = dataType + } + + /** + * Used for index systems that have human-readable resolutions formats. + * + * @return Returns the resolution as a string. 
+ */ + def getResolutionStr(resolution: Int): String + + /** + * Converts a cell ID to the representation matching the given data type. + * + * @return Returns the cell ID as a Long for LongType or as a String for StringType. + */ + def formatCellId(cellId: Any, dt: DataType): Any = + (dt, cellId) match { + case (LongType, _: Long) => cellId + case (LongType, cid: String) => parse(cid) + case (LongType, cid: UTF8String) => parse(cid.toString) + case (StringType, cid: Long) => format(cid) + case (StringType, cid: UTF8String) => cid.toString + case (StringType, _: String) => cellId + case _ => throw new Error("Cell ID data type not supported.") + } + + /** + * Converts a cell ID to the representation matching the cell ID data type of this index system. + * + * @param cellId Cell ID to be converted. + * @return Returns the cell ID as a Long or as a String, depending on getCellIdDataType. + */ + def formatCellId(cellId: Any): Any = formatCellId(cellId, getCellIdDataType) + + /** + * Serializes a cell ID to a type that is supported by spark. + * + * @param cellId Cell ID to be serialized. + * @return Returns the serialized cell ID. + */ + def serializeCellId(cellId: Any): Any = + (getCellIdDataType, cellId) match { + case (LongType, _: Long) => cellId + case (LongType, cid: String) => parse(cid) + case (LongType, cid: UTF8String) => parse(cid.toString) + case (StringType, cid: Long) => UTF8String.fromString(format(cid)) + case (StringType, _: UTF8String) => cellId + case (StringType, cid: String) => UTF8String.fromString(cid) + case _ => throw new Error("Cell ID data type not supported.") + } + + /** + * Converts a cell ID to a human-readable string. + * + * @param id Cell ID to be converted. + * @return Returns the cell ID as a string. + */ + def format(id: Long): String + + /** + * Parses a cell ID provided in a human-readable string. + * + * @param id Cell ID to be parsed. + * @return Returns the cell ID as a Long. + */ + def parse(id: String): Long + + /** + * Get the k ring of indices around the provided index id. + * + * @param index + * Index ID to be used as a center of k ring. 
+ * @param n + * Number of k rings to be generated around the input index. + * @return + * A collection of index IDs forming a k ring. + */ + def kRing(index: Long, n: Int): Seq[Long] + + def kRing(index: String, n: Int): Seq[String] = kRing(parse(index), n).map(format) + + /** + * Get the k loop (hollow ring) of indices around the provided index id. + * + * @param index + * Index ID to be used as a center of k loop. + * @param n + * Distance of k loop to be generated around the input index. + * @return + * A collection of index IDs forming a k loop. + */ + def kLoop(index: Long, n: Int): Seq[Long] + + def kLoop(index: String, n: Int): Seq[String] = kLoop(parse(index), n).map(format) + + /** + * Returns the set of supported resolutions for the given index system. + * This doesnt have to be a continuous set of values. Only values provided + * in this set are considered valid. + * + * @return + * A set of supported resolutions. + */ + def resolutions: Set[Int] + + /** + * Returns the name of the IndexSystem. + * + * @return + * IndexSystem name. + */ + def name: String + + /** + * Returns the resolution value based on the nullSafeEval method inputs of + * type Any. Each Index System should ensure that only valid values of + * resolution are accepted. + * + * @param res + * Any type input to be parsed into the Int representation of resolution. + * @return + * Int value representing the resolution. + */ + @throws[IllegalStateException] + def getResolution(res: Any): Int + + /** + * Computes the radius of minimum enclosing circle of the polygon + * corresponding to the centroid index of the provided geometry. + * + * @param geometry + * An instance of [[MosaicGeometry]] for which we are computing the + * optimal buffer radius. + * @param resolution + * A resolution to be used to get the centroid index geometry. + * @return + * An optimal radius to buffer the geometry in order to avoid blind spots + * when performing polyfill. 
+ */ + def getBufferRadius(geometry: MosaicGeometry, resolution: Int, geometryAPI: GeometryAPI): Double + + /** + * Returns a set of indices that represent the input geometry. Depending on + * the index system this set may include only indices whose centroids fall + * inside the input geometry or any index that intersects the input + * geometry. When extending make sure which is the guaranteed behavior of + * the index system. + * + * @param geometry + * Input geometry to be represented. + * @param resolution + * A resolution of the indices. + * @return + * A set of indices representing the input geometry. + */ + def polyfill(geometry: MosaicGeometry, resolution: Int, geometryAPI: Option[GeometryAPI] = None): Seq[Long] + + /** + * @see + * [[IndexSystem.getBorderChips()]] + * @param geometry + * Input geometry whose border is being represented. + * @param borderIndices + * Indices corresponding to the border area of the input geometry. + * @return + * A border area representation via [[MosaicChip]] set. + */ + def getBorderChips( + geometry: MosaicGeometry, + borderIndices: Seq[Long], + keepCoreGeom: Boolean, + geometryAPI: GeometryAPI + ): Seq[MosaicChip] = { + val intersections = for (index <- borderIndices) yield { + val indexGeom = indexToGeometry(index, geometryAPI) + val intersect = geometry.intersection(indexGeom) + val coerced = coerceChipGeometry(intersect, index, geometryAPI) + val isCore = coerced.equals(indexGeom) + + val chipGeom = if (!isCore || keepCoreGeom) coerced else null + + MosaicChip(isCore = isCore, Left(index), chipGeom) + } + intersections.filterNot(_.isEmpty) + } + + /** + * Return a set of [[MosaicChip]] instances computed based on the core + * indices. Each index is converted to an instance of [[MosaicChip]]. These + * chips do not contain chip geometry since they are full contained by the + * geometry whose core they represent. + * + * @param coreIndices + * Indices corresponding to the core area of the input geometry. 
+ * @return + * A core area representation via [[MosaicChip]] set. + */ + def getCoreChips(coreIndices: Seq[Long], keepCoreGeom: Boolean, geometryAPI: GeometryAPI): Seq[MosaicChip] = { + coreIndices.map(index => { + val indexGeom = if (keepCoreGeom) indexToGeometry(index, geometryAPI) else null + MosaicChip(isCore = true, Left(index), indexGeom) + }) + } + + /** + * Get the geometry corresponding to the index with the input id. + * + * @param index + * Id of the index whose geometry should be returned. + * @return + * An instance of [[MosaicGeometry]] corresponding to index. + */ + def indexToGeometry(index: Long, geometryAPI: GeometryAPI): MosaicGeometry + + /** + * Get the geometry corresponding to the index with the input id. + * + * @param index + * Id of the index whose geometry should be returned. + * @return + * An instance of [[MosaicGeometry]] corresponding to index. + */ + def indexToGeometry(index: String, geometryAPI: GeometryAPI): MosaicGeometry + + /** + * Get the index ID corresponding to the provided coordinates. + * + * @param lon + * X coordinate of the point. + * @param lat + * Y coordinate of the point. + * @param resolution + * Resolution of the index. + * @return + * Index ID in this index system. + */ + def pointToIndex(lon: Double, lat: Double, resolution: Int): Long + + /** + * Get the centroid of the index with the input cell id. + * + * @param index Cell ID in this index system. + * @return Centroid of the cell. + */ + def indexToCenter(index: Long): Coordinates + + def indexToCenter(index: String): Coordinates = indexToCenter(parse(index)) + + /** + * Get the boundary of the index with the input cell id. + * + * @param index Cell ID in this index system. + * @return Boundary of the cell. + */ + def indexToBoundary(index: Long): Seq[Coordinates] + + def indexToBoundary(index: String): Seq[Coordinates] = indexToBoundary(parse(index)) + + /** + * Computes the area of the cell with the input cell id. + * ASSUMPTION: index cells are convex. 
If index cells are not convex, you must override this method + * + * @param index Cell ID in this index system. + * @return Area of the cell. + */ + def area(index: Long): Double = { + // Haversine distance between two coordinates in radians + def haversine(coords1: Coordinates, coords2: Coordinates): Double = { + val c = math.Pi / 180 + + val th1 = c * coords1.lat + val th2 = c * coords2.lat + val dph = c * (coords1.lng - coords2.lng) + + val dz = math.sin(th1) - math.sin(th2) + val dx = math.cos(dph) * math.cos(th1) - math.cos(th2) + val dy = math.sin(dph) * math.cos(th1) + + math.asin(math.sqrt(dx * dx + dy * dy + dz * dz) / 2) * 2 + } + + def triangle_area(boundary_coords: Seq[Coordinates], center_coord: Coordinates): Double = { + val a = haversine(center_coord, boundary_coords.head) + val b = haversine(boundary_coords.head, boundary_coords(1)) + val c = haversine(boundary_coords(1), center_coord) + + val s = (a + b + c) / 2 + val t = math.sqrt( + math.tan(s / 2) + * math.tan((s - a) / 2) + * math.tan((s - b) / 2) + * math.tan((s - c) / 2) + ) + + val e = 4 * math.atan(t) + + val r = 6371.0088 + val area = e * r * r + area + } + + val center = indexToCenter(index) + val boundary = indexToBoundary(index) + val boundary_ring = boundary ++ Seq(boundary.head) + val res = boundary_ring.sliding(2).map(b => triangle_area(b, center)).sum + res + } + + def area(index: String): Double = area(parse(index)) + + /** + * Coerce the geometry with respect to the input cell. This is necessary + * if the intersection returned by the geometry framework is a geometry collection. + * This occurs when intersection takes into account the boundaries of geometries. + * Not all frameworks return a geometry collection. 
+ * + * @param geom Geometry to coerce + * @param cell Cell to coerce to + * @param geometryAPI Geometry API to use + * @return Coerced geometry + */ + def coerceChipGeometry(geom: MosaicGeometry, cell: Long, geometryAPI: GeometryAPI): MosaicGeometry = { + val geomType = GeometryTypeEnum.fromString(geom.getGeometryType) + if (geomType == GEOMETRYCOLLECTION) { + // This case can occur if partial geometry is a geometry collection + // or if the intersection includes a part of the boundary of the cell + geom.difference(indexToGeometry(cell, geometryAPI).getBoundary) + } else { + geom + } + } + + // TODO: This logic makes no sense. + // This logic makes no sense. + // Only one type of geometries is kept + // This should be fixed + def coerceChipGeometry(geometries: Seq[MosaicGeometry]): Seq[MosaicGeometry] = { + val types = geometries.map(_.getGeometryType).map(GeometryTypeEnum.fromString) + if (types.contains(MULTIPOLYGON) || types.contains(POLYGON)) { + geometries.filter(g => Seq(POLYGON, MULTIPOLYGON).contains(GeometryTypeEnum.fromString(g.getGeometryType))) + } else if (types.contains(MULTILINESTRING) || types.contains(LINESTRING)) { + geometries.filter(g => Seq(MULTILINESTRING, LINESTRING).contains(GeometryTypeEnum.fromString(g.getGeometryType))) + } else if (types.contains(MULTIPOINT) || types.contains(POINT)) { + geometries.filter(g => Seq(MULTIPOINT, POINT).contains(GeometryTypeEnum.fromString(g.getGeometryType))) + } else { + Nil + } + } + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/package.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/package.scala new file mode 100644 index 000000000..a1705f109 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/package.scala @@ -0,0 +1,19 @@ +package com.databricks.labs.mosaic + +/** + * This package object contains all the constants used in the Mosaic library. 
+ */ +package object core { + + val DATABRICKS_SQL_FUNCTIONS_MODULE = "com.databricks.sql.functions" + val SPARK_DATABRICKS_GEO_H3_ENABLED = "spark.databricks.geo.h3.enabled" + + val MOSAIC_INDEX_SYSTEM = "spark.databricks.labs.mosaic.index.system" + val MOSAIC_INDEX_SYSTEM_FACTORY: String = "spark.databricks.labs.mosaic.index.system.factory" + val MOSAIC_GEOMETRY_API = "spark.databricks.labs.mosaic.geometry.api" + val MOSAIC_RASTER_API = "spark.databricks.labs.mosaic.raster.api" + val MOSAIC_GDAL_NATIVE = "spark.databricks.labs.mosaic.gdal.native" + val MOSAIC_RASTER_CHECKPOINT = "spark.databricks.labs.mosaic.raster.checkpoint" + val MOSAIC_RASTER_CHECKPOINT_DEFAULT = "dbfs:/tmp/mosaic/raster/checkpoint" + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/MosaicRaster.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/MosaicRaster.scala new file mode 100644 index 000000000..fc740c2e6 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/MosaicRaster.scala @@ -0,0 +1,91 @@ +package com.databricks.labs.mosaic.core.raster + +/** + * A base API for managing raster data in Mosaic. Any raster abstraction should + * extend this class. + * + * @param path + * The path to the raster file. This has to be a path that can be read by the + * worker nodes. + * @param memSize + * The amount of memory occupied by the file in bytes. + */ +abstract class MosaicRaster(path: String, memSize: Long) extends Serializable { + + + /** + * @return Returns the geo transform of the raster. The geo transform is + * a Seq[Double] of [xOrigin, xPixelSize, xSkew, yOrigin, ySkew, yPixelSize]. + */ + def getGeoTransform: Seq[Double] + + /** + * Writes out the current raster to the given checkpoint path. The raster + * is written out as a GeoTiff. Only single subdataset is supported. Apply + * mask to all bands. Trim down the raster to the provided extent. 
+ * + * @param stageId + * the UUID of the computation stage generating the raster. Used to avoid + * writing collisions. + * @param rasterId + * the ID (Long) of the raster. Used to avoid writing collisions. + * @param extent + * The extent to trim the raster to. + * @param checkpointPath + * The path to write the raster to. + * @return + * Returns the path to the written raster. + */ + def saveCheckpoint(stageId: String, rasterId: Long, extent: (Int, Int, Int, Int), checkpointPath: String): String + + /** @return Returns the metadata of the raster file. */ + def metadata: Map[String, String] + + /** + * @return + * Returns the key->value pairs of subdataset->description for the + * raster. + */ + def subdatasets: Map[String, String] + + /** @return Returns the number of bands in the raster. */ + def numBands: Int + + /** @return Returns the SRID in the raster. */ + def SRID: Int + + /** @return Returns the proj4 projection string in the raster. */ + def proj4String: String + + /** @return Returns the x size of the raster. */ + def xSize: Int + + /** @return Returns the y size of the raster. */ + def ySize: Int + + /** @return Returns the bandId-th Band from the raster. */ + def getBand(bandId: Int): MosaicRasterBand + + /** @return Returns the extent(xmin, ymin, xmax, ymax) of the raster. */ + def extent: Seq[Double] + + /** Cleans up the raster driver and references. */ + def cleanUp(): Unit + + /** @return Returns the amount of memory occupied by the file in bytes. */ + def getMemSize: Long = memSize + + /** + * A template method for transforming the raster bands into new bands. Each + * band is transformed into a new band using the transform function. + * Override this method for tiling, clipping, warping, etc. type of + * expressions. + * + * @tparam T + * The type of the result from the transformation of a band. + * @param f + * The transform function. Will be applied on each band. 
+ */ + def transformBands[T](f: MosaicRasterBand => T): Seq[T] + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterBand.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterBand.scala new file mode 100644 index 000000000..af8f204a0 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterBand.scala @@ -0,0 +1,132 @@ +package com.databricks.labs.mosaic.core.raster + +/** + * A base API for managing raster bands in Mosaic. Any raster band abstraction + * should extend this trait. + */ +trait MosaicRasterBand extends Serializable { + + /** @return Returns the bandId of the band. */ + def index: Int + + /** @return Returns the description of the band. */ + def description: String + + /** @return Returns the metadata of the band. */ + def metadata: Map[String, String] + + /** @return Returns the unit type of the band pixels. */ + def units: String + + /** @return Returns the data type (numeric) of the band pixels. */ + def dataType: Int + + /** @return Returns the x size of the band. */ + def xSize: Int + + /** @return Returns the y size of the band. */ + def ySize: Int + + /** @return Returns the minimum pixel value of the band. */ + def minPixelValue: Double + + /** @return Returns the maximum pixel value of the band. */ + def maxPixelValue: Double + + /** + * @return + * Returns the value used to represent transparent pixels of the band. + */ + def noDataValue: Double + + /** + * @return + * Returns the scale in which pixels are represented. It is the unit + * value of a pixel. If the pixel value is 5.1 and pixel scale is 10.0 + * then the actual pixel value is 51.0. + */ + def pixelValueScale: Double + + /** + * @return + * Returns the offset in which pixels are represented. It is the unit + * value of a pixel. If the pixel value is 5.1 and pixel offset is 10.0 + * then the actual pixel value is 15.1. 
+ */ + def pixelValueOffset: Double + + /** + * @return + * Returns the pixel value with scale and offset applied. If the pixel + * value is 5.1 and pixel scale is 10.0 and pixel offset is 10.0 then the + * actual pixel value is 61.0. + */ + def pixelValueToUnitValue(pixelValue: Double): Double + + /** + * @return + * Returns all pixels of the band as a 1D array, i.e. the window at offset (0, 0) of size xSize by ySize. + */ + def values: Array[Double] = values(0, 0, xSize, ySize) + + /** + * @return + * Returns all mask pixels of the band as a 1D array, i.e. the window at offset (0, 0) of size xSize by ySize. + */ + def maskValues: Array[Double] = maskValues(0, 0, xSize, ySize) + + /** + * @param xOffset + * The x offset of the raster. The x offset is the number of pixels to + * skip from the left. 0 <= xOffset < xSize + * + * @param yOffset + * The y offset of the raster. The y offset is the number of pixels to + * skip from the top. 0 <= yOffset < ySize + * + * @param xSize + * The x size of the raster to be read. + * + * @param ySize + * The y size of the raster to be read. + * @return + * Returns the pixels of the raster as a 1D array with offset and size + * applied. + */ + def values(xOffset: Int, yOffset: Int, xSize: Int, ySize: Int): Array[Double] + + /** + * @param xOffset + * The x offset of the raster. The x offset is the number of pixels to + * skip from the left. 0 <= xOffset < xSize + * + * @param yOffset + * The y offset of the raster. The y offset is the number of pixels to + * skip from the top. 0 <= yOffset < ySize + * + * @param xSize + * The x size of the raster to be read. + * + * @param ySize + * The y size of the raster to be read. + * @return + * Returns the mask pixels of the raster as a 1D array with offset and size + * applied. + */ + def maskValues(xOffset: Int, yOffset: Int, xSize: Int, ySize: Int): Array[Double] + + /** + * Apply f to all pixels in the raster. Overridden in subclasses to define + * the behavior. + * @param f + * the function to apply to each pixel. + * @param default + * the default value to use if the pixel is noData. 
+ * @tparam T + * the return type of the function. + * @return + * an array of the results of applying f to each pixel. + */ + def transformValues[T](f: (Int, Int, Double) => T, default: T = null): Seq[Seq[T]] + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/RasterAPI.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/RasterAPI.scala new file mode 100644 index 000000000..2c84a24cd --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/RasterAPI.scala @@ -0,0 +1,76 @@ +package com.databricks.labs.mosaic.core.raster + +/** + * A base trait for all Raster API's. + * @param reader + * The RasterReader to use for reading the raster. + */ +abstract class RasterAPI(reader: RasterReader) extends Serializable { + + /** + * This method should be called in every raster expression if the RasterAPI + * requires enablement on worker nodes. + */ + def enable(): Unit + + /** @return Returns the name of the raster API. */ + def name: String + + /** + * Reads a raster from the given path. + * + * @param path + * The path to the raster. This path has to be a path to a single raster. + * Rasters with subdatasets are supported. + * @return + * Returns a Raster object. + */ + def raster(path: String): MosaicRaster = reader.readRaster(path) + + /** + * Reads a raster from the given path. It extracts the specified band from + * the raster. + * + * @param path + * The path to the raster. This path has to be a path to a single raster. + * Rasters with subdatasets are supported. + * @param bandIndex + * The index of the band to read from the raster. + * @return + * Returns a Raster band object. + */ + def band(path: String, bandIndex: Int): MosaicRasterBand = reader.readBand(path, bandIndex) + + /** + * Converts raster x, y coordinates to lat, lon coordinates. + * @param gt + * Geo transform of the raster. + * @param x + * X coordinate of the raster. + * @param y + * Y coordinate of the raster. 
+ * @return + * Returns the (xGeo, yGeo) tuple produced by the reader: x first, then y. + */ + def toWorldCoord(gt: Seq[Double], x: Int, y: Int): (Double, Double) = { + val (xGeo, yGeo) = reader.toWorldCoord(gt, x, y) + (xGeo, yGeo) + } + + /** + * Converts world x, y coordinates to raster pixel x, y coordinates by delegating to the reader. + * @param gt + * Geo transform of the raster. + * @param x + * X coordinate of the point in world coordinates. + * @param y + * Y coordinate of the point in world coordinates. + * @return + * Returns a tuple of (xPixel, yPixel). + */ + def fromWorldCoord(gt: Seq[Double], x: Double, y: Double): (Int, Int) = { + val (xPixel, yPixel) = reader.fromWorldCoord(gt, x, y) + (xPixel, yPixel) + } + +} \ No newline at end of file diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/RasterReader.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/RasterReader.scala new file mode 100644 index 000000000..34d88fd22 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/raster/RasterReader.scala @@ -0,0 +1,78 @@ +package com.databricks.labs.mosaic.core.raster + +import org.apache.spark.internal.Logging + +/** + * RasterReader is a trait that defines the interface for reading raster data + * from a file system path. It is used by the RasterAPI to read raster and + * raster band data. + * @note + * For subdatasets the path should be the path to the subdataset and not to + * the file. + */ +trait RasterReader extends Logging { + + /** + * Reads a raster from a file system path. Reads a subdataset if the path + * is to a subdataset. + * + * @example + * Raster: path = "file:///path/to/file.tif" Subdataset: path = + * "file:///path/to/file.tif:subdataset" + * @param path + * The path to the raster file. + * @return + * A MosaicRaster object. + */ + def readRaster(path: String): MosaicRaster + + /** + * Reads a raster band from a file system path. Reads a subdataset band if + * the path is to a subdataset. 
+ * @example + * Raster: path = "file:///path/to/file.tif" Subdataset: path = + * "file:///path/to/file.tif:subdataset" + * @param path + * The path to the raster file. + * + * @param bandIndex + * The band index to read. + * @return + * A MosaicRaster object. + */ + def readBand(path: String, bandIndex: Int): MosaicRasterBand + + /** + * Take a geo transform matrix and x and y coordinates of a pixel and + * returns the x and y coordinates in the projection of the raster. + * + * @param geoTransform + * The geo transform matrix of the raster. + * + * @param x + * The x coordinate of the pixel. + * @param y + * The y coordinate of the pixel. + * @return + * A tuple of doubles with the x and y coordinates in the projection of + * the raster. + */ + def toWorldCoord(geoTransform: Seq[Double], x: Int, y: Int): (Double, Double) + + /** + * Take a geo transform matrix and x and y coordinates of a point and + * returns the x and y coordinates of the raster pixel. + * + * @param geoTransform + * The geo transform matrix of the raster. + * + * @param x + * The x coordinate of the point. + * @param y + * The y coordinate of the point. + * @return + * A tuple of integers with the x and y coordinates of the raster pixel. + */ + def fromWorldCoord(geoTransform: Seq[Double], x: Double, y: Double): (Int, Int) + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/ChipType.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/ChipType.scala new file mode 100644 index 000000000..dc6788271 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/ChipType.scala @@ -0,0 +1,30 @@ +package com.databricks.labs.mosaic.core.types + +import org.apache.spark.sql.types._ + +/** + * Type definition for Chip. Chip is defined as (is_core: boolean, index_id: long, + * wkb: binary). 
+ */ +class ChipType(fields: Array[StructField]) extends StructType(fields) { + + override def simpleString: String = "CHIP" + + override def typeName: String = "struct" + +} + +object ChipType { + + def apply(idType: DataType): ChipType = { + require(Seq(LongType, IntegerType, StringType).contains(idType)) + new ChipType( + Array( + StructField("is_core", BooleanType), + StructField("index_id", idType), + StructField("wkb", BinaryType) + ) + ) + } + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/Coordinates.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/Coordinates.scala new file mode 100644 index 000000000..6b9ead00a --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/Coordinates.scala @@ -0,0 +1,3 @@ +package com.databricks.labs.mosaic.core.types + +case class Coordinates(lat: Double, lng: Double) diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/GeoJSONType.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/GeoJSONType.scala new file mode 100644 index 000000000..387e946d0 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/GeoJSONType.scala @@ -0,0 +1,21 @@ +package com.databricks.labs.mosaic.core.types + +import org.apache.spark.sql.types._ + +/** + * Type definition for JSON encoding. JSON encoding is defined as (json: + * string). This abstraction over StringType is needed to ensure matching can + * distinguish between StringType (WKT) and JSONType (GEOJSON). 
+ */ +class GeoJSONType() + extends StructType( + Array( + StructField("json", StringType) + ) + ) { + + override def simpleString: String = "GEOJSON" + + override def typeName: String = "struct" + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/GeometryTypeEnum.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/GeometryTypeEnum.scala new file mode 100644 index 000000000..19279cc33 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/GeometryTypeEnum.scala @@ -0,0 +1,57 @@ +package com.databricks.labs.mosaic.core.types + +import java.util.Locale +import scala.collection.immutable + +/** + * Enumeration of geometry types supported by Mosaic. + */ +object GeometryTypeEnum extends Enumeration { + + val POINT: GeometryTypeEnum.Value = Value(1, "POINT") + val MULTIPOINT: GeometryTypeEnum.Value = Value(2, "MULTIPOINT") + val LINESTRING: GeometryTypeEnum.Value = Value(3, "LINESTRING") + val MULTILINESTRING: GeometryTypeEnum.Value = Value(4, "MULTILINESTRING") + val POLYGON: GeometryTypeEnum.Value = Value(5, "POLYGON") + val MULTIPOLYGON: GeometryTypeEnum.Value = Value(6, "MULTIPOLYGON") + // coercion type JTS boundary returns LinearRing instead of LineString + val LINEARRING: GeometryTypeEnum.Value = Value(7, "LINEARRING") + val GEOMETRYCOLLECTION: GeometryTypeEnum.Value = Value(8, "GEOMETRYCOLLECTION") + + val pointGeometries: Seq[GeometryTypeEnum.Value] = List(this.POINT, this.MULTIPOINT) + val linestringGeometries: Seq[GeometryTypeEnum.Value] = List(this.LINESTRING, this.MULTILINESTRING) + val polygonGeometries: immutable.Seq[GeometryTypeEnum.Value] = List(this.POLYGON, this.MULTIPOLYGON) + + val singleGeometries: Seq[GeometryTypeEnum.Value] = List(this.POINT, this.LINESTRING, this.POLYGON) + val multipleGeometries: Seq[GeometryTypeEnum.Value] = List(this.MULTIPOINT, this.MULTILINESTRING, this.MULTIPOLYGON, this.GEOMETRYCOLLECTION) + + def fromString(value: String): 
GeometryTypeEnum.Value = + GeometryTypeEnum.values + .find(_.toString == value.toUpperCase(Locale.ROOT)) + .getOrElse( + throw new Error( + s"Invalid mode for geometry type: $value." + + s" Must be one of ${GeometryTypeEnum.values.mkString(",")}" + ) + ) + + def fromId(id: Int): GeometryTypeEnum.Value = + GeometryTypeEnum.values + .find(_.id == id) + .getOrElse(throw new Error(s"Invalid value for geometry type id: $id.")) + + def groupOf(enumerator: GeometryTypeEnum.Value): GeometryTypeEnum.Value = + enumerator match { + case g if pointGeometries.contains(g) => this.POINT + case g if linestringGeometries.contains(g) => this.LINESTRING + case g if polygonGeometries.contains(g) => this.POLYGON + case _ => this.GEOMETRYCOLLECTION + } + + def isFlat(enumerator: GeometryTypeEnum.Value): Boolean = // true for single geometries, false for multi geometries and collections + enumerator match { + case g if singleGeometries.contains(g) || g == this.LINEARRING => true // LINEARRING is in neither list; it is a single (coerced LINESTRING) geometry and used to fall through to a MatchError + case g if multipleGeometries.contains(g) => false + } + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/HexType.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/HexType.scala new file mode 100644 index 000000000..dfd9ebb8a --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/HexType.scala @@ -0,0 +1,21 @@ +package com.databricks.labs.mosaic.core.types + +import org.apache.spark.sql.types._ + +/** + * Type definition for Hex encoding. Hex encoding is defined as (hex: string). + * This abstraction over StringType is needed to ensure matching can + * distinguish between StringType (WKT) and HexType (HEX). 
+ */ +class HexType() + extends StructType( + Array( + StructField("hex", StringType) + ) + ) { + + override def simpleString: String = "HEX" + + override def typeName: String = "struct" + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/MosaicChip.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/MosaicChip.scala new file mode 100644 index 000000000..6c7e773a9 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/MosaicChip.scala @@ -0,0 +1,73 @@ +package com.databricks.labs.mosaic.core.types + +import com.databricks.labs.mosaic.core.geometry.MosaicGeometry +import com.databricks.labs.mosaic.core.index.IndexSystem +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.types.{LongType, StringType} +import org.apache.spark.unsafe.types.UTF8String + +/** + * A case class modeling an instance of a mosaic chip. A chip can belong to + * either core or border set. + * + * @param isCore + * Whether the chip belongs to the core set. + * @param index + * Index ID. + * @param geom + * Geometry instance if the chip is a border chip. + */ +case class MosaicChip(isCore: Boolean, index: Either[Long, String], geom: MosaicGeometry) { + + /** + * Indicates whether the chip is outside of the representation of the + * geometry it was generated to represent (ie false positive index). + */ + def isEmpty: Boolean = !isCore & Option(geom).forall(_.isEmpty) + + /** + * Formats the index ID as the data type supplied by the index system. + * + * @param indexSystem Index system to use for formatting. + * @return MosaicChip with formatted index ID. 
+ */ + def formatCellId(indexSystem: IndexSystem): MosaicChip = { + (indexSystem.getCellIdDataType, index) match { + case (_: LongType, Left(_)) => this + case (_: StringType, Right(_)) => this + case (_: LongType, Right(value)) => this.copy(index = Left(indexSystem.parse(value))) + case (_: StringType, Left(value)) => this.copy(index = Right(indexSystem.format(value))) + case _ => throw new IllegalArgumentException("Invalid cell id data type") + } + } + + def cellIdAsLong(indexSystem: IndexSystem): Long = index match { // Long ids are returned as-is; String ids are parsed by the index system. + case Left(value) => value + case Right(value) => indexSystem.parse(value) + } + + def cellIdAsStr(indexSystem: IndexSystem): String = index match { // String ids are returned as-is; Long ids are formatted by the index system. + case Right(value) => value + case Left(value) => indexSystem.format(value) + } + + /** + * Serialise to spark internal representation. + * + * @return + * An instance of [[InternalRow]]. + */ + def serialize: InternalRow = index match { // Row layout: (isCore, index id as Long or UTF8String, WKB bytes or null). + case Left(id) => InternalRow.fromSeq(Seq(isCore, id, encodeGeom)) + case Right(id) => InternalRow.fromSeq(Seq(isCore, UTF8String.fromString(id), encodeGeom)) + } + + /** + * Encodes the chip geometry as WKB. + * + * @return + * An instance of [[Array]] of [[Byte]] representing WKB. + */ + private def encodeGeom: Array[Byte] = Option(geom).map(_.toWKB).orNull + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/MosaicType.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/MosaicType.scala new file mode 100644 index 000000000..a2ca70b8f --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/MosaicType.scala @@ -0,0 +1,25 @@ +package com.databricks.labs.mosaic.core.types + +import org.apache.spark.sql.types._ + +/** + * Type definition for MosaicType. MosaicType is defined as (chips: + * array[chip]). 
+ */ +class MosaicType(fields: Array[StructField]) extends StructType(fields) { + + override def simpleString: String = "MOSAIC" + + override def typeName: String = "struct" + +} + +object MosaicType { + def apply(idType: DataType): StructType = { + new MosaicType( + Array( + StructField("chips", ArrayType(ChipType(idType))) + ) + ) + } +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/package.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/package.scala new file mode 100644 index 000000000..c35abdc38 --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/types/package.scala @@ -0,0 +1,14 @@ +package com.databricks.labs.mosaic.core + +import org.apache.spark.sql.types._ + +/** + * Contains definition of all Mosaic specific data types. It provides methods + * for type inference over geometry columns. + */ +package object types { + + val HexType: DataType = new HexType() + val GeoJSONType: DataType = new GeoJSONType() + +} diff --git a/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/util/ResourceUtils.scala b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/util/ResourceUtils.scala new file mode 100644 index 000000000..0425d8c6c --- /dev/null +++ b/mosaic-core/src/main/scala/com/databricks/labs/mosaic/core/util/ResourceUtils.scala @@ -0,0 +1,26 @@ +package com.databricks.labs.mosaic.core.util + +import java.io.BufferedInputStream +import scala.language.postfixOps + +/** + * Utility for reading resources from the classpath. + * This is required for [[com.databricks.labs.mosaic.core.GenericServiceFactory[_]] to work. + * All [[com.databricks.labs.mosaic.core.geometry.api.GeometryAPI]], [[com.databricks.labs.mosaic.core.index.IndexSystem]] + * and [[com.databricks.labs.mosaic.core.raster.RasterAPI]] implementations are provided via META-INF/services. 
+ */ +object ResourceUtils { + + def readResourceBytes(name: String): Array[Byte] = { // Reads the whole classpath resource `name`; a missing resource fails fast with FileNotFoundException (still an IOException) instead of a misleading "Stream closed". + val bis = new BufferedInputStream(Option(getClass.getResourceAsStream(name)).getOrElse(throw new java.io.FileNotFoundException(s"Resource not found on classpath: $name"))) + try Iterator.continually(bis.read()).takeWhile(_ != -1).map(_.toByte).toArray + finally bis.close() + } + + def readResourceLines(name: String): Array[String] = { // Decodes the resource as UTF-8 (not the platform default charset) and splits on LF or CRLF line endings. + val bytes = readResourceBytes(name) + val lines = new String(bytes, java.nio.charset.StandardCharsets.UTF_8).split("\r?\n") + lines + } + +} diff --git a/mosaic-core/src/main/scala/org/apache/spark/sql/adapters/Column.scala b/mosaic-core/src/main/scala/org/apache/spark/sql/adapters/Column.scala new file mode 100644 index 000000000..85d5d8420 --- /dev/null +++ b/mosaic-core/src/main/scala/org/apache/spark/sql/adapters/Column.scala @@ -0,0 +1,10 @@ +package org.apache.spark.sql.adapters + +import org.apache.spark.sql.{Column => SparkColumn} +import org.apache.spark.sql.catalyst.expressions.Expression + +object Column { + def apply(expr: Expression): SparkColumn = { + new SparkColumn(expr) + } +} diff --git a/mosaic-core/src/main/scala/org/apache/spark/sql/adapters/DataFrameReader.scala b/mosaic-core/src/main/scala/org/apache/spark/sql/adapters/DataFrameReader.scala new file mode 100644 index 000000000..43287f267 --- /dev/null +++ b/mosaic-core/src/main/scala/org/apache/spark/sql/adapters/DataFrameReader.scala @@ -0,0 +1,5 @@ +package org.apache.spark.sql.adapters + +import org.apache.spark.sql.SparkSession + +class DataFrameReader(sparkSession: SparkSession) extends org.apache.spark.sql.DataFrameReader(sparkSession) {} diff --git a/mosaic-core/src/test/resources/CRSBounds.csv b/mosaic-core/src/test/resources/CRSBounds.csv new file mode 100644 index 000000000..a628e3c74 --- /dev/null +++ b/mosaic-core/src/test/resources/CRSBounds.csv @@ -0,0 +1,2 @@ +name,bound_xmin,bound_ymin,bound_xmax,bound_ymax,reprojected_bound_xmin,reprojected_bound_ymin,reprojected_bound_xmax,reprojected_bound_ymax +EPSG:4326: WGS 84,-180.0000,-90.0000,180.0000,90.0000,-180.0000,-90.0000,180.0000,90.0000 \ No newline at 
end of file diff --git a/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.GeometryAPIRegister b/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.GeometryAPIRegister new file mode 100644 index 000000000..85115ba12 --- /dev/null +++ b/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.GeometryAPIRegister @@ -0,0 +1,2 @@ +com.databricks.labs.mosaic.core.GenericFactoryTest$$anon$1 +com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$1 \ No newline at end of file diff --git a/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.IndexSystemRegister b/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.IndexSystemRegister new file mode 100644 index 000000000..e6eb808f0 --- /dev/null +++ b/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.IndexSystemRegister @@ -0,0 +1,2 @@ +com.databricks.labs.mosaic.core.GenericFactoryTest$$anon$2 +com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$2 \ No newline at end of file diff --git a/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.RasterAPIRegister b/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.RasterAPIRegister new file mode 100644 index 000000000..1efd1fcfa --- /dev/null +++ b/mosaic-core/src/test/resources/META-INF/services/com.databricks.labs.mosaic.RasterAPIRegister @@ -0,0 +1,2 @@ +com.databricks.labs.mosaic.core.GenericFactoryTest$$anon$3 +com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$3 \ No newline at end of file diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/AdaptorsTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/AdaptorsTest.scala new file mode 100644 index 000000000..3deba2eff --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/AdaptorsTest.scala @@ -0,0 
+1,16 @@ +package com.databricks.labs.mosaic.core + +import org.apache.spark.sql.adapters.Column +import org.apache.spark.sql.catalyst.expressions.{Add, Expression} +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.must.Matchers.{be, noException} + +class AdaptorsTest extends AnyFunSuite with MockFactory { + + test("Could should be constructable outside of spark") { + val mockExpression = mock[Expression] + noException should be thrownBy Column(Add(mockExpression, mockExpression)) + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/GenericFactoryTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/GenericFactoryTest.scala new file mode 100644 index 000000000..a3b3ab9fe --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/GenericFactoryTest.scala @@ -0,0 +1,46 @@ +package com.databricks.labs.mosaic.core + +import com.databricks.labs.mosaic.core.GenericServiceFactory.{GeometryAPIFactory, IndexSystemFactory, RasterAPIFactory} +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.core.raster.RasterAPI +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class GenericFactoryTest extends AnyFunSuite with MockFactory { + + test("GeometryAPIFactory should getGeometryAPI based on META-INF/services") { + // We need a mock GeometryAPI to test the factory + // The generated class name is com.databricks.labs.mosaic.core.GenericFactoryTest$$anon$1, as registered in the test META-INF/services file + // Make sure that this is the first mock in the file to match $$anon$1 + val mockGeometryAPI = mock[GeometryAPI] + mockGeometryAPI.name _ expects() returning "MockGeometryAPI" anyNumberOfTimes() + + noException should be thrownBy GeometryAPIFactory.getGeometryAPI("MockGeometryAPI", Array(this)) 
+ an[IllegalArgumentException] should be thrownBy GeometryAPIFactory.getGeometryAPI("MockGeometryAPI") + } + + test("IndexSystemFactory should getGeometryAPI based on META-INF/services") { + // We need a mock IndexSystem to test the factory + // The generated class name is com.databricks.labs.mosaic.core.GenericFactoryTest$$anon$2, as registered in the test META-INF/services file + // Make sure that this is the second mock in the file to match $$anon$2 + val mockIndexSystem = mock[IndexSystem] + mockIndexSystem.name _ expects() returning "MockIndexSystem" anyNumberOfTimes() + + noException should be thrownBy IndexSystemFactory.getIndexSystem("MockIndexSystem", Array(this)) + an[IllegalArgumentException] should be thrownBy IndexSystemFactory.getIndexSystem("MockIndexSystem") + } + + test("RasterAPIFactory should getRasterAPI based on META-INF/services") { + // We need a mock RasterAPI to test the factory + // The generated class name is com.databricks.labs.mosaic.core.GenericFactoryTest$$anon$3, as registered in the test META-INF/services file + // Make sure that this is the third mock in the file to match $$anon$3 + val mockRasterAPI = mock[RasterAPI] + mockRasterAPI.name _ expects() returning "MockRasterAPI" anyNumberOfTimes() + + noException should be thrownBy RasterAPIFactory.getRasterAPI("MockRasterAPI", Array(this)) + an[IllegalArgumentException] should be thrownBy RasterAPIFactory.getRasterAPI("MockRasterAPI") + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/MosaicTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/MosaicTest.scala new file mode 100644 index 000000000..6faa24fb9 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/MosaicTest.scala @@ -0,0 +1,122 @@ +package com.databricks.labs.mosaic.core + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.geometry._ +import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.core.types.MosaicChip +import 
org.apache.spark.sql.types.LongType +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class MosaicTest extends AnyFunSuite with MockFactory { + + val mockPoint: MosaicPoint = mock[MosaicPoint] + val mockMultiPoint: MosaicMultiPoint = mock[MosaicMultiPoint] + val mockLineString: MosaicLineString = mock[MosaicLineString] + val mockMultiLineString: MosaicMultiLineString = mock[MosaicMultiLineString] + val mockPolygon: MosaicPolygon = mock[MosaicPolygon] + val mockIndexSystem: IndexSystem = mock[IndexSystem] + val mockGeometryAPI: GeometryAPI = mock[GeometryAPI] + val mockMosaicChip: MosaicChip = mock[MosaicChip] + + def doMock(): Unit = { + mockPoint.getGeometryType _ expects() returning "POINT" anyNumberOfTimes() + mockPoint.getX _ expects() returning 1.0 anyNumberOfTimes() + mockPoint.getY _ expects() returning 1.0 anyNumberOfTimes() + mockPoint.isEmpty _ expects() returning false anyNumberOfTimes() + + mockMultiPoint.getGeometryType _ expects() returning "MULTIPOINT" anyNumberOfTimes() + mockMultiPoint.asSeq _ expects() returning Seq(mockPoint) anyNumberOfTimes() + + mockLineString.getGeometryType _ expects() returning "LINESTRING" anyNumberOfTimes() + mockLineString.getShells _ expects() returning Seq(mockLineString) anyNumberOfTimes() + mockLineString.asSeq _ expects() returning Seq(mockPoint) anyNumberOfTimes() + mockLineString.intersection _ expects mockPoint returning mockPoint anyNumberOfTimes() + mockLineString.buffer _ expects * returning mockPolygon anyNumberOfTimes() + + mockMultiLineString.getGeometryType _ expects() returning "MULTILINESTRING" anyNumberOfTimes() + mockMultiLineString.flatten _ expects() returning Seq(mockLineString) anyNumberOfTimes() + + mockPolygon.getGeometryType _ expects() returning "POLYGON" anyNumberOfTimes() + mockPolygon.buffer _ expects * returning mockPolygon anyNumberOfTimes() + mockPolygon.isEmpty _ expects() returning false 
anyNumberOfTimes() + mockPolygon.boundary _ expects() returning mockLineString anyNumberOfTimes() + mockPolygon.simplify _ expects * returning mockPolygon anyNumberOfTimes() + + mockIndexSystem.pointToIndex _ expects(*, *, *) returning 1L anyNumberOfTimes() + mockIndexSystem.getCellIdDataType _ expects() returning LongType anyNumberOfTimes() + (mockIndexSystem.indexToGeometry(_: Long, _: GeometryAPI)) expects(1, mockGeometryAPI) returning mockPoint anyNumberOfTimes() + (mockIndexSystem.kRing(_: Long, _: Int)) expects(1, 1) returning Seq(1L) anyNumberOfTimes() + mockIndexSystem.getBufferRadius _ expects(mockPolygon, 1, mockGeometryAPI) returning 1.0 anyNumberOfTimes() + mockIndexSystem.polyfill _ expects(mockPolygon, 1, Some(mockGeometryAPI)) returning Seq(1L) anyNumberOfTimes() + mockIndexSystem.getCoreChips _ expects(Seq(1L), false, mockGeometryAPI) returning Seq(mockMosaicChip) anyNumberOfTimes() + mockIndexSystem.getBorderChips _ expects(mockPolygon, Seq(), false, mockGeometryAPI) returning Seq() anyNumberOfTimes() + + mockMosaicChip.cellIdAsLong _ expects mockIndexSystem returning 1L anyNumberOfTimes() + } + + test("Mosaic should getChips") { + doMock() + val resolution = 1 + + Mosaic.getChips(mockPoint, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI) shouldBe a[Seq[MosaicChip]] + Mosaic.getChips(mockPoint, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI).map(_.cellIdAsLong(mockIndexSystem)) should contain theSameElementsAs Seq(1L) + + Mosaic.getChips(mockMultiPoint, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI) shouldBe a[Seq[MosaicChip]] + Mosaic.getChips(mockMultiPoint, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI).map(_.cellIdAsLong(mockIndexSystem)) should contain theSameElementsAs Seq(1L) + + Mosaic.getChips(mockLineString, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI) shouldBe a[Seq[MosaicChip]] + Mosaic.getChips(mockLineString, resolution, 
keepCoreGeom = false, mockIndexSystem, mockGeometryAPI).map(_.cellIdAsLong(mockIndexSystem)) should contain theSameElementsAs Seq(1L) + + Mosaic.getChips(mockMultiLineString, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI) shouldBe a[Seq[MosaicChip]] + Mosaic.getChips(mockMultiLineString, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI).map(_.cellIdAsLong(mockIndexSystem)) should contain theSameElementsAs Seq(1L) + + Mosaic.getChips(mockPolygon, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI) shouldBe a[Seq[MosaicChip]] + Mosaic.getChips(mockPolygon, resolution, keepCoreGeom = false, mockIndexSystem, mockGeometryAPI).map(_.cellIdAsLong(mockIndexSystem)) should contain theSameElementsAs Seq(1L) + } + + test("Mosaic should mosaicFill for empty carved geometries") { + doMock() + val resolution = 1 + + val mockPolygon2 = mock[MosaicPolygon] + mockPolygon2.isEmpty _ expects() returning true anyNumberOfTimes() + mockPolygon2.getGeometryType _ expects() returning "POLYGON" anyNumberOfTimes() + mockPolygon2.buffer _ expects * returning mockPolygon2 anyNumberOfTimes() + mockPolygon2.simplify _ expects * returning mockPolygon2 anyNumberOfTimes() + mockIndexSystem.polyfill _ expects(mockPolygon2, 1, Some(mockGeometryAPI)) returning Seq(1L) anyNumberOfTimes() + mockIndexSystem.getBufferRadius _ expects(mockPolygon2, 1, mockGeometryAPI) returning 1.0 anyNumberOfTimes() + mockIndexSystem.getCoreChips _ expects(Seq(1L), true, mockGeometryAPI) returning Seq(mockMosaicChip) anyNumberOfTimes() + mockIndexSystem.getBorderChips _ expects(mockPolygon2, Seq(), true, mockGeometryAPI) returning Seq() anyNumberOfTimes() + + + Mosaic.getChips(mockPolygon2, resolution, keepCoreGeom = true, mockIndexSystem, mockGeometryAPI) shouldBe a[Seq[MosaicChip]] + Mosaic.getChips(mockPolygon2, resolution, keepCoreGeom = true, mockIndexSystem, mockGeometryAPI).map(_.cellIdAsLong(mockIndexSystem)) should contain theSameElementsAs Seq(1L) + } + + 
test("Mosaic should fail for lineFill on polygon") { + doMock() + + an[Error] should be thrownBy Mosaic.lineFill(mockPolygon, 1, mockIndexSystem, mockGeometryAPI) + } + + test("Mosaic should implement geometry kRing") { + doMock() + + (mockIndexSystem.kRing(_: Long, _: Int)) expects(1, 2) returning Seq(1L) anyNumberOfTimes() + + Mosaic.geometryKRing(mockPolygon, 1, 2, mockIndexSystem, mockGeometryAPI) shouldBe a[Set[Long]] + Mosaic.geometryKRing(mockPolygon, 1, 2, mockIndexSystem, mockGeometryAPI) should contain theSameElementsAs Seq(1L) + } + + test("Mosaic should implement geometry kLoop") { + doMock() + + (mockIndexSystem.kLoop(_: Long, _: Int)) expects(1, 2) returning Seq(1L, 2L) anyNumberOfTimes() + + Mosaic.geometryKLoop(mockPolygon, 1, 2, mockIndexSystem, mockGeometryAPI) shouldBe a[Set[Long]] + Mosaic.geometryKLoop(mockPolygon, 1, 2, mockIndexSystem, mockGeometryAPI) should contain theSameElementsAs Seq(2L) + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/codegen/format/ConvertToCodeGenTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/codegen/format/ConvertToCodeGenTest.scala new file mode 100644 index 000000000..675c2c2b3 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/codegen/format/ConvertToCodeGenTest.scala @@ -0,0 +1,203 @@ +package com.databricks.labs.mosaic.core.codegen.format + +import com.databricks.labs.mosaic.core.expressions.geometry.RequiresCRS +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.types.{GeoJSONType, HexType} +import org.apache.spark.sql.catalyst.expressions.codegen._ +import org.apache.spark.sql.types.{BinaryType, CalendarIntervalType, StringType} +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class ConvertToCodeGenTest extends AnyFunSuite with MockFactory { + + val mockGeometryAPI: GeometryAPI = 
mock[GeometryAPI] + val mockIO: GeometryIOCodeGen = mock[GeometryIOCodeGen] + val mocCtx: CodegenContext = mock[CodegenContext] + + def doMock(): Unit = { + mockIO.fromWKT _ expects(mocCtx, "eval1", mockGeometryAPI) returning( + "Geometry geom1 = new Geometry(eval1.toString());", "geom1" + ) anyNumberOfTimes() + mockIO.fromWKB _ expects(mocCtx, "eval1", mockGeometryAPI) returning( + "Geometry geom1 = new Geometry(eval1.bytes());", "geom1" + ) anyNumberOfTimes() + mockIO.fromHex _ expects(mocCtx, "eval1", mockGeometryAPI) returning( + "Geometry geom1 = new Geometry(eval1.hex().toBytes());", "geom1" + ) anyNumberOfTimes() + mockIO.fromGeoJSON _ expects(mocCtx, "eval1", mockGeometryAPI) returning( + "Geometry geom1 = Geometry.parseJSON(eval1);", "geom1" + ) anyNumberOfTimes() + + mockIO.toWKT _ expects(mocCtx, "geom1", mockGeometryAPI) returning( + "String wkt2 = geom1.toWKT();", "wkt2" + ) anyNumberOfTimes() + mockIO.toWKB _ expects(mocCtx, "geom1", mockGeometryAPI) returning( + "byte[] wkb2 = geom1.toWKB();", "wkb2" + ) anyNumberOfTimes() + mockIO.toHEX _ expects(mocCtx, "geom1", mockGeometryAPI) returning( + "String hex2 = geom1.toHex();", "hex2" + ) anyNumberOfTimes() + mockIO.toGeoJSON _ expects(mocCtx, "geom1", mockGeometryAPI) returning( + "String json2 = geom1.toJSON();", "json2" + ) anyNumberOfTimes() + + mockGeometryAPI.ioCodeGen _ expects() returning mockIO anyNumberOfTimes() + } + + test("ConvertToCodeGen should generate read code") { + doMock() + + ConvertToCodeGen.readGeometryCode( + mocCtx, "eval1", StringType, mockGeometryAPI + ) shouldEqual mockIO.fromWKT(mocCtx, "eval1", mockGeometryAPI) + + ConvertToCodeGen.readGeometryCode( + mocCtx, "eval1", BinaryType, mockGeometryAPI + ) shouldEqual mockIO.fromWKB(mocCtx, "eval1", mockGeometryAPI) + + ConvertToCodeGen.readGeometryCode( + mocCtx, "eval1", HexType, mockGeometryAPI + ) shouldEqual mockIO.fromHex(mocCtx, "eval1", mockGeometryAPI) + + ConvertToCodeGen.readGeometryCode( + mocCtx, "eval1", GeoJSONType, 
mockGeometryAPI + ) shouldEqual mockIO.fromGeoJSON(mocCtx, "eval1", mockGeometryAPI) + + an[Error] should be thrownBy ConvertToCodeGen.readGeometryCode( + mocCtx, "eval1", CalendarIntervalType, mockGeometryAPI + ) + } + + test("ConvertToCodeGen should generate write code") { + doMock() + + ConvertToCodeGen.writeGeometryCode( + mocCtx, "geom1", StringType, mockGeometryAPI + ) shouldEqual mockIO.toWKT(mocCtx, "geom1", mockGeometryAPI) + + ConvertToCodeGen.writeGeometryCode( + mocCtx, "geom1", BinaryType, mockGeometryAPI + ) shouldEqual mockIO.toWKB(mocCtx, "geom1", mockGeometryAPI) + + ConvertToCodeGen.writeGeometryCode( + mocCtx, "geom1", HexType, mockGeometryAPI + ) shouldEqual mockIO.toHEX(mocCtx, "geom1", mockGeometryAPI) + + ConvertToCodeGen.writeGeometryCode( + mocCtx, "geom1", "GEOJSON", mockGeometryAPI + ) shouldEqual mockIO.toGeoJSON(mocCtx, "geom1", mockGeometryAPI) + + an[Error] should be thrownBy ConvertToCodeGen.writeGeometryCode( + mocCtx, "eval1", "other", mockGeometryAPI + ) + } + + test("ConvertToCodeGen should generate code for different input and output types") { + doMock() + + // Cannot mock due to inheritance issues + val valueEv = VariableValue("eval3", null) + val evCode = ExprCode(null, valueEv) + + val expectedCode: String = + s""" + |Geometry geom1 = new Geometry(eval1.toString()); + |byte[] wkb2 = geom1.toWKB(); + |eval3 = wkb2; + |""".stripMargin + + mockGeometryAPI.codeGenTryWrap _ expects expectedCode returning + s"""try{$expectedCode}""" anyNumberOfTimes() + + + val result = ConvertToCodeGen.fromEval( + mocCtx, + evCode, + "eval1", + StringType, + "WKB", + mockGeometryAPI + ) + + result.contains(expectedCode) shouldBe true + result.contains("try") && result.contains("{") && result.contains("}") shouldBe true + + val nullSafeWrapper: (CodegenContext, ExprCode, String => String) => ExprCode = { + (_: CodegenContext, _: ExprCode, _: String => String) => { + val code = ConvertToCodeGen.fromEval( + mocCtx, + evCode, + "eval1", + StringType, 
+ "WKB", + mockGeometryAPI) + ExprCode(null, VariableValue(code, null)) + } + } + + val codeGen = ConvertToCodeGen.doCodeGen( + mocCtx, + evCode, + nullSafeWrapper, + StringType, + "WKB", + mockGeometryAPI + ) + + codeGen.value.code.contains(expectedCode) shouldBe true + + } + + test("ConvertToCodeGen should generate code for same input and output types") { + doMock() + + // Cannot mock due to inheritance issues + val valueEv = VariableValue("eval2", null) + val evCode = ExprCode(null, valueEv) + + + val expectedCode: String = + s""" + |eval2 = eval1; + |""".stripMargin + + + val result = ConvertToCodeGen.fromEval( + mocCtx, + evCode, + "eval1", + BinaryType, + "binary", + mockGeometryAPI + ) + + result.contains(expectedCode) shouldBe true + !result.contains("try") && !result.contains("{") && !result.contains("}") shouldBe true + + val nullSafeWrapper: (CodegenContext, ExprCode, String => String) => ExprCode = { + (_: CodegenContext, _: ExprCode, _: String => String) => ExprCode(null, VariableValue(expectedCode, null)) + } + + val codeGen = ConvertToCodeGen.doCodeGen( + mocCtx, + evCode, + nullSafeWrapper, + BinaryType, + "WKB", + mockGeometryAPI + ) + + codeGen.value.code.contains(expectedCode) shouldBe true + } + + test("RequiresCRS should return correct encoding for each geometry type") { + doMock() + + object TestObject extends RequiresCRS {} + + noException should be thrownBy TestObject.checkEncoding(GeoJSONType) + an[Exception] should be thrownBy TestObject.checkEncoding(StringType) + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryFormatTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryFormatTest.scala new file mode 100644 index 000000000..9ef7874ff --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/codegen/format/GeometryFormatTest.scala @@ -0,0 +1,19 @@ +package com.databricks.labs.mosaic.core.codegen.format + +import 
com.databricks.labs.mosaic.core.types.{GeoJSONType, HexType} +import org.apache.spark.sql.types.{BinaryType, CalendarIntervalType, StringType} +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class GeometryFormatTest extends AnyFunSuite with MockFactory { + + test("GeometryFormat should handle valid and invalid data types") { + noException should be thrownBy GeometryFormat.getDefaultFormat(BinaryType) + noException should be thrownBy GeometryFormat.getDefaultFormat(StringType) + noException should be thrownBy GeometryFormat.getDefaultFormat(HexType) + noException should be thrownBy GeometryFormat.getDefaultFormat(GeoJSONType) + an[Error] should be thrownBy GeometryFormat.getDefaultFormat(CalendarIntervalType) + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/crs/CRSBoundsProviderTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/crs/CRSBoundsProviderTest.scala new file mode 100644 index 000000000..33f47702e --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/crs/CRSBoundsProviderTest.scala @@ -0,0 +1,74 @@ +package com.databricks.labs.mosaic.core.crs + +import com.databricks.labs.mosaic.core.expressions.geometry.RequiresCRS +import com.databricks.labs.mosaic.core.geometry.MosaicPoint +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.types.GeoJSONType +import org.apache.spark.sql.types.StringType +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class CRSBoundsProviderTest extends AnyFunSuite with MockFactory { + + val mockGeometryAPI: GeometryAPI = mock[GeometryAPI] + val mockPoint1: MosaicPoint = mock[MosaicPoint] + val mockPoint2: MosaicPoint = mock[MosaicPoint] + + + def doMock(): Unit = { + mockPoint1.getX _ expects() returning -180.00 anyNumberOfTimes() 
+ mockPoint1.getY _ expects() returning -90.00 anyNumberOfTimes() + mockPoint2.getX _ expects() returning 180.00 anyNumberOfTimes() + mockPoint2.getY _ expects() returning 90.00 anyNumberOfTimes() + mockGeometryAPI.fromCoords _ expects Seq(-180.00, -90.00) returning mockPoint1 anyNumberOfTimes() + mockGeometryAPI.fromCoords _ expects Seq(180.00, 90.00) returning mockPoint2 anyNumberOfTimes() + } + + + test("CRSBoundsProvider should load resource file and return correct bounds for EPSG:4326") { + doMock() + + val boundsProvider = CRSBoundsProvider(geometryAPI = mockGeometryAPI) + val bounds4326 = boundsProvider.bounds("EPSG", 4326) + + bounds4326.lowerLeft.getX shouldBe -180.00 + bounds4326.lowerLeft.getY shouldBe -90.00 + bounds4326.upperRight.getX shouldBe 180.00 + bounds4326.upperRight.getY shouldBe 90.00 + bounds4326.getUpperX shouldBe 180.00 + bounds4326.getUpperY shouldBe 90.00 + bounds4326.getLowerX shouldBe -180.00 + bounds4326.getLowerY shouldBe -90.00 + } + + test("CRSBoundsProvider should load resource file and return correct reprojected bounds for EPSG:4326") { + doMock() + + val boundsProvider = CRSBoundsProvider(geometryAPI = mockGeometryAPI) + val bounds4326 = boundsProvider.reprojectedBounds("EPSG", 4326) + + bounds4326.lowerLeft.getX shouldBe -180.00 + bounds4326.lowerLeft.getY shouldBe -90.00 + bounds4326.upperRight.getX shouldBe 180.00 + bounds4326.upperRight.getY shouldBe 90.00 + } + + test("CRSBoundsProvider should fail to load resource file and throw exception for EPSG:-9999") { + doMock() + + val boundsProvider = CRSBoundsProvider(geometryAPI = mockGeometryAPI) + + an[Exception] should be thrownBy boundsProvider.bounds("EPSG", 9999) + an[Exception] should be thrownBy boundsProvider.reprojectedBounds("EPSG", 9999) + } + + test("RequiresCRS should return correct encoding for each geometry type") { + object TestObject extends RequiresCRS {} + + + noException should be thrownBy TestObject.checkEncoding(GeoJSONType) + an[Exception] should be 
thrownBy TestObject.checkEncoding(StringType) + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/GenericExpressionFactoryTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/GenericExpressionFactoryTest.scala new file mode 100644 index 000000000..04c5a03a4 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/GenericExpressionFactoryTest.scala @@ -0,0 +1,28 @@ +package com.databricks.labs.mosaic.core.expressions + +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder +import org.apache.spark.sql.catalyst.expressions.{Add, Expression} +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class GenericExpressionFactoryTest extends AnyFunSuite with MockFactory { + + test("GenericExpressionFactory should make copy of Add expression") { + val addExpression = Add(mock[Expression], mock[Expression]) + val newArgs = Array(mock[Expression], mock[Expression]) + + GenericExpressionFactory.makeCopyImpl[Add]( + addExpression, newArgs.map(_.asInstanceOf[AnyRef]), 2, mock[MosaicExpressionConfig] + ) shouldBe Add(newArgs(0), newArgs(1)) + } + + test("GenericExpressionFactory should generate a base builder") { + val mockExpr = mock[Expression] + mockExpr.toString _ expects() returning "mockExpr" anyNumberOfTimes() + GenericExpressionFactory.getBaseBuilder[Add](2, mock[MosaicExpressionConfig]) shouldBe a[FunctionBuilder] + val builder = GenericExpressionFactory.getBaseBuilder[Add](2, mock[MosaicExpressionConfig]) + builder.apply(Seq(mockExpr, mockExpr)) shouldBe Add(mockExpr, mockExpr) + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/MosaicExpressionConfigTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/MosaicExpressionConfigTest.scala new file mode 100644 index 000000000..4a3f8a69c --- 
/dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/MosaicExpressionConfigTest.scala @@ -0,0 +1,55 @@ +package com.databricks.labs.mosaic.core.expressions + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.core.raster.RasterAPI +import com.databricks.labs.mosaic.core.{MOSAIC_GEOMETRY_API, MOSAIC_INDEX_SYSTEM, MOSAIC_RASTER_API, MOSAIC_RASTER_CHECKPOINT} +import org.apache.spark.SharedSparkContext +import org.apache.spark.sql.SparkSession +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +//noinspection ScalaUnusedSymbol +class MosaicExpressionConfigTest extends AnyFunSuite with SharedSparkContext with MockFactory { + + test("MosaicExpressionConfig") { + val spark = SparkSession.builder().getOrCreate() + spark.sparkContext.setLogLevel("FATAL") + + val mockGeometryAPI = mock[GeometryAPI] + val mockIndexSystem = mock[IndexSystem] + val mockRasterAPI = mock[RasterAPI] + + // Class paths match the scalamock macro generated classes, the order needs to be preserved + val configs = Map( + MOSAIC_GEOMETRY_API -> "com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$1", + MOSAIC_INDEX_SYSTEM -> "com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$2", + MOSAIC_RASTER_API -> "com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$3", + MOSAIC_RASTER_CHECKPOINT -> "mosaic-raster-checkpoint" + ) + + val mosaicExpressionConfig = MosaicExpressionConfig(configs) + + noException should be thrownBy mosaicExpressionConfig.updateSparkConf() + + noException should be thrownBy mosaicExpressionConfig.getGeometryAPI(Array(this)) + noException should be thrownBy mosaicExpressionConfig.getIndexSystem(Array(this)) + noException should be thrownBy mosaicExpressionConfig.getRasterAPI(Array(this)) 
+ noException should be thrownBy mosaicExpressionConfig.getRasterCheckpoint + + noException should be thrownBy mosaicExpressionConfig.setGeometryAPI("geometryAPI") + noException should be thrownBy mosaicExpressionConfig.setIndexSystem("indexSystem") + noException should be thrownBy mosaicExpressionConfig.setRasterAPI("rasterAPI") + noException should be thrownBy mosaicExpressionConfig.setRasterCheckpoint("rasterCheckpoint") + noException should be thrownBy mosaicExpressionConfig.setConfig("key", "value") + + spark.conf.set(MOSAIC_GEOMETRY_API, "com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$1") + spark.conf.set(MOSAIC_INDEX_SYSTEM, "com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$2") + spark.conf.set(MOSAIC_RASTER_API, "com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfigTest$$anon$3") + spark.conf.set(MOSAIC_RASTER_CHECKPOINT, "mosaic-raster-checkpoint") + MosaicExpressionConfig(spark) shouldBe a[MosaicExpressionConfig] + + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/WithExpressionInfoTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/WithExpressionInfoTest.scala new file mode 100644 index 000000000..1ed4b956c --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/WithExpressionInfoTest.scala @@ -0,0 +1,25 @@ +package com.databricks.labs.mosaic.core.expressions + +import org.apache.spark.sql.catalyst.expressions.ExpressionInfo +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +//noinspection ScalaUnusedSymbol +class WithExpressionInfoTest extends AnyFunSuite with MockFactory { + + abstract class TestExpression extends WithExpressionInfo { + override final def usage: String = super.usage + override final def example: String = super.example + override final def group: String = super.group + 
override final def database: Option[String] = super.database + } + + val mockExpression: TestExpression = mock[TestExpression] + + test("MosaicExpressionConfig") { + mockExpression.name _ expects() returning "test" + mockExpression.getExpressionInfo() shouldBe a[ExpressionInfo] + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/BinaryVectorExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/BinaryVectorExpressionTest.scala new file mode 100644 index 000000000..0800b44fc --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/BinaryVectorExpressionTest.scala @@ -0,0 +1,122 @@ +package com.databricks.labs.mosaic.core.expressions.geometry + +import com.databricks.labs.mosaic.core.codegen.format.GeometryIOCodeGen +import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig +import com.databricks.labs.mosaic.core.geometry.MosaicGeometry +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import org.apache.spark.sql.catalyst.expressions.{Add, Expression} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode, VariableValue} +import org.apache.spark.sql.types.{BinaryType, StringType} +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class BinaryVectorExpressionTest extends AnyFunSuite with MockFactory { + + val mockLeftExpression: Expression = mock[Expression] + val mockRightExpression: Expression = mock[Expression] + val mockGeometryAPI: GeometryAPI = mock[GeometryAPI] + val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig] + + // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class + // We are using Add as a template in order to test makeCopy which is 
linked to GenericExpressionFactory + abstract class DummyExpr extends BinaryVectorExpression[Add]( + mockLeftExpression, mockRightExpression, true, mockExpressionConfig + ) { + // For partial mocking, make methods that are testable final, scalamock will not mock final methods + override final def nullSafeEval(leftGeometryRow: Any, rightGeometryRow: Any): Any = + super.nullSafeEval(leftGeometryRow, rightGeometryRow) + + override final def left: Expression = super.left + + override final def right: Expression = super.right + + override final def geometryAPI: GeometryAPI = super.geometryAPI + + override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs) + + override final def withNewChildrenInternal(newFirst: Expression, newSecond: Expression): Expression = + super.withNewChildrenInternal(newFirst, newSecond) + + override final def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = super.doGenCode(ctx, ev) + + // We are making inherited nullSafeCodeGen final and passthrough so that we can test the nested behavior in doGenCode + override final def nullSafeCodeGen(ctx: CodegenContext, ev: ExprCode, f: (String, String) => String): ExprCode = { + ExprCode(null, VariableValue(f("geom1", "geom2"), null)) + } + + } + + val bytes: Array[Byte] = "POINT EMPTY".getBytes + + val mockExpression: DummyExpr = mock[DummyExpr] + val mockIndexSystem: IndexSystem = mock[IndexSystem] + val mockPoint: MosaicGeometry = mock[MosaicGeometry] + val mockCtx: CodegenContext = mock[CodegenContext] + val mockIO: GeometryIOCodeGen = mock[GeometryIOCodeGen] + + val expectedCode: String = + """ + |Geometry geom1 = Geometry(wkb1); + |Geometry geom2 = Geometry(wkt1); + |MosaicGeometry geom3 = MosaicGeometry(geom1).add(MosaicGeometry(geom2)); + |byte[] wkb3 = geom3.toWKB(); + |eval1 = wkb3; + |""".stripMargin + + def doMock(): Unit = { + mockExpressionConfig.getGeometryAPI _ expects * returning mockGeometryAPI anyNumberOfTimes() + + mockLeftExpression.dataType _ 
expects() returning BinaryType anyNumberOfTimes() + mockRightExpression.dataType _ expects() returning StringType anyNumberOfTimes() + + mockGeometryAPI.valueToGeometry _ expects(bytes, mockLeftExpression.dataType) returning mockPoint anyNumberOfTimes() + mockGeometryAPI.valueToGeometry _ expects("POINT EMPTY", mockRightExpression.dataType) returning mockPoint anyNumberOfTimes() + mockGeometryAPI.ioCodeGen _ expects() returning mockIO anyNumberOfTimes() + mockGeometryAPI.codeGenTryWrap _ expects expectedCode returning expectedCode anyNumberOfTimes() + + mockIO.fromWKB _ expects(mockCtx, "geom1", mockGeometryAPI) returning ("Geometry geom1 = Geometry(wkb1);", "geom1") anyNumberOfTimes() + mockIO.fromWKT _ expects(mockCtx, "geom2", mockGeometryAPI) returning ("Geometry geom2 = Geometry(wkt1);", "geom2") anyNumberOfTimes() + + mockExpression.geometryTransform _ expects(*, *) returning mockPoint anyNumberOfTimes() + mockExpression.serialise _ expects(mockPoint, true, BinaryType) returning bytes anyNumberOfTimes() + mockExpression.mosaicGeometryRef _ expects "geom1" returning "MosaicGeometry(geom1)" anyNumberOfTimes() + mockExpression.mosaicGeometryRef _ expects "geom2" returning "MosaicGeometry(geom2)" anyNumberOfTimes() + mockExpression.geometryCodeGen _ expects(*, *, *) returning ("MosaicGeometry geom3 = MosaicGeometry(geom1).add(MosaicGeometry(geom2));", "geom3") anyNumberOfTimes() + mockExpression.serialiseCodegen _ expects(*, *, *, *) returning ("byte[] wkb3 = geom3.toWKB();", "wkb3") anyNumberOfTimes() + + } + + + test("BinaryVectorExpression should implement accessor methods") { + doMock() + + mockExpression.left shouldBe mockLeftExpression + mockExpression.right shouldBe mockRightExpression + mockExpression.geometryAPI shouldBe mockGeometryAPI + mockExpression.makeCopy(Array(mockLeftExpression, mockRightExpression)) shouldBe Add(mockLeftExpression, mockRightExpression) + mockExpression.withNewChildrenInternal(mockLeftExpression, mockRightExpression) shouldBe 
Add(mockLeftExpression, mockRightExpression) + } + + test("VectorExpression should evaluate") { + doMock() + + val result = mockExpression.nullSafeEval( + bytes, + "POINT EMPTY" + ) + + result shouldBe bytes + + } + + test("VectorExpression should doGenCode") { + doMock() + + val exprCode = ExprCode(null, VariableValue("eval1", null)) + mockExpression.doGenCode(mockCtx, exprCode).value.code shouldBe expectedCode + } + + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/RequiresCRSTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/RequiresCRSTest.scala new file mode 100644 index 000000000..c3fbe4a18 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/RequiresCRSTest.scala @@ -0,0 +1,17 @@ +package com.databricks.labs.mosaic.core.expressions.geometry + +import com.databricks.labs.mosaic.core.types.GeoJSONType +import org.apache.spark.sql.types.StringType +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class RequiresCRSTest extends AnyFunSuite with MockFactory { + + test("RequiresCRS should return correct encoding for each geometry type") { + object TestObject extends RequiresCRS {} + noException should be thrownBy TestObject.checkEncoding(GeoJSONType) + an[Exception] should be thrownBy TestObject.checkEncoding(StringType) + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector1ArgExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector1ArgExpressionTest.scala new file mode 100644 index 000000000..065b1d7af --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector1ArgExpressionTest.scala @@ -0,0 +1,113 @@ +package com.databricks.labs.mosaic.core.expressions.geometry + +import 
com.databricks.labs.mosaic.core.codegen.format.GeometryIOCodeGen +import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig +import com.databricks.labs.mosaic.core.geometry.MosaicGeometry +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode, VariableValue} +import org.apache.spark.sql.catalyst.expressions.{Add, Expression} +import org.apache.spark.sql.types.BinaryType +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class UnaryVector1ArgExpressionTest extends AnyFunSuite with MockFactory { + + val mockLeftExpression: Expression = mock[Expression] + val mockRightExpression: Expression = mock[Expression] + val mockGeometryAPI: GeometryAPI = mock[GeometryAPI] + val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig] + + // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class + // We are using Add as a template in order to test makeCopy which is linked to GenericExpressionFactory + abstract class DummyExpr extends UnaryVector1ArgExpression[Add]( + mockLeftExpression, mockRightExpression, true, mockExpressionConfig + ) { + // For partial mocking, make methods that are testable final, scalamock will not mock final methods + override final def nullSafeEval(leftGeometryRow: Any, arg1Row: Any): Any = + super.nullSafeEval(leftGeometryRow, arg1Row) + + override final def left: Expression = super.left + + override final def right: Expression = super.right + + override final def geometryAPI: GeometryAPI = super.geometryAPI + + override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs) + + override final def withNewChildrenInternal(newFirst: Expression, newSecond: Expression): Expression = + 
super.withNewChildrenInternal(newFirst, newSecond) + + override final def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = super.doGenCode(ctx, ev) + + // We are making inherited nullSafeCodeGen final and passthrough so that we can test the nested behavior in doGenCode + override final def nullSafeCodeGen(ctx: CodegenContext, ev: ExprCode, f: (String, String) => String): ExprCode = { + ExprCode(null, VariableValue(f("geom1", "arg1"), null)) + } + + } + + val bytes: Array[Byte] = "POINT EMPTY".getBytes + + val mockExpression: DummyExpr = mock[DummyExpr] + val mockIndexSystem: IndexSystem = mock[IndexSystem] + val mockPoint: MosaicGeometry = mock[MosaicGeometry] + val mockCtx: CodegenContext = mock[CodegenContext] + val mockIO: GeometryIOCodeGen = mock[GeometryIOCodeGen] + + val expectedCode: String = + """ + |Geometry geom1 = Geometry(wkb1); + |MosaicGeometry geom3 = MosaicGeometry(geom1).buffer(arg1); + |byte[] wkb3 = geom3.toWKB(); + |eval1 = wkb3; + |""".stripMargin + + def doMock(): Unit = { + mockExpressionConfig.getGeometryAPI _ expects * returning mockGeometryAPI anyNumberOfTimes() + + mockLeftExpression.dataType _ expects() returning BinaryType anyNumberOfTimes() + + mockGeometryAPI.valueToGeometry _ expects(bytes, mockLeftExpression.dataType) returning mockPoint anyNumberOfTimes() + mockGeometryAPI.ioCodeGen _ expects() returning mockIO anyNumberOfTimes() + mockGeometryAPI.codeGenTryWrap _ expects expectedCode returning expectedCode anyNumberOfTimes() + + mockIO.fromWKB _ expects(mockCtx, "geom1", mockGeometryAPI) returning("Geometry geom1 = Geometry(wkb1);", "geom1") anyNumberOfTimes() + + mockExpression.geometryTransform _ expects (*, *) returning mockPoint anyNumberOfTimes() + mockExpression.serialise _ expects(mockPoint, true, BinaryType) returning bytes anyNumberOfTimes() + mockExpression.mosaicGeometryRef _ expects "geom1" returning "MosaicGeometry(geom1)" anyNumberOfTimes() + mockExpression.geometryCodeGen _ expects(*, *, *) 
returning("MosaicGeometry geom3 = MosaicGeometry(geom1).buffer(arg1);", "geom3") anyNumberOfTimes() + mockExpression.serialiseCodegen _ expects(*, *, *, *) returning("byte[] wkb3 = geom3.toWKB();", "wkb3") anyNumberOfTimes() + + } + + + test("BinaryVectorExpression should implement accessor methods") { + doMock() + + mockExpression.left shouldBe mockLeftExpression + mockExpression.right shouldBe mockRightExpression + mockExpression.geometryAPI shouldBe mockGeometryAPI + mockExpression.makeCopy(Array(mockLeftExpression, mockRightExpression)) shouldBe Add(mockLeftExpression, mockRightExpression) + mockExpression.withNewChildrenInternal(mockLeftExpression, mockRightExpression) shouldBe Add(mockLeftExpression, mockRightExpression) + } + + test("VectorExpression should evaluate") { + doMock() + + val result = mockExpression.nullSafeEval(bytes, 1) + + result shouldBe bytes + } + + test("VectorExpression should doGenCode") { + doMock() + + val exprCode = ExprCode(null, VariableValue("eval1", null)) + mockExpression.doGenCode(mockCtx, exprCode).value.code shouldBe expectedCode + } + + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector2ArgExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector2ArgExpressionTest.scala new file mode 100644 index 000000000..8eab70db5 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVector2ArgExpressionTest.scala @@ -0,0 +1,119 @@ +package com.databricks.labs.mosaic.core.expressions.geometry + +import com.databricks.labs.mosaic.core.codegen.format.GeometryIOCodeGen +import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig +import com.databricks.labs.mosaic.core.geometry.MosaicGeometry +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import 
org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode, VariableValue} +import org.apache.spark.sql.catalyst.expressions.{Conv, Expression} +import org.apache.spark.sql.types.BinaryType +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class UnaryVector2ArgExpressionTest extends AnyFunSuite with MockFactory { + + val mockFirstExpression: Expression = mock[Expression] + val mockSecondExpression: Expression = mock[Expression] + val mockThirdExpression: Expression = mock[Expression] + val mockGeometryAPI: GeometryAPI = mock[GeometryAPI] + val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig] + + // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class + // We are using Conv as a template in order to test makeCopy which is linked to GenericExpressionFactory + abstract class DummyExpr extends UnaryVector2ArgExpression[Conv]( + mockFirstExpression, mockSecondExpression, mockThirdExpression, true, mockExpressionConfig + ) { + // For partial mocking, make methods that are testable final, scalamock will not mock final methods + override final def nullSafeEval(leftGeometryRow: Any, arg1Row: Any, arg2Row: Any): Any = + super.nullSafeEval(leftGeometryRow, arg1Row, arg2Row) + + override final def first: Expression = super.first + + override final def second: Expression = super.second + + override final def third: Expression = super.third + + override final def geometryAPI: GeometryAPI = super.geometryAPI + + override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs) + + override final def withNewChildrenInternal(newFirst: Expression, newSecond: Expression, newThird: Expression): Expression = + super.withNewChildrenInternal(newFirst, newSecond, newThird) + + override final def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = super.doGenCode(ctx, ev) + + // We are 
making inherited nullSafeCodeGen final and passthrough so that we can test the nested behavior in doGenCode + override final def nullSafeCodeGen(ctx: CodegenContext, ev: ExprCode, f: (String, String, String) => String): ExprCode = { + ExprCode(null, VariableValue(f("geom1", "arg1", "arg2"), null)) + } + + } + + val bytes: Array[Byte] = "POINT EMPTY".getBytes + + val mockExpression: DummyExpr = mock[DummyExpr] + val mockIndexSystem: IndexSystem = mock[IndexSystem] + val mockPoint: MosaicGeometry = mock[MosaicGeometry] + val mockCtx: CodegenContext = mock[CodegenContext] + val mockIO: GeometryIOCodeGen = mock[GeometryIOCodeGen] + + val expectedCode: String = + """ + |Geometry geom1 = Geometry(wkb1); + |MosaicGeometry geom3 = MosaicGeometry(geom1).buffer(arg1 + arg2); + |byte[] wkb3 = geom3.toWKB(); + |eval1 = wkb3; + |""".stripMargin + + def doMock(): Unit = { + mockExpressionConfig.getGeometryAPI _ expects * returning mockGeometryAPI anyNumberOfTimes() + + mockFirstExpression.dataType _ expects() returning BinaryType anyNumberOfTimes() + + mockGeometryAPI.valueToGeometry _ expects(bytes, mockFirstExpression.dataType) returning mockPoint anyNumberOfTimes() + mockGeometryAPI.ioCodeGen _ expects() returning mockIO anyNumberOfTimes() + mockGeometryAPI.codeGenTryWrap _ expects expectedCode returning expectedCode anyNumberOfTimes() + + mockIO.fromWKB _ expects(mockCtx, "geom1", mockGeometryAPI) returning("Geometry geom1 = Geometry(wkb1);", "geom1") anyNumberOfTimes() + + mockExpression.geometryTransform _ expects (*, *, *) returning mockPoint anyNumberOfTimes() + mockExpression.serialise _ expects(mockPoint, true, BinaryType) returning bytes anyNumberOfTimes() + mockExpression.mosaicGeometryRef _ expects "geom1" returning "MosaicGeometry(geom1)" anyNumberOfTimes() + mockExpression.geometryCodeGen _ expects(*, *, *, *) returning("MosaicGeometry geom3 = MosaicGeometry(geom1).buffer(arg1 + arg2);", "geom3") anyNumberOfTimes() + mockExpression.serialiseCodegen _ expects(*, *, 
*, *) returning("byte[] wkb3 = geom3.toWKB();", "wkb3") anyNumberOfTimes() + + } + + + test("UnaryVector2ArgExpression should implement accessor methods") { + doMock() + + mockExpression.first shouldBe mockFirstExpression + mockExpression.second shouldBe mockSecondExpression + mockExpression.third shouldBe mockThirdExpression + mockExpression.geometryAPI shouldBe mockGeometryAPI + mockExpression.makeCopy(Array(mockFirstExpression, mockSecondExpression, mockThirdExpression)) shouldBe + Conv(mockFirstExpression, mockSecondExpression, mockThirdExpression) + mockExpression.withNewChildrenInternal(mockFirstExpression, mockSecondExpression, mockThirdExpression) shouldBe + Conv(mockFirstExpression, mockSecondExpression, mockThirdExpression) + } + + test("VectorExpression should evaluate") { + doMock() + + val result = mockExpression.nullSafeEval(bytes, 1, 2) + + result shouldBe bytes + } + + test("VectorExpression should doGenCode") { + doMock() + + val exprCode = ExprCode(null, VariableValue("eval1", null)) + mockExpression.doGenCode(mockCtx, exprCode).value.code shouldBe expectedCode + } + + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVectorExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVectorExpressionTest.scala new file mode 100644 index 000000000..7e5dfae9e --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/UnaryVectorExpressionTest.scala @@ -0,0 +1,109 @@ +package com.databricks.labs.mosaic.core.expressions.geometry + +import com.databricks.labs.mosaic.core.codegen.format.GeometryIOCodeGen +import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig +import com.databricks.labs.mosaic.core.geometry.MosaicGeometry +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import
org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode, VariableValue} +import org.apache.spark.sql.catalyst.expressions.{Abs, Expression} +import org.apache.spark.sql.types.BinaryType +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class UnaryVectorExpressionTest extends AnyFunSuite with MockFactory { + + val mockLeftExpression: Expression = mock[Expression] + val mockGeometryAPI: GeometryAPI = mock[GeometryAPI] + val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig] + + // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class + // We are using Abs as a template in order to test makeCopy which is linked to GenericExpressionFactory + abstract class DummyExpr extends UnaryVectorExpression[Abs]( + mockLeftExpression, true, mockExpressionConfig + ) { + // For partial mocking, make methods that are testable final, scalamock will not mock final methods + override final def nullSafeEval(leftGeometryRow: Any): Any = + super.nullSafeEval(leftGeometryRow) + + override final def child: Expression = super.child + + override final def geometryAPI: GeometryAPI = super.geometryAPI + + override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs) + + override final def withNewChildInternal(newFirst: Expression): Expression = + super.withNewChildInternal(newFirst) + + override final def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = super.doGenCode(ctx, ev) + + // We are making inherited nullSafeCodeGen final and passthrough so that we can test the nested behavior in doGenCode + override final def nullSafeCodeGen(ctx: CodegenContext, ev: ExprCode, f: String => String): ExprCode = { + ExprCode(null, VariableValue(f("geom1"), null)) + } + + } + + val bytes: Array[Byte] = "POINT EMPTY".getBytes + + val mockExpression: DummyExpr = mock[DummyExpr] + val mockIndexSystem: 
IndexSystem = mock[IndexSystem] + val mockPoint: MosaicGeometry = mock[MosaicGeometry] + val mockCtx: CodegenContext = mock[CodegenContext] + val mockIO: GeometryIOCodeGen = mock[GeometryIOCodeGen] + + val expectedCode: String = + """ + |Geometry geom1 = Geometry(wkb1); + |MosaicGeometry geom3 = MosaicGeometry(geom1); + |byte[] wkb3 = geom3.toWKB(); + |eval1 = wkb3; + |""".stripMargin + + def doMock(): Unit = { + mockExpressionConfig.getGeometryAPI _ expects * returning mockGeometryAPI anyNumberOfTimes() + + mockLeftExpression.dataType _ expects() returning BinaryType anyNumberOfTimes() + + mockGeometryAPI.valueToGeometry _ expects(bytes, mockLeftExpression.dataType) returning mockPoint anyNumberOfTimes() + mockGeometryAPI.ioCodeGen _ expects() returning mockIO anyNumberOfTimes() + mockGeometryAPI.codeGenTryWrap _ expects expectedCode returning expectedCode anyNumberOfTimes() + + mockIO.fromWKB _ expects(mockCtx, "geom1", mockGeometryAPI) returning ("Geometry geom1 = Geometry(wkb1);", "geom1") anyNumberOfTimes() + + mockExpression.geometryTransform _ expects * returning mockPoint anyNumberOfTimes() + mockExpression.serialise _ expects(mockPoint, true, BinaryType) returning bytes anyNumberOfTimes() + mockExpression.mosaicGeometryRef _ expects "geom1" returning "MosaicGeometry(geom1)" anyNumberOfTimes() + mockExpression.geometryCodeGen _ expects(*, *) returning ("MosaicGeometry geom3 = MosaicGeometry(geom1);", "geom3") anyNumberOfTimes() + mockExpression.serialiseCodegen _ expects(*, *, *, *) returning ("byte[] wkb3 = geom3.toWKB();", "wkb3") anyNumberOfTimes() + + } + + + test("UnaryVectorExpression should implement accessor methods") { + doMock() + + mockExpression.child shouldBe mockLeftExpression + mockExpression.geometryAPI shouldBe mockGeometryAPI + mockExpression.makeCopy(Array(mockLeftExpression)) shouldBe Abs(mockLeftExpression) + mockExpression.withNewChildInternal(mockLeftExpression) shouldBe Abs(mockLeftExpression) + } + + test("VectorExpression should
evaluate") { + doMock() + + val result = mockExpression.nullSafeEval(bytes) + + result shouldBe bytes + } + + test("VectorExpression should doGenCode") { + doMock() + + val exprCode = ExprCode(null, VariableValue("eval1", null)) + mockExpression.doGenCode(mockCtx, exprCode).value.code shouldBe expectedCode + } + + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/VectorExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/VectorExpressionTest.scala new file mode 100644 index 000000000..7e13b36d9 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/geometry/VectorExpressionTest.scala @@ -0,0 +1,106 @@ +package com.databricks.labs.mosaic.core.expressions.geometry + +import com.databricks.labs.mosaic.core.codegen.format.GeometryIOCodeGen +import com.databricks.labs.mosaic.core.geometry.MosaicGeometry +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext +import org.apache.spark.sql.types.{BinaryType, StringType} +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class VectorExpressionTest extends AnyFunSuite with MockFactory { + + val mockExpression: VectorExpression = mock[VectorExpression] + val mockIndexSystem: IndexSystem = mock[IndexSystem] + val mockGeometryAPI: GeometryAPI = mock[GeometryAPI] + val mockPoint: MosaicGeometry = mock[MosaicGeometry] + val mockCtx: CodegenContext = mock[CodegenContext] + val mockIO: GeometryIOCodeGen = mock[GeometryIOCodeGen] + + def doMock(): Unit = { + mockExpression.geometryAPI _ expects() returning mockGeometryAPI anyNumberOfTimes() + mockExpression.mosaicGeomClass _ expects() returning "M_GEOMETRY" anyNumberOfTimes() + mockExpression.geomClass _ expects() returning 
"GEOMETRY" anyNumberOfTimes() + mockExpression.CRSBoundsProviderClass _ expects() returning "CRSBoundsProvider" anyNumberOfTimes() + mockExpression.geometryAPIClass _ expects() returning "GEOMETRY_API" anyNumberOfTimes() + mockIndexSystem.name _ expects() returning "INDEX_SYSTEM" anyNumberOfTimes() + mockGeometryAPI.name _ expects() returning "GEOMETRY_API" anyNumberOfTimes() + mockGeometryAPI.serialize _ expects(mockPoint, StringType) returning "POINT EMPTY" anyNumberOfTimes() + mockGeometryAPI.ioCodeGen _ expects() returning mockIO anyNumberOfTimes() + mockGeometryAPI.geometryClass _ expects() returning "GEOMETRY" anyNumberOfTimes() + mockGeometryAPI.mosaicGeometryClass _ expects() returning "M_GEOMETRY" anyNumberOfTimes() + mockPoint.toWKT _ expects() returning "POINT EMPTY" anyNumberOfTimes() + mockCtx.freshName _ expects "baseGeometry" returning "baseGeometry1" anyNumberOfTimes() + mockIO.toWKT _ expects(mockCtx, "baseGeometry1", mockGeometryAPI) returning + ("String wkt1 = baseGeometry1.toWKT();", "wkt1") anyNumberOfTimes() + } + + + test("VectorExpression should be correctly initialised from expressionConfig") { + doMock() + mockExpression.mosaicGeomClass shouldBe "M_GEOMETRY" + mockExpression.geomClass shouldBe "GEOMETRY" + mockExpression.CRSBoundsProviderClass shouldBe "CRSBoundsProvider" + mockExpression.geometryAPIClass shouldBe "GEOMETRY_API" + } + + test("VectorExpression should serialise a result or a geometry result") { + doMock() + + object TestObject extends VectorExpression { + override def geometryAPI: GeometryAPI = mockGeometryAPI + } + + TestObject.serialise("POINT EMPTY", returnsGeometry = false, StringType) shouldBe "POINT EMPTY" + TestObject.serialise("POINT EMPTY".getBytes, returnsGeometry = false, BinaryType) shouldBe "POINT EMPTY".getBytes + TestObject.serialise(mockPoint, returnsGeometry = true, StringType) shouldBe "POINT EMPTY" + + } + + test("VectorExpression should generate serialise code") { + doMock() + + object TestObject extends 
VectorExpression { + override def geometryAPI: GeometryAPI = mockGeometryAPI + } + + TestObject.serialiseCodegen("geom1", returnsGeometry = true, StringType, mockCtx) shouldBe + ( + """ + |GEOMETRY baseGeometry1 = geom1.getGeom(); + |String wkt1 = baseGeometry1.toWKT(); + |""".stripMargin, "wkt1") + + TestObject.serialiseCodegen("geom1", returnsGeometry = false, StringType, mockCtx) shouldBe + ("", "geom1") + } + + test("VectorExpression should return correct mosaicGeometryRef") { + doMock() + + object TestObject extends VectorExpression { + override def geometryAPI: GeometryAPI = mockGeometryAPI + } + + TestObject.mosaicGeometryRef("geom1") shouldBe "M_GEOMETRY.apply(geom1)" + + } + + test("VectorExpression should implement accessor methods") { + doMock() + + object TestObject extends VectorExpression { + override def geometryAPI: GeometryAPI = mockGeometryAPI + } + + noException should be thrownBy TestObject.mosaicGeomClass + noException should be thrownBy TestObject.geomClass + noException should be thrownBy TestObject.CRSBoundsProviderClass + noException should be thrownBy TestObject.geometryAPIClass + + } + + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster1ArgExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster1ArgExpressionTest.scala new file mode 100644 index 000000000..06284c56c --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster1ArgExpressionTest.scala @@ -0,0 +1,86 @@ +package com.databricks.labs.mosaic.core.expressions.raster + +import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig +import com.databricks.labs.mosaic.core.raster.{MosaicRaster, RasterAPI} +import org.apache.spark.sql.catalyst.expressions.{Add, Expression} +import org.apache.spark.sql.types.{BinaryType, DataType} +import org.apache.spark.unsafe.types.UTF8String +import org.scalamock.scalatest.MockFactory +import 
org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class Raster1ArgExpressionTest extends AnyFunSuite with MockFactory { + + val mockLeftExpression: Expression = mock[Expression] + val mockRightExpression: Expression = mock[Expression] + val mockRasterAPI: RasterAPI = mock[RasterAPI] + val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig] + mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes() + mockRasterAPI.enable _ expects() returning mockRasterAPI anyNumberOfTimes() + val mockRaster: MosaicRaster = mock[MosaicRaster] + + // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class + // We are using Add as a template in order to test makeCopy which is linked to GenericExpressionFactory + abstract class DummyExpr extends Raster1ArgExpression[Add]( + mockLeftExpression, mockRightExpression, BinaryType, mockExpressionConfig + ) { + // For partial mocking, make methods that are testable final, scalamock will not mock final methods + override final def nullSafeEval(leftGeometryRow: Any, arg1Row: Any): Any = + super.nullSafeEval(leftGeometryRow, arg1Row) + + override final def left: Expression = super.left + + override final def right: Expression = super.right + + override final def dataType: DataType = super.dataType + + override final val rasterAPI: RasterAPI = mockRasterAPI + + override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs) + + override final def withNewChildrenInternal(newFirst: Expression, newArg1: Expression): Expression = + super.withNewChildrenInternal(newFirst, newArg1) + + } + + val bytes: Array[Byte] = "POINT EMPTY".getBytes + + val mockExpression: DummyExpr = mock[DummyExpr] + + + def doMock(): Unit = { + mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes() + mockExpression.rasterTransform _ expects(mockRaster, 1) returning mockRaster
anyNumberOfTimes() + mockRasterAPI.raster _ expects "path" returning mockRaster anyNumberOfTimes() + mockRaster.cleanUp _ expects() returning null anyNumberOfTimes() + mockLeftExpression.toString _ expects() returning "left" anyNumberOfTimes() + mockRightExpression.toString _ expects() returning "right" anyNumberOfTimes() + } + + + test("Raster1ArgExpression should implement accessor methods") { + doMock() + + mockExpression.left shouldBe mockLeftExpression + mockExpression.right shouldBe mockRightExpression + mockExpression.dataType shouldBe BinaryType + } + + test("Raster1ArgExpression should evaluate") { + doMock() + + val runtimePath = UTF8String.fromString("path") + val result = mockExpression.nullSafeEval(runtimePath, 1) + + result shouldBe mockRaster + } + + test("Raster1ArgExpression should make copy") { + doMock() + + mockExpression.makeCopy(Array[AnyRef](mockLeftExpression, mockRightExpression)) shouldBe a[Add] + mockExpression.withNewChildrenInternal(mockLeftExpression, mockRightExpression) shouldBe a[Add] + } + + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster2ArgExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster2ArgExpressionTest.scala new file mode 100644 index 000000000..ac218dadb --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/Raster2ArgExpressionTest.scala @@ -0,0 +1,91 @@ +package com.databricks.labs.mosaic.core.expressions.raster + +import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig +import com.databricks.labs.mosaic.core.raster.{MosaicRaster, RasterAPI} +import org.apache.spark.sql.catalyst.expressions.{Conv, Expression} +import org.apache.spark.sql.types.{BinaryType, DataType} +import org.apache.spark.unsafe.types.UTF8String +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers.{a, _} + 
+class Raster2ArgExpressionTest extends AnyFunSuite with MockFactory { + + val mockFirstExpression: Expression = mock[Expression] + val mockSecondExpression: Expression = mock[Expression] + val mockThirdExpression: Expression = mock[Expression] + val mockRasterAPI: RasterAPI = mock[RasterAPI] + val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig] + mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes() + mockRasterAPI.enable _ expects() returning mockRasterAPI anyNumberOfTimes() + val mockRaster: MosaicRaster = mock[MosaicRaster] + + // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class + // We are using Conv as a template in order to test makeCopy which is linked to GenericExpressionFactory + abstract class DummyExpr extends Raster2ArgExpression[Conv]( + mockFirstExpression, mockSecondExpression, mockThirdExpression, BinaryType, mockExpressionConfig + ) { + // For partial mocking, make methods that are testable final, scalamock will not mock final methods + override final def nullSafeEval(leftGeometryRow: Any, arg1Row: Any, arg2Row: Any): Any = + super.nullSafeEval(leftGeometryRow, arg1Row, arg2Row) + + override final def first: Expression = super.first + + override final def second: Expression = super.second + + override final def third: Expression = super.third + + override final def dataType: DataType = super.dataType + + override final val rasterAPI: RasterAPI = mockRasterAPI + + override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs) + + override final def withNewChildrenInternal(newFirst: Expression, newArg1: Expression, newArg2: Expression): Expression = + super.withNewChildrenInternal(newFirst, newArg1, newArg2) + + } + + val bytes: Array[Byte] = "POINT EMPTY".getBytes + + val mockExpression: DummyExpr = mock[DummyExpr] + + + def doMock(): Unit = { + mockExpressionConfig.getRasterAPI _ expects * returning
mockRasterAPI anyNumberOfTimes() + mockExpression.rasterTransform _ expects(mockRaster, 1, 2) returning mockRaster anyNumberOfTimes() + mockRasterAPI.raster _ expects "path" returning mockRaster anyNumberOfTimes() + mockRaster.cleanUp _ expects() returning null anyNumberOfTimes() + mockFirstExpression.toString _ expects() returning "first" anyNumberOfTimes() + mockSecondExpression.toString _ expects() returning "second" anyNumberOfTimes() + mockThirdExpression.toString _ expects() returning "third" anyNumberOfTimes() + } + + + test("Raster2ArgExpression should implement accessor methods") { + doMock() + + mockExpression.first shouldBe mockFirstExpression + mockExpression.second shouldBe mockSecondExpression + mockExpression.third shouldBe mockThirdExpression + mockExpression.dataType shouldBe BinaryType + } + + test("Raster2ArgExpression should evaluate") { + doMock() + + val runtimePath = UTF8String.fromString("path") + val result = mockExpression.nullSafeEval(runtimePath, 1, 2) + + result shouldBe mockRaster + } + + test("Raster2ArgExpression should make copy") { + doMock() + + mockExpression.makeCopy(Array[AnyRef](mockFirstExpression, mockSecondExpression, mockThirdExpression)) shouldBe a[Conv] + mockExpression.withNewChildrenInternal(mockFirstExpression, mockSecondExpression, mockThirdExpression) shouldBe a[Conv] + } + + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterBandExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterBandExpressionTest.scala new file mode 100644 index 000000000..b3564aa0c --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterBandExpressionTest.scala @@ -0,0 +1,88 @@ +package com.databricks.labs.mosaic.core.expressions.raster + +import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig +import com.databricks.labs.mosaic.core.raster.{MosaicRaster, MosaicRasterBand, 
RasterAPI} +import org.apache.spark.sql.catalyst.expressions.{Add, Expression} +import org.apache.spark.sql.types.{BinaryType, DataType} +import org.apache.spark.unsafe.types.UTF8String +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers.{a, _} + +class RasterBandExpressionTest extends AnyFunSuite with MockFactory { + + val mockLeftExpression: Expression = mock[Expression] + val mockRightExpression: Expression = mock[Expression] + val mockRasterAPI: RasterAPI = mock[RasterAPI] + val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig] + mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes() + mockRasterAPI.enable _ expects() returning mockRasterAPI anyNumberOfTimes() + val mockRaster: MosaicRaster = mock[MosaicRaster] + val mockBand: MosaicRasterBand = mock[MosaicRasterBand] + + // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class + // We are using Add as a template in order to test makeCopy which is linked to GenericExpressionFactory + abstract class DummyExpr extends RasterBandExpression[Add]( + mockLeftExpression, mockRightExpression, BinaryType, mockExpressionConfig + ) { + // For partial mocking, make methods that are testable final, scalamock will not mock final methods + override final def nullSafeEval(leftRasterRow: Any, rightRasterRow: Any): Any = + super.nullSafeEval(leftRasterRow, rightRasterRow) + + override final def left: Expression = super.left + + override final def right: Expression = super.right + + override final def dataType: DataType = super.dataType + + override final val rasterAPI: RasterAPI = mockRasterAPI + + override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs) + + override final def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Expression = + super.withNewChildrenInternal(newLeft, newRight) + + } +
+ val bytes: Array[Byte] = "POINT EMPTY".getBytes + + val mockExpression: DummyExpr = mock[DummyExpr] + + + def doMock(): Unit = { + mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes() + mockExpression.bandTransform _ expects(mockRaster, mockBand) returning mockRaster anyNumberOfTimes() + mockRasterAPI.raster _ expects "path" returning mockRaster anyNumberOfTimes() + mockRaster.cleanUp _ expects() returning null anyNumberOfTimes() + mockRaster.getBand _ expects 1 returning mockBand anyNumberOfTimes() + mockLeftExpression.toString _ expects() returning "left" anyNumberOfTimes() + mockRightExpression.toString _ expects() returning "right" anyNumberOfTimes() + } + + + test("RasterBandExpression should implement accessor methods") { + doMock() + + mockExpression.left shouldBe mockLeftExpression + mockExpression.right shouldBe mockRightExpression + mockExpression.dataType shouldBe BinaryType + } + + test("RasterBandExpression should evaluate") { + doMock() + + val runtimePath = UTF8String.fromString("path") + val result = mockExpression.nullSafeEval(runtimePath, 1) + + result shouldBe mockRaster + } + + test("RasterBandExpression should make copy") { + doMock() + + mockExpression.makeCopy(Array[AnyRef](mockLeftExpression, mockRightExpression)) shouldBe a[Add] + mockExpression.withNewChildrenInternal(mockLeftExpression, mockRightExpression) shouldBe a[Add] + } + + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterExpressionTest.scala new file mode 100644 index 000000000..e22f7964f --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterExpressionTest.scala @@ -0,0 +1,81 @@ +package com.databricks.labs.mosaic.core.expressions.raster + +import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig +import 
com.databricks.labs.mosaic.core.raster.{MosaicRaster, RasterAPI} +import org.apache.spark.sql.catalyst.expressions.{Abs, Expression} +import org.apache.spark.sql.types.{BinaryType, DataType} +import org.apache.spark.unsafe.types.UTF8String +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers.{a, _} + +class RasterExpressionTest extends AnyFunSuite with MockFactory { + + val mockChildExpression: Expression = mock[Expression] + val mockRasterAPI: RasterAPI = mock[RasterAPI] + val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig] + mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes() + mockRasterAPI.enable _ expects() returning mockRasterAPI anyNumberOfTimes() + val mockRaster: MosaicRaster = mock[MosaicRaster] + + // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class + // We are using Abs as a template in order to test makeCopy which is linked to GenericExpressionFactory + abstract class DummyExpr extends RasterExpression[Abs]( + mockChildExpression, BinaryType, mockExpressionConfig + ) { + // For partial mocking, make methods that are testable final, scalamock will not mock final methods + override final def nullSafeEval(childRasterRow: Any): Any = + super.nullSafeEval(childRasterRow) + + override final def child: Expression = super.child + + override final def dataType: DataType = super.dataType + + override final val rasterAPI: RasterAPI = mockRasterAPI + + override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs) + + override final def withNewChildInternal(newChild: Expression): Expression = + super.withNewChildInternal(newChild) + + } + + val bytes: Array[Byte] = "POINT EMPTY".getBytes + + val mockExpression: DummyExpr = mock[DummyExpr] + + + def doMock(): Unit = { + mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI 
anyNumberOfTimes() + mockRasterAPI.raster _ expects "path" returning mockRaster anyNumberOfTimes() + mockRaster.cleanUp _ expects() returning null anyNumberOfTimes() + mockChildExpression.toString _ expects() returning "child" anyNumberOfTimes() + mockExpression.rasterTransform _ expects mockRaster returning mockRaster anyNumberOfTimes() + } + + + test("RasterExpression should implement accessor methods") { + doMock() + + mockExpression.child shouldBe mockChildExpression + mockExpression.dataType shouldBe BinaryType + } + + test("RasterExpression should evaluate") { + doMock() + + val runtimePath = UTF8String.fromString("path") + val result = mockExpression.nullSafeEval(runtimePath) + + result shouldBe mockRaster + } + + test("RasterExpression should make copy") { + doMock() + + mockExpression.makeCopy(Array[AnyRef](mockChildExpression)) shouldBe a[Abs] + mockExpression.withNewChildInternal(mockChildExpression) shouldBe a[Abs] + } + + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterGeneratorExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterGeneratorExpressionTest.scala new file mode 100644 index 000000000..15f6db7a6 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterGeneratorExpressionTest.scala @@ -0,0 +1,95 @@ +package com.databricks.labs.mosaic.core.expressions.raster + +import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig +import com.databricks.labs.mosaic.core.raster.{MosaicRaster, RasterAPI} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Abs, Expression} +import org.apache.spark.sql.types.StructType +import org.apache.spark.unsafe.types +import org.apache.spark.unsafe.types.UTF8String +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers.{a, _} + 
+class RasterGeneratorExpressionTest extends AnyFunSuite with MockFactory { + + val mockChildExpression: Expression = mock[Expression] + val mockRasterAPI: RasterAPI = mock[RasterAPI] + val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig] + mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes() + mockRasterAPI.enable _ expects() returning mockRasterAPI anyNumberOfTimes() + val mockRaster: MosaicRaster = mock[MosaicRaster] + + // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class + // We are using Abs as a template in order to test makeCopy which is linked to GenericExpressionFactory + abstract class DummyExpr extends RasterGeneratorExpression[Abs]( + mockChildExpression, mockExpressionConfig + ) { + // For partial mocking, make methods that are testable final, scalamock will not mock final methods + override final def eval(childRasterRow: InternalRow): TraversableOnce[InternalRow] = + super.eval(childRasterRow) + + override final def position: Boolean = super.position + + override final def inline: Boolean = super.inline + + override final def elementSchema: StructType = super.elementSchema + + override final val rasterAPI: RasterAPI = mockRasterAPI + + override final def makeCopy(newArgs: Array[AnyRef]): Expression = super.makeCopy(newArgs) + + override final def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = + super.withNewChildrenInternal(newChildren) + + } + + val bytes: Array[Byte] = "POINT EMPTY".getBytes + val utf8Str: UTF8String = types.UTF8String.fromString("path") + val tiles: Seq[(Long, (Int, Int, Int, Int))] = + Seq((1L, (1, 2, 3, 4)), (2L, (5, 6, 7, 8)), (3L, (9, 10, 11, 12))) + + val mockExpression: DummyExpr = mock[DummyExpr] + + + def doMock(): Unit = { + mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes() + mockExpressionConfig.getRasterCheckpoint _ expects() returning "path" 
anyNumberOfTimes() + mockRasterAPI.raster _ expects "path" returning mockRaster anyNumberOfTimes() + mockRaster.cleanUp _ expects() returning null anyNumberOfTimes() + tiles.foreach( + tile => mockRaster.saveCheckpoint _ expects(*, tile._1, tile._2, "path") returning "path" anyNumberOfTimes() + ) + mockChildExpression.toString _ expects() returning "child" anyNumberOfTimes() + mockChildExpression.eval _ expects * returning utf8Str anyNumberOfTimes() + mockExpression.rasterGenerator _ expects mockRaster returning tiles anyNumberOfTimes() + mockExpression.children _ expects() returning IndexedSeq(mockChildExpression) anyNumberOfTimes() + } + + + test("RasterGeneratorExpression should implement accessor methods") { + doMock() + + mockExpression.position shouldBe false + mockExpression.inline shouldBe false + mockExpression.elementSchema shouldBe a[StructType] + } + + test("RasterGeneratorExpression should evaluate") { + doMock() + + val runtimePath = UTF8String.fromString("path") + val result = mockExpression.eval(InternalRow(runtimePath)) + + result shouldBe a[List[_]] + } + + test("RasterGeneratorExpression should make copy") { + doMock() + + mockExpression.makeCopy(Array[AnyRef](mockChildExpression)) shouldBe a[Abs] + mockExpression.withNewChildrenInternal(Array(mockChildExpression)) shouldBe a[Abs] + } + + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterTest.scala new file mode 100644 index 000000000..82d19018a --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterTest.scala @@ -0,0 +1,18 @@ +package com.databricks.labs.mosaic.core.expressions.raster + +import org.apache.spark.sql.catalyst.util.ArrayBasedMapData +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers.{a,
convertToAnyShouldWrapper} + +class RasterTest extends AnyFunSuite with MockFactory { + + test("package object should implement buildMapString and buildMapDouble") { + val map1 = Map("a" -> "b", "c" -> "d") + val map2 = Map("a" -> 1.0, "c" -> 2.0) + + com.databricks.labs.mosaic.core.expressions.raster.buildMapString(map1) shouldBe a[ArrayBasedMapData] + com.databricks.labs.mosaic.core.expressions.raster.buildMapDouble(map2) shouldBe a[ArrayBasedMapData] + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterToGridExpressionTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterToGridExpressionTest.scala new file mode 100644 index 000000000..0e6404b22 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/expressions/raster/RasterToGridExpressionTest.scala @@ -0,0 +1,102 @@ +package com.databricks.labs.mosaic.core.expressions.raster + +import com.databricks.labs.mosaic.core.expressions.MosaicExpressionConfig +import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.core.raster.{MosaicRaster, MosaicRasterBand, RasterAPI} +import org.apache.spark.sql.catalyst.expressions.{Add, Expression} +import org.apache.spark.sql.catalyst.util.GenericArrayData +import org.apache.spark.sql.types.{BinaryType, LongType} +import org.apache.spark.unsafe.types.UTF8String +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers.{a, _} + +class RasterToGridExpressionTest extends AnyFunSuite with MockFactory { + + val mockPathExpression: Expression = mock[Expression] + val mockResolutionExpression: Expression = mock[Expression] + val mockRasterAPI: RasterAPI = mock[RasterAPI] + val mockIndexSystem: IndexSystem = mock[IndexSystem] + val mockExpressionConfig: MosaicExpressionConfig = mock[MosaicExpressionConfig] + val mockRaster: MosaicRaster = mock[MosaicRaster] + 
val mockRasterBand: MosaicRasterBand = mock[MosaicRasterBand] + val geoTransform: Seq[Double] = Seq(1.0, 2.0, 3.0, 4.0, 5.0, 6.0) + val cellMeasurePairs: Seq[Map[Long, Int]] = Seq(Map(1L -> 1), Map(2L -> 2), Map(3L -> 3), Map(4L -> 4), Map(5L -> 5), Map(6L -> 6)) + + def doMock(): Unit = { + mockExpressionConfig.getRasterAPI _ expects * returning mockRasterAPI anyNumberOfTimes() + mockExpressionConfig.getGeometryAPI _ expects * returning null anyNumberOfTimes() + mockExpressionConfig.getIndexSystem _ expects * returning mockIndexSystem anyNumberOfTimes() + mockExpressionConfig.getCellIdType _ expects() returning LongType anyNumberOfTimes() + mockRasterAPI.raster _ expects "path" returning mockRaster anyNumberOfTimes() + mockRasterAPI.enable _ expects() returning mockRasterAPI anyNumberOfTimes() + mockRaster.cleanUp _ expects() returning null anyNumberOfTimes() + mockRaster.getGeoTransform _ expects() returning geoTransform anyNumberOfTimes() + mockRaster.transformBands[Map[Long, _]] _ expects * returning cellMeasurePairs anyNumberOfTimes() + (mockRasterBand.transformValues[(Long, Double)](_: (Int, Int, Double) => (Long, Double), _: (Long, Double))) expects(*, *) returning Seq( + Seq((1L, 1.0)), Seq((2L, 2.0)), Seq((3L, 3.0)), Seq((4L, 4.0)), Seq((5L, 5.0)), Seq((6L, 6.0)) + ) anyNumberOfTimes() + mockPathExpression.toString _ expects() returning "left" anyNumberOfTimes() + mockResolutionExpression.toString _ expects() returning "right" anyNumberOfTimes() + mockIndexSystem.pointToIndex _ expects(*, *, *) returning 1L anyNumberOfTimes() + cellMeasurePairs.foreach( + pairs => pairs.foreach { + case (cellId, _) => mockIndexSystem.serializeCellId _ expects cellId returning UTF8String.fromString("1") anyNumberOfTimes() + } + ) + } + + // DummyExpr needs mocks to be set up before it is instantiated, so we call doMock() before declaring the class + doMock() + + // Mocking doesn't work well with templates, so we create a dummy class to extend the abstract class + // We are 
using Add as a template in order to test makeCopy which is linked to GenericExpressionFactory + abstract class DummyExpr extends RasterToGridExpression[Add, Int]( + mockPathExpression, mockResolutionExpression, BinaryType, mockExpressionConfig + ) { + // For partial mocking, make methods that are testable final, scalamock will not mock final methods + override final def rasterTransform(raster: MosaicRaster, arg1: Any): Any = + super.rasterTransform(raster, arg1) + + override final def pixelTransformer(gt: Seq[Double], resolution: Int)(x: Int, y: Int, value: Double): (Long, Double) = + super.pixelTransformer(gt, resolution)(x, y, value) + + override final def bandTransformer(band: MosaicRasterBand, resolution: Int, gt: Seq[Double]): Map[Long, Int] = + super.bandTransformer(band, resolution, gt) + + } + + val mockExpression: DummyExpr = mock[DummyExpr] + + + test("RasterToGridExpression should implement pixelTransformer") { + doMock() + + val result = mockExpression.pixelTransformer(geoTransform, 1)(1, 1, 1.0) + + result shouldBe a[Tuple2[_, _]] + result shouldBe(1L, 1.0) + } + + test("RasterToGridExpression should implement rasterTransform") { + doMock() + + val result = mockExpression.rasterTransform(mockRaster, 1) + + result shouldBe a[GenericArrayData] + result.asInstanceOf[GenericArrayData].array.length shouldBe 6 + } + + test("RasterToGridExpression should implement bandTransformer") { + doMock() + + mockExpression.valuesCombiner _ expects * returning 1 anyNumberOfTimes() + + val result = mockExpression.bandTransformer(mockRasterBand, 1, geoTransform) + + result shouldBe a[Map[_, _]] + result.keys.toSeq.length shouldBe 6 + } + + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryTest.scala new file mode 100644 index 000000000..79f63a2c1 --- /dev/null +++
b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/geometry/MosaicGeometryTest.scala @@ -0,0 +1,66 @@ +package com.databricks.labs.mosaic.core.geometry + +import com.databricks.labs.mosaic.core.crs.{CRSBounds, CRSBoundsProvider} +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.must.Matchers.be +import org.scalatest.matchers.should.Matchers.{an, convertToAnyShouldWrapper} + +import scala.collection.immutable + +class MosaicGeometryTest extends AnyFunSuite with MockFactory { + + abstract class TestMosaicGeometry extends MosaicGeometry { + override final def transformCRSXY(sridTo: Int, sridFrom: Int): MosaicGeometry = super.transformCRSXY(sridTo, sridFrom) + + override final def minMaxCoord(coord: String, minMax: String): Double = super.minMaxCoord(coord, minMax) + + override final def hasValidCoords(crsBoundsProvider: CRSBoundsProvider, crsCode: String, which: String): Boolean = super.hasValidCoords(crsBoundsProvider, crsCode, which) + } + + class TestCRSBoundsProvider extends CRSBoundsProvider(null) {} + + val mockMosaicGeometry: MosaicGeometry = mock[TestMosaicGeometry] + val mockMosaicPoints: Seq[immutable.IndexedSeq[MosaicPoint]] = Seq((0 to 5).map(_ => mock[MosaicPoint])) + val mockCRSBoundsProvider: CRSBoundsProvider = mock[CRSBoundsProvider] + val mockBounds: CRSBounds = mock[CRSBounds] + + def doMock(): Unit = { + mockMosaicPoints.foreach(_.zipWithIndex.foreach { case (point, index) => + point.getX _ expects() returning index.toDouble anyNumberOfTimes() + point.getY _ expects() returning index.toDouble anyNumberOfTimes() + point.getZ _ expects() returning index.toDouble anyNumberOfTimes() + }) + mockMosaicGeometry.getShellPoints _ expects() returning mockMosaicPoints anyNumberOfTimes() + mockMosaicGeometry.getHolePoints _ expects() returning Seq.empty anyNumberOfTimes() + mockCRSBoundsProvider.bounds _ expects("EPSG", "4326".toInt) returning mockBounds anyNumberOfTimes() + 
mockCRSBoundsProvider.reprojectedBounds _ expects("EPSG", "4326".toInt) returning mockBounds anyNumberOfTimes() + mockBounds.getUpperX _ expects() returning 180 anyNumberOfTimes() + mockBounds.getUpperY _ expects() returning 90 anyNumberOfTimes() + mockBounds.getLowerX _ expects() returning -180 anyNumberOfTimes() + mockBounds.getLowerY _ expects() returning -90 anyNumberOfTimes() + } + + test("MosaicGeometry should return minMaxCoord") { + doMock() + mockMosaicGeometry.minMaxCoord("x", "min") shouldBe 0.0 + mockMosaicGeometry.minMaxCoord("x", "max") shouldBe 5.0 + mockMosaicGeometry.minMaxCoord("y", "min") shouldBe 0.0 + mockMosaicGeometry.minMaxCoord("y", "max") shouldBe 5.0 + mockMosaicGeometry.minMaxCoord("z", "min") shouldBe 0.0 + mockMosaicGeometry.minMaxCoord("z", "max") shouldBe 5.0 + } + + test("MosaicGeometry should run hasValidCoords") { + doMock() + mockMosaicGeometry.hasValidCoords(mockCRSBoundsProvider, "EPSG:4326", "bounds") shouldBe true + mockMosaicGeometry.hasValidCoords(mockCRSBoundsProvider, "EPSG:4326", "reprojected_bounds") shouldBe true + an[Error] should be thrownBy mockMosaicGeometry.hasValidCoords(mockCRSBoundsProvider, "EPSG:4326", "invalid") + } + + test("MosaicGeometry should fail for transformCRSXY") { + doMock() + an[Exception] should be thrownBy mockMosaicGeometry.transformCRSXY(4326, 4326) + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPITest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPITest.scala new file mode 100644 index 000000000..efe2b3339 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/geometry/api/GeometryAPITest.scala @@ -0,0 +1,84 @@ +package com.databricks.labs.mosaic.core.geometry.api + +import com.databricks.labs.mosaic.core.geometry.{MosaicGeometry, MosaicPoint} +import com.databricks.labs.mosaic.core.types.{GeoJSONType, GeometryTypeEnum, HexType} +import 
org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.types.{BinaryType, DataType, DateType, StringType} +import org.apache.spark.unsafe.types.UTF8String +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class GeometryAPITest extends AnyFunSuite with MockFactory { + + val mockReader: GeometryReader = mock[GeometryReader] + + abstract class TestGeometryAPI extends GeometryAPI(mockReader) { + override final def pointsToGeometry(points: Seq[MosaicPoint], geomType: GeometryTypeEnum.Value): MosaicGeometry = super.pointsToGeometry(points, geomType) + + override final def rowToGeometry(inputData: InternalRow, dataType: DataType): MosaicGeometry = super.rowToGeometry(inputData, dataType) + + override final def valueToGeometry(inputData: Any, dataType: DataType): MosaicGeometry = super.valueToGeometry(inputData, dataType) + + override final def serialize(geometry: MosaicGeometry, dataType: DataType): Any = super.serialize(geometry, dataType) + } + + val mockApi: TestGeometryAPI = mock[TestGeometryAPI] + val mockRow: InternalRow = mock[InternalRow] + val mockPoint: MosaicPoint = mock[MosaicPoint] + val bytes: Array[Byte] = "POINT (1 1)".getBytes + val subRow: InternalRow = InternalRow.fromSeq(Seq(UTF8String.fromString("POINT (1 1)"))) + + def doMock(): Unit = { + mockRow.getString _ expects 0 returning "POINT (1 1)" anyNumberOfTimes() + mockRow.getBinary _ expects 0 returning bytes anyNumberOfTimes() + mockRow.get _ expects(0, HexType) returning subRow anyNumberOfTimes() + mockRow.get _ expects(0, GeoJSONType) returning subRow anyNumberOfTimes() + mockReader.fromWKB _ expects bytes returning mock[MosaicPoint] anyNumberOfTimes() + mockReader.fromWKT _ expects "POINT (1 1)" returning mock[MosaicPoint] anyNumberOfTimes() + mockReader.fromHEX _ expects "POINT (1 1)" returning mock[MosaicPoint] anyNumberOfTimes() + mockReader.fromJSON _ expects "POINT (1 1)" returning 
mock[MosaicPoint] anyNumberOfTimes() + mockReader.fromSeq _ expects(Seq(mockPoint), GeometryTypeEnum.POINT) returning mock[MosaicPoint] anyNumberOfTimes() + mockPoint.toWKT _ expects() returning "POINT (1 1)" anyNumberOfTimes() + mockPoint.toWKB _ expects() returning bytes anyNumberOfTimes() + mockPoint.toJSON _ expects() returning "POINT (1 1)" anyNumberOfTimes() + mockPoint.toHEX _ expects() returning "POINT (1 1)" anyNumberOfTimes() + } + + test("GeometryAPI should convert points to geometry") { + doMock() + + mockApi.pointsToGeometry(Seq(mockPoint), GeometryTypeEnum.POINT) shouldBe a[MosaicGeometry] + } + + test("GeometryAPI should convert row to geometry") { + doMock() + + mockApi.rowToGeometry(mockRow, BinaryType) shouldBe a[MosaicGeometry] + mockApi.rowToGeometry(mockRow, StringType) shouldBe a[MosaicGeometry] + mockApi.rowToGeometry(mockRow, GeoJSONType) shouldBe a[MosaicGeometry] + mockApi.rowToGeometry(mockRow, HexType) shouldBe a[MosaicGeometry] + an[Error] should be thrownBy mockApi.rowToGeometry(mockRow, DateType) + } + + test("GeometryAPI should convert value to geometry") { + doMock() + + mockApi.valueToGeometry(bytes, BinaryType) shouldBe a[MosaicGeometry] + mockApi.valueToGeometry(UTF8String.fromString("POINT (1 1)"), StringType) shouldBe a[MosaicGeometry] + mockApi.valueToGeometry(subRow, GeoJSONType) shouldBe a[MosaicGeometry] + mockApi.valueToGeometry(subRow, HexType) shouldBe a[MosaicGeometry] + an[Error] should be thrownBy mockApi.valueToGeometry("POINT (1 1)", DateType) + } + + test("GeometryAPI should serialize geometry") { + doMock() + + mockApi.serialize(mockPoint, BinaryType) shouldBe a[Array[Byte]] + mockApi.serialize(mockPoint, StringType) shouldBe a[UTF8String] + mockApi.serialize(mockPoint, GeoJSONType) shouldBe a[InternalRow] + mockApi.serialize(mockPoint, HexType) shouldBe a[InternalRow] + an[Error] should be thrownBy mockApi.serialize(mockPoint, DateType) + } + +} diff --git 
a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/index/IndexSystemTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/index/IndexSystemTest.scala new file mode 100644 index 000000000..216610b41 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/index/IndexSystemTest.scala @@ -0,0 +1,202 @@ +package com.databricks.labs.mosaic.core.index + +import com.databricks.labs.mosaic.core.geometry.MosaicGeometry +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.types.{Coordinates, MosaicChip} +import org.apache.spark.sql.types.{DataType, DateType, LongType, StringType} +import org.apache.spark.unsafe.types.UTF8String +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class IndexSystemTest extends AnyFunSuite with MockFactory { + + abstract class TestIndexSystem extends IndexSystem(LongType) { + // Override as final all the methods whose behavior we want to test, this will prevent the compiler from mocking those methods + override final def getCellIdDataType: DataType = super.getCellIdDataType + + override final def setCellIdDataType(dataType: DataType): Unit = super.setCellIdDataType(dataType) + + override final def formatCellId(cellId: Any, dt: DataType): Any = super.formatCellId(cellId, dt) + + override final def formatCellId(cellId: Any): Any = super.formatCellId(cellId) + + override final def serializeCellId(cellId: Any): Any = super.serializeCellId(cellId) + + override final def kRing(index: String, n: Int): Seq[String] = super.kRing(index, n) + + override final def kLoop(index: String, n: Int): Seq[String] = super.kLoop(index, n) + + override final def getBorderChips(geometry: MosaicGeometry, borderIndices: Seq[Long], keepCoreGeom: Boolean, geometryAPI: GeometryAPI): Seq[MosaicChip] = + super.getBorderChips(geometry, borderIndices, keepCoreGeom, geometryAPI) + + override 
final def getCoreChips(coreIndices: Seq[Long], keepCoreGeom: Boolean, geometryAPI: GeometryAPI): Seq[MosaicChip] = + super.getCoreChips(coreIndices, keepCoreGeom, geometryAPI) + + override final def area(index: Long): Double = super.area(index) + + override final def area(index: String): Double = super.area(index) + + override final def indexToCenter(index: String): Coordinates = super.indexToCenter(index) + + override final def indexToBoundary(index: String): Seq[Coordinates] = super.indexToBoundary(index) + + override final def coerceChipGeometry(geometries: Seq[MosaicGeometry]): Seq[MosaicGeometry] = super.coerceChipGeometry(geometries) + + override final def coerceChipGeometry(geom: MosaicGeometry, cell: Long, geometryAPI: GeometryAPI): MosaicGeometry = super.coerceChipGeometry(geom, cell, geometryAPI) + } + + val mockIndexSystem: IndexSystem = mock[TestIndexSystem] + val mockGeometry: MosaicGeometry = mock[MosaicGeometry] + val mockGeometry2: MosaicGeometry = mock[MosaicGeometry] + val mockGeometry3: MosaicGeometry = mock[MosaicGeometry] + val mockGeometry4: MosaicGeometry = mock[MosaicGeometry] + val mockGeometryAPI: GeometryAPI = mock[GeometryAPI] + + def doMock(): Unit = { + mockIndexSystem.format _ expects 123456789L returning "123456789" anyNumberOfTimes() + mockIndexSystem.parse _ expects "123456789" returning 123456789L anyNumberOfTimes() + mockIndexSystem.parse _ expects "10000000001" returning 10000000001L anyNumberOfTimes() + + (mockIndexSystem.kRing(_: Long, _: Int)) expects(123456789L, 1) returning Seq(123456789L) anyNumberOfTimes() + (mockIndexSystem.kLoop(_: Long, _: Int)) expects(123456789L, 1) returning Seq(123456789L) anyNumberOfTimes() + + (mockIndexSystem.indexToGeometry(_: Long, _: GeometryAPI)) expects(10000000001L, mockGeometryAPI) returning mockGeometry anyNumberOfTimes() + (mockIndexSystem.indexToGeometry(_: Long, _: GeometryAPI)) expects(10000000002L, mockGeometryAPI) returning mockGeometry anyNumberOfTimes() + 
(mockIndexSystem.indexToGeometry(_: Long, _: GeometryAPI)) expects(10000000003L, mockGeometryAPI) returning mockGeometry2 anyNumberOfTimes() + + (mockIndexSystem.indexToCenter(_: Long)) expects 10000000001L returning Coordinates(1.5, 1.5) anyNumberOfTimes() + (mockIndexSystem.indexToBoundary(_: Long)) expects 10000000001L returning + Seq(Coordinates(1, 1), Coordinates(1, 2), Coordinates(2, 2), Coordinates(1, 2), Coordinates(1, 1)) anyNumberOfTimes() + + mockGeometry.intersection _ expects mockGeometry returning mockGeometry anyNumberOfTimes() + mockGeometry.intersection _ expects mockGeometry2 returning mockGeometry2 anyNumberOfTimes() + mockGeometry.getGeometryType _ expects() returning "POINT" anyNumberOfTimes() + mockGeometry2.getGeometryType _ expects() returning "GEOMETRYCOLLECTION" anyNumberOfTimes() + mockGeometry3.getGeometryType _ expects() returning "POLYGON" anyNumberOfTimes() + mockGeometry4.getGeometryType _ expects() returning "LINESTRING" anyNumberOfTimes() + mockGeometry.equals _ expects mockGeometry returning true anyNumberOfTimes() + mockGeometry2.equals _ expects mockGeometry returning false anyNumberOfTimes() + mockGeometry2.equals _ expects mockGeometry2 returning false anyNumberOfTimes() + mockGeometry.equals _ expects mockGeometry2 returning false anyNumberOfTimes() + mockGeometry.isEmpty _ expects() returning false anyNumberOfTimes() + mockGeometry2.isEmpty _ expects() returning false anyNumberOfTimes() + mockGeometry2.getBoundary _ expects() returning mockGeometry2 anyNumberOfTimes() + mockGeometry.difference _ expects mockGeometry2 returning mockGeometry anyNumberOfTimes() + mockGeometry2.difference _ expects mockGeometry2 returning mockGeometry anyNumberOfTimes() + + } + + test("IndexSystem should get and set cellID data type") { + mockIndexSystem.getCellIdDataType shouldBe LongType + noException should be thrownBy mockIndexSystem.setCellIdDataType(LongType) + } + + test("IndexSystem should format cellID") { + doMock() + 
mockIndexSystem.formatCellId(123456789L, LongType) shouldBe 123456789L + mockIndexSystem.formatCellId("123456789", LongType) shouldBe 123456789L + mockIndexSystem.formatCellId(UTF8String.fromString("123456789"), LongType) shouldBe 123456789L + mockIndexSystem.formatCellId(123456789L, StringType) shouldBe "123456789" + mockIndexSystem.formatCellId("123456789", StringType) shouldBe "123456789" + mockIndexSystem.formatCellId(UTF8String.fromString("123456789"), StringType) shouldBe "123456789" + + an[Error] should be thrownBy mockIndexSystem.formatCellId(123456789L, DateType) + + mockIndexSystem.formatCellId(123456789L) shouldBe 123456789L + } + + test("IndexSystem should serializeCellId") { + doMock() + + mockIndexSystem.setCellIdDataType(StringType) + mockIndexSystem.getCellIdDataType shouldBe StringType + + mockIndexSystem.serializeCellId(123456789L) shouldBe UTF8String.fromString("123456789") + mockIndexSystem.serializeCellId("123456789") shouldBe UTF8String.fromString("123456789") + mockIndexSystem.serializeCellId(UTF8String.fromString("123456789")) shouldBe UTF8String.fromString("123456789") + + mockIndexSystem.setCellIdDataType(LongType) + mockIndexSystem.getCellIdDataType shouldBe LongType + + mockIndexSystem.serializeCellId(123456789L) shouldBe 123456789L + mockIndexSystem.serializeCellId("123456789") shouldBe 123456789L + mockIndexSystem.serializeCellId(UTF8String.fromString("123456789")) shouldBe 123456789L + + + an[Error] should be thrownBy mockIndexSystem.serializeCellId(1.0) + } + + test("IndexSystem should implement kRing and kLoop for StringType") { + doMock() + mockIndexSystem.kRing("123456789", 1) shouldBe a[Seq[_]] + mockIndexSystem.kLoop("123456789", 1) shouldBe a[Seq[_]] + } + + test("IndexSystem should implement getBorderChips") { + doMock() + + val result = mockIndexSystem.getBorderChips( + mockGeometry, + Seq(10000000001L, 10000000002L, 10000000003L), + keepCoreGeom = true, + mockGeometryAPI + ) + + result shouldBe a[Seq[_]] + result.last 
shouldBe a[MosaicChip] + result.last.index shouldBe Left(10000000003L) + + val result2 = mockIndexSystem.getBorderChips( + mockGeometry, + Seq(10000000001L, 10000000002L, 10000000003L), + keepCoreGeom = false, + mockGeometryAPI + ) + + result2.map(_.geom).flatMap(Option(_)).length shouldBe 1 + } + + test("IndexSystem should implement getCoreChips") { + doMock() + + val result = mockIndexSystem.getCoreChips( + Seq(10000000001L, 10000000002L, 10000000003L), + keepCoreGeom = true, + mockGeometryAPI + ) + + result shouldBe a[Seq[_]] + result.last shouldBe a[MosaicChip] + result.last.index shouldBe Left(10000000003L) + + val result2 = mockIndexSystem.getCoreChips( + Seq(10000000001L, 10000000002L, 10000000003L), + keepCoreGeom = false, + mockGeometryAPI + ) + + result2.map(_.geom).flatMap(Option(_)).length shouldBe 0 + } + + test("IndexSystem should implement area") { + doMock() + mockIndexSystem.area(10000000001L) shouldBe 12360.971936046964 + mockIndexSystem.area("10000000001") shouldBe 12360.971936046964 + } + + test("IndexSystem should implement indexToCenter and indexToBoundary for string type") { + doMock() + mockIndexSystem.indexToCenter("10000000001") + mockIndexSystem.indexToBoundary("10000000001") + } + + test("IndexSystem should implement coerceChipGeometry") { + doMock() + mockIndexSystem.coerceChipGeometry(Seq(mockGeometry, mockGeometry2)) shouldBe Seq(mockGeometry) + mockIndexSystem.coerceChipGeometry(Seq(mockGeometry, mockGeometry3)) shouldBe Seq(mockGeometry3) + mockIndexSystem.coerceChipGeometry(Seq(mockGeometry, mockGeometry4)) shouldBe Seq(mockGeometry4) + mockIndexSystem.coerceChipGeometry(Nil) shouldBe Nil + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterBandTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterBandTest.scala new file mode 100644 index 000000000..944f1d5fd --- /dev/null +++
b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterBandTest.scala @@ -0,0 +1,26 @@ +package com.databricks.labs.mosaic.core.raster + +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class MosaicRasterBandTest extends AnyFunSuite with MockFactory { + + abstract class TestMosaicRasterBand extends MosaicRasterBand { + override final def values: Array[Double] = super.values + + override final def maskValues: Array[Double] = super.maskValues + } + + test("MosaicRasterBand") { + val mockMosaicRasterBand = mock[TestMosaicRasterBand] + mockMosaicRasterBand.xSize _ expects() returning 1 anyNumberOfTimes() + mockMosaicRasterBand.ySize _ expects() returning 1 anyNumberOfTimes() + (mockMosaicRasterBand.values(_: Int, _: Int, _: Int, _: Int)) expects(0, 0, 1, 1) returning Array(1.0) anyNumberOfTimes() + (mockMosaicRasterBand.maskValues(_: Int, _: Int, _: Int, _: Int)) expects(0, 0, 1, 1) returning Array(1.0) anyNumberOfTimes() + + mockMosaicRasterBand.values shouldBe Array(1.0) + mockMosaicRasterBand.maskValues shouldBe Array(1.0) + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterTest.scala new file mode 100644 index 000000000..9e1994deb --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/MosaicRasterTest.scala @@ -0,0 +1,18 @@ +package com.databricks.labs.mosaic.core.raster + +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class MosaicRasterTest extends AnyFunSuite with MockFactory { + + abstract class TestMosaicRaster extends MosaicRaster("path", 1234) { + override final def getMemSize: Long = super.getMemSize + } + + test("MosaicRaster") { + val mockMosaicRaster = mock[TestMosaicRaster] + 
mockMosaicRaster.getMemSize shouldBe 1234 + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/RasterAPITest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/RasterAPITest.scala new file mode 100644 index 000000000..e127449f8 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/raster/RasterAPITest.scala @@ -0,0 +1,40 @@ +package com.databricks.labs.mosaic.core.raster + +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class RasterAPITest extends AnyFunSuite with MockFactory { + + val mockRasterReader: RasterReader = mock[RasterReader] + val mockMosaicRaster: MosaicRaster = mock[MosaicRaster] + val mockMosaicRasterBand: MosaicRasterBand = mock[MosaicRasterBand] + + abstract class TestRasterAPI extends RasterAPI(mockRasterReader) { + override final def raster(path: String): MosaicRaster = super.raster(path) + + override final def band(path: String, bandIndex: Int): MosaicRasterBand = super.band(path, bandIndex) + + override final def toWorldCoord(gt: Seq[Double], x: Int, y: Int): (Double, Double) = + super.toWorldCoord(gt, x, y) + + override final def fromWorldCoord(gt: Seq[Double], x: Double, y: Double): (Int, Int) = + super.fromWorldCoord(gt, x, y) + } + + test("RasterAPI") { + val mockRasterAPI = mock[TestRasterAPI] + + mockRasterReader.readRaster _ expects "path" returning mockMosaicRaster anyNumberOfTimes() + mockRasterReader.readBand _ expects("path", 1) returning mockMosaicRasterBand anyNumberOfTimes() + mockRasterReader.toWorldCoord _ expects(Seq(1.0, 2.0, 3.0, 4.0, 5.0, 6.0), 1, 1) returning (3.0, 4.0) anyNumberOfTimes() + mockRasterReader.fromWorldCoord _ expects(Seq(1.0, 2.0, 3.0, 4.0, 5.0, 6.0), 3.0, 4.0) returning (1, 1) anyNumberOfTimes() + + mockRasterAPI.raster("path") shouldBe mockMosaicRaster + mockRasterAPI.band("path", 1) shouldBe mockMosaicRasterBand + + 
mockRasterAPI.toWorldCoord(Seq(1.0, 2.0, 3.0, 4.0, 5.0, 6.0), 1, 1) shouldBe (3.0, 4.0) + mockRasterAPI.fromWorldCoord(Seq(1.0, 2.0, 3.0, 4.0, 5.0, 6.0), 3.0, 4.0) shouldBe (1, 1) + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/ChipTypeTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/ChipTypeTest.scala new file mode 100644 index 000000000..2b0ba4a55 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/ChipTypeTest.scala @@ -0,0 +1,15 @@ +package com.databricks.labs.mosaic.core.types + +import org.apache.spark.sql.types.LongType +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class ChipTypeTest extends AnyFunSuite with MockFactory { + + test("ChipType") { + ChipType(LongType).typeName should be ("struct") + ChipType(LongType).simpleString should be ("CHIP") + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/GeoJSONTypeTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/GeoJSONTypeTest.scala new file mode 100644 index 000000000..745d1aa29 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/GeoJSONTypeTest.scala @@ -0,0 +1,15 @@ +package com.databricks.labs.mosaic.core.types + +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class GeoJSONTypeTest extends AnyFunSuite with MockFactory { + + test("GeoJSONType") { + val geoJSONType = new GeoJSONType() + geoJSONType.typeName should be ("struct") + geoJSONType.simpleString should be ("GEOJSON") + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/GeometryTypeEnumTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/GeometryTypeEnumTest.scala new file mode 100644 index 000000000..abff36453 --- 
/dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/GeometryTypeEnumTest.scala @@ -0,0 +1,50 @@ +package com.databricks.labs.mosaic.core.types + +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ +import com.databricks.labs.mosaic.core.types.GeometryTypeEnum._ + +class GeometryTypeEnumTest extends AnyFunSuite with MockFactory { + + test("GeometryTypeEnum") { + + GeometryTypeEnum.fromString("POINT") shouldBe POINT + GeometryTypeEnum.fromString("LINESTRING") shouldBe LINESTRING + GeometryTypeEnum.fromString("POLYGON") shouldBe POLYGON + GeometryTypeEnum.fromString("MULTIPOINT") shouldBe MULTIPOINT + GeometryTypeEnum.fromString("MULTILINESTRING") shouldBe MULTILINESTRING + GeometryTypeEnum.fromString("MULTIPOLYGON") shouldBe MULTIPOLYGON + GeometryTypeEnum.fromString("GEOMETRYCOLLECTION") shouldBe GEOMETRYCOLLECTION + GeometryTypeEnum.fromString("LINEARRING") shouldBe LINEARRING + an[Error] should be thrownBy GeometryTypeEnum.fromString("NOT A GEOM") + + GeometryTypeEnum.fromId(1) shouldBe POINT + GeometryTypeEnum.fromId(2) shouldBe MULTIPOINT + GeometryTypeEnum.fromId(3) shouldBe LINESTRING + GeometryTypeEnum.fromId(4) shouldBe MULTILINESTRING + GeometryTypeEnum.fromId(5) shouldBe POLYGON + GeometryTypeEnum.fromId(6) shouldBe MULTIPOLYGON + GeometryTypeEnum.fromId(7) shouldBe LINEARRING + GeometryTypeEnum.fromId(8) shouldBe GEOMETRYCOLLECTION + an[Error] should be thrownBy GeometryTypeEnum.fromId(9) + + GeometryTypeEnum.groupOf(POINT) shouldBe POINT + GeometryTypeEnum.groupOf(MULTIPOINT) shouldBe POINT + GeometryTypeEnum.groupOf(LINESTRING) shouldBe LINESTRING + GeometryTypeEnum.groupOf(MULTILINESTRING) shouldBe LINESTRING + GeometryTypeEnum.groupOf(POLYGON) shouldBe POLYGON + GeometryTypeEnum.groupOf(MULTIPOLYGON) shouldBe POLYGON + GeometryTypeEnum.groupOf(GEOMETRYCOLLECTION) shouldBe GEOMETRYCOLLECTION + + GeometryTypeEnum.isFlat(POINT) shouldBe true +
GeometryTypeEnum.isFlat(MULTIPOINT) shouldBe false + GeometryTypeEnum.isFlat(LINESTRING) shouldBe true + GeometryTypeEnum.isFlat(MULTILINESTRING) shouldBe false + GeometryTypeEnum.isFlat(POLYGON) shouldBe true + GeometryTypeEnum.isFlat(MULTIPOLYGON) shouldBe false + GeometryTypeEnum.isFlat(GEOMETRYCOLLECTION) shouldBe false + + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/HexTypeTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/HexTypeTest.scala new file mode 100644 index 000000000..ad115490f --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/HexTypeTest.scala @@ -0,0 +1,15 @@ +package com.databricks.labs.mosaic.core.types + +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class HexTypeTest extends AnyFunSuite with MockFactory { + + test("HexType") { + val hexType = new HexType() + hexType.typeName should be ("struct") + hexType.simpleString should be ("HEX") + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/MosaicChipTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/MosaicChipTest.scala new file mode 100644 index 000000000..95781f2a3 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/MosaicChipTest.scala @@ -0,0 +1,51 @@ +package com.databricks.labs.mosaic.core.types + +import com.databricks.labs.mosaic.core.geometry.MosaicGeometry +import com.databricks.labs.mosaic.core.index.IndexSystem +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.types.{BinaryType, LongType, StringType} +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class MosaicChipTest extends AnyFunSuite with MockFactory { + + test("MosaicChip") { + val mockMosaicGeometry = mock[MosaicGeometry] + val
mockIndexSystem = mock[IndexSystem] + mockIndexSystem.format _ expects 1L returning "1" anyNumberOfTimes() + mockIndexSystem.parse _ expects "1" returning 1L anyNumberOfTimes() + mockMosaicGeometry.toWKB _ expects() returning Array[Byte](1, 2, 3) anyNumberOfTimes() + + MosaicChip(isCore = false, Left(1L), null).isEmpty shouldBe true + MosaicChip(isCore = true, Left(1L), null).isEmpty shouldBe false + + mockIndexSystem.getCellIdDataType _ expects() returning LongType once() + MosaicChip(isCore = false, Left(1L), null).formatCellId(mockIndexSystem) shouldBe MosaicChip(isCore = false, Left(1L), null) + + mockIndexSystem.getCellIdDataType _ expects() returning StringType once() + MosaicChip(isCore = false, Left(1L), null).formatCellId(mockIndexSystem) shouldBe MosaicChip(isCore = false, Right("1"), null) + + mockIndexSystem.getCellIdDataType _ expects() returning LongType once() + MosaicChip(isCore = false, Right("1"), null).formatCellId(mockIndexSystem) shouldBe MosaicChip(isCore = false, Left(1L), null) + + mockIndexSystem.getCellIdDataType _ expects() returning StringType once() + MosaicChip(isCore = false, Right("1"), null).formatCellId(mockIndexSystem) shouldBe MosaicChip(isCore = false, Right("1"), null) + + mockIndexSystem.getCellIdDataType _ expects() returning BinaryType once() + an[IllegalArgumentException] should be thrownBy MosaicChip(isCore = false, Left(1L), null).formatCellId(mockIndexSystem) + + MosaicChip(isCore = false, Left(1L), null).cellIdAsLong(mockIndexSystem) shouldBe 1L + MosaicChip(isCore = false, Right("1"), null).cellIdAsLong(mockIndexSystem) shouldBe 1L + + MosaicChip(isCore = false, Left(1L), null).cellIdAsStr(mockIndexSystem) shouldBe "1" + MosaicChip(isCore = false, Right("1"), null).cellIdAsStr(mockIndexSystem) shouldBe "1" + + MosaicChip(isCore = false, Left(1L), null).serialize shouldBe a[InternalRow] + MosaicChip(isCore = false, Right("1"), null).serialize shouldBe a[InternalRow] + + MosaicChip(isCore = false, Left(1L), 
mockMosaicGeometry).serialize shouldBe a[InternalRow] + + } + +} diff --git a/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/MosaicTypeTest.scala b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/MosaicTypeTest.scala new file mode 100644 index 000000000..a6b7a0ca6 --- /dev/null +++ b/mosaic-core/src/test/scala/com/databricks/labs/mosaic/core/types/MosaicTypeTest.scala @@ -0,0 +1,16 @@ +package com.databricks.labs.mosaic.core.types + +import org.apache.spark.sql.types.LongType +import org.scalamock.scalatest.MockFactory +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers._ + +class MosaicTypeTest extends AnyFunSuite with MockFactory { + + test("MosaicType") { + val mosaicType = MosaicType(LongType) + mosaicType.typeName should be ("struct") + mosaicType.simpleString should be ("MOSAIC") + } + +} diff --git a/mosaic-index/.gitignore b/mosaic-index/.gitignore new file mode 100644 index 000000000..5ff6309b7 --- /dev/null +++ b/mosaic-index/.gitignore @@ -0,0 +1,38 @@ +target/ +!.mvn/wrapper/maven-wrapper.jar +!**/src/main/**/target/ +!**/src/test/**/target/ + +### IntelliJ IDEA ### +.idea/modules.xml +.idea/jarRepositories.xml +.idea/compiler.xml +.idea/libraries/ +*.iws +*.iml +*.ipr + +### Eclipse ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ + +### Mac OS ### +.DS_Store \ No newline at end of file diff --git a/mosaic-index/pom.xml b/mosaic-index/pom.xml new file mode 100644 index 000000000..296c9aff8 --- /dev/null +++ b/mosaic-index/pom.xml @@ -0,0 +1,13 @@ + + 4.0.0 + + com.databricks.labs + mosaic + 0.4.1 + + mosaic-index + mosaic-index + + + diff --git a/mosaic-raster/pom.xml b/mosaic-raster/pom.xml new file mode 100644 index 000000000..3fa7f639e --- /dev/null +++ 
b/mosaic-raster/pom.xml @@ -0,0 +1,28 @@ + + + + com.databricks.labs + mosaic + 0.4.1 + + 4.0.0 + + mosaic-raster + mosaic-raster + jar + + + + org.scala-lang + scala-library + + + + + + + + + + diff --git a/mosaic-vector/pom.xml b/mosaic-vector/pom.xml new file mode 100644 index 000000000..0ded74ad0 --- /dev/null +++ b/mosaic-vector/pom.xml @@ -0,0 +1,76 @@ + + + + com.databricks.labs + mosaic + 0.4.1 + + 4.0.0 + + mosaic-vector + mosaic-vector + jar + + + + org.scala-lang + scala-library + + + ${project.groupId} + mosaic-core + ${project.version} + + + + junit + junit + test + + + org.scalatest + scalatest_${scala.compat.version} + + + org.scalamock + scalamock_${scala.compat.version} + + + org.apache.spark + spark-core_${scala.compat.version} + test-jar + test + + + org.apache.spark + spark-sql_${scala.compat.version} + test-jar + test + + + org.apache.spark + spark-catalyst_${scala.compat.version} + test-jar + test + + + + org.apache.spark + spark-core_${scala.compat.version} + provided + + + org.apache.spark + spark-sql_${scala.compat.version} + provided + + + + + + + + + + diff --git a/pom.xml b/pom.xml index 1d5344c09..6369c0a95 100644 --- a/pom.xml +++ b/pom.xml @@ -1,279 +1,131 @@ - - - + + 4.0.0 + com.databricks.labs mosaic - ${mosaic.version} + pom + 0.4.1 + mosaic-parent + + + 3.0 + 80 true - 1.11 - 1.11 + 1.8 + 1.8 UTF-8 4.2.0 - - - org.scala-lang - scala-library - ${scala.version} - - - - junit - junit - 4.13.2 - test - - - org.scalatest - scalatest_${scala.compat.version} - 3.2.14 - test - - - org.scalamock - scalamock_${scala.compat.version} - 5.2.0 - test - - - org.apache.spark - spark-core_${scala.compat.version} - ${spark.version} - test-jar - test - - - org.apache.spark - spark-sql_${scala.compat.version} - ${spark.version} - test-jar - test - - - org.apache.spark - spark-catalyst_${scala.compat.version} - ${spark.version} - test-jar - test - - - - org.apache.spark - spark-core_${scala.compat.version} - ${spark.version} - provided - - - 
org.apache.spark - spark-sql_${scala.compat.version} - ${spark.version} - provided - - - org.apache.spark - spark-mllib_${scala.compat.version} - ${spark.version} - provided - - - io.delta - delta-core_${scala.compat.version} - 2.1.0 - provided - - - - com.uber - h3 - - - 3.7.0 - - - org.locationtech.jts - jts-core - 1.19.0 - - - org.locationtech.jts.io - jts-io-common - 1.19.0 - - - com.esri.geometry - esri-geometry-api - 2.2.4 - - - com.fasterxml.jackson.core - jackson-core - - - - - org.locationtech.proj4j - proj4j-epsg - 1.2.2 - - - org.locationtech.proj4j - proj4j - 1.2.2 - - - org.gdal - gdal - 3.4.0 - + + + + org.scala-lang + scala-library + ${scala.version} + + + + junit + junit + 4.13.2 + test + + + org.scalatest + scalatest_${scala.compat.version} + 3.2.14 + test + + + org.scalamock + scalamock_${scala.compat.version} + 5.2.0 + test + + + org.apache.spark + spark-core_${scala.compat.version} + ${spark.version} + test-jar + test + + + org.apache.spark + spark-sql_${scala.compat.version} + ${spark.version} + test-jar + test + + + org.apache.spark + spark-catalyst_${scala.compat.version} + ${spark.version} + test-jar + test + + + + org.apache.spark + spark-core_${scala.compat.version} + ${spark.version} + provided + + + org.apache.spark + spark-sql_${scala.compat.version} + ${spark.version} + provided + + + + com.uber + h3 + + + 3.7.0 + + + org.locationtech.jts + jts-core + 1.19.0 + + + org.locationtech.jts.io + jts-io-common + 1.19.0 + - + + org.locationtech.proj4j + proj4j-epsg + 1.2.2 + + + org.locationtech.proj4j + proj4j + 1.2.2 + + + org.gdal + gdal + 3.4.0 + + + + + + + github + GitHub Packages + https://maven.pkg.github.com/databrickslabs/mosaic + + - - src/main/scala - src/test/scala - - - org.apache.maven.plugins - maven-assembly-plugin - 3.6.0 - - - jar-with-dependencies - - - - - assemble-all - package - - single - - - - - - - net.alchim31.maven - scala-maven-plugin - 4.7.1 - - - - - compile - testCompile - - - - -dependencyfile - 
${project.build.directory}/.scala_dependencies - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - 3.1.0 - - - true - - - - org.scalatest - scalatest-maven-plugin - 2.0.0 - - ${project.build.directory}/test-reports - - - - test - - test - - - - - - org.scalastyle - scalastyle-maven-plugin - 1.0.0 - - true - true - false - false - ${basedir}/src/main/scala - ${basedir}/src/test/scala - scalastyle-config.xml - ${basedir}/target/scalastyle-output.xml - ${encoding} - ${encoding} - - - - - check - - - - - - org.scoverage - scoverage-maven-plugin - 1.4.11 - - - scoverage-report - package - - check - report-only - - - - - ${minimum.coverage} - true - ${scala.version} - skipTests=false - - - - org.apache.maven.plugins - maven-resources-plugin - 3.0.2 - - - copy-files-on-build - package - - copy-resources - - - ${basedir}/python/mosaic/lib - - - ${basedir}/target/ - *.jar - false - - - - - - - - standard @@ -288,15 +140,16 @@ 2.12.10 2.12 3.3.2 - 0.3.11 + ${version} - - - github - GitHub Packages - https://maven.pkg.github.com/databrickslabs/mosaic - - + + + mosaic-core + mosaic-vector + mosaic-raster + mosaic-index + +