Skip to content

Commit

Permalink
GEOMESA-3355 support geojson style geometries in json converter infer…
Browse files Browse the repository at this point in the history
…ence (#3097)

Co-authored-by: Kyle Stamper <[email protected]>
Co-authored-by: Emilio Lahr-Vivaz <[email protected]>
Co-authored-by: David Helfrick <[email protected]>
Co-authored-by: Chad Bowman <[email protected]>
  • Loading branch information
5 people authored Apr 19, 2024
1 parent c3c70b1 commit ef6e57c
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ import org.locationtech.geomesa.utils.text.WKTUtils
import org.locationtech.jts.geom._
import org.locationtech.jts.geom.impl.CoordinateArraySequence

import java.util.Collections
import java.util.{Collections, Locale}
import scala.util.{Failure, Success, Try}


trait GeoJsonParsing {
Expand Down Expand Up @@ -82,45 +83,97 @@ trait GeoJsonParsing {
* @return
*/
def parseGeometry(el: JsonElement): Geometry = el match {
case o: JsonObject => parseGeometryObject(o)
case o: JsonObject => parseGeometryObject(o).get
case o: JsonPrimitive => WKTUtils.read(o.getAsString)
case _: JsonNull => null.asInstanceOf[Geometry]
case _ => throw new IllegalArgumentException(s"Unknown geometry type: $el")
}


/**
 * Flattens a json element into (json path, value) pairs
 *
 * Primitives yield a single pair: booleans are kept as booleans, anything else is read as a string.
 * Objects that can be parsed as a geojson-style geometry yield a single pair holding the geometry;
 * any other object is flattened recursively, with each key appended to the path as `['key']`.
 * Arrays yield a single pair holding an unmodifiable list of the `toString` of each item.
 * Any other element yields no pairs.
 *
 * @param elem element
 * @param path json path to the element
 * @return pairs of json path to value, where the value is a string, boolean, geometry or list[string]
 */
def parseElement(elem: JsonElement, path: String): Seq[(String, Any)] = elem match {
  case primitive: JsonPrimitive if primitive.isBoolean =>
    Seq(path -> primitive.getAsBoolean)

  case primitive: JsonPrimitive =>
    // note: gson numbers don't have a defined type so type checking doesn't work
    Seq(path -> primitive.getAsString)

  case obj: JsonObject =>
    parseGeometryObject(obj) match {
      case Success(geom) => Seq(path -> geom)
      case Failure(_) =>
        // not a geometry - descend into each entry, extending the path with the entry key
        obj.entrySet().asScala.iterator
          .flatMap(entry => parseElement(entry.getValue, s"$path['${entry.getKey}']"))
          .toList
    }

  case array: JsonArray =>
    // arrays are not descended into - each item is captured through its toString
    val items = new java.util.ArrayList[String](array.size())
    array.asScala.foreach(item => items.add(item.toString))
    Seq(path -> Collections.unmodifiableList(items))

  case _ =>
    Seq.empty // no-op
}

/**
* Parse a geometry object
*
* @param obj object
* @return
*/
private def parseGeometryObject(obj: JsonObject): Geometry = {
obj.get(TypeKey).getAsString.toLowerCase match {
private def parseGeometryObject(obj: JsonObject): Try[Geometry] = {
val geomType = obj.get(TypeKey)

if (geomType == null || !geomType.isJsonPrimitive) {
return Failure(new RuntimeException("Not a Geometry object"))
}

geomType.getAsString.toLowerCase(Locale.US) match {
case "point" =>
factory.createPoint(toPointCoords(obj.get(CoordinatesKey)))
Try(factory.createPoint(toPointCoords(obj.get(CoordinatesKey))))

case "linestring" =>
factory.createLineString(toCoordSeq(obj.get(CoordinatesKey)))
Try(factory.createLineString(toCoordSeq(obj.get(CoordinatesKey))))

case "polygon" =>
toPolygon(obj.get(CoordinatesKey))
Try(toPolygon(obj.get(CoordinatesKey)))

case "multipoint" =>
factory.createMultiPoint(toCoordSeq(obj.get(CoordinatesKey)))
Try(factory.createMultiPoint(toCoordSeq(obj.get(CoordinatesKey))))

case "multilinestring" =>
val coords = obj.get(CoordinatesKey).getAsJsonArray.asScala
.map(c => factory.createLineString(toCoordSeq(c))).toArray
factory.createMultiLineString(coords)
Try {
val coords =
obj.get(CoordinatesKey).getAsJsonArray.asScala
.map(c => factory.createLineString(toCoordSeq(c))).toArray
factory.createMultiLineString(coords)
}

case "multipolygon" =>
factory.createMultiPolygon(obj.get(CoordinatesKey).getAsJsonArray.asScala.map(toPolygon).toArray)
Try(factory.createMultiPolygon(obj.get(CoordinatesKey).getAsJsonArray.asScala.map(toPolygon).toArray))

case "geometrycollection" =>
factory.createGeometryCollection(obj.get(GeometriesKey).getAsJsonArray.asScala.map(parseGeometry).toArray)
Try(factory.createGeometryCollection(obj.get(GeometriesKey).getAsJsonArray.asScala.map(parseGeometry).toArray))

case unknown =>
throw new NotImplementedError(s"Can't parse geometry type of $unknown")
Failure(new NotImplementedError(s"Can't parse geometry type of $unknown"))
}
}

Expand Down Expand Up @@ -151,9 +204,7 @@ trait GeoJsonParsing {
}
}

object GeoJsonParsing {

import scala.collection.JavaConverters._
object GeoJsonParsing extends GeoJsonParsing {

/**
* Parsed geojson feature element
Expand All @@ -176,41 +227,4 @@ object GeoJsonParsing {
private val GeometriesKey = "geometries"
private val IdKey = "id"

/**
* Parse a json element
*
* @param elem element
* @param path json path to the object
* @return map of key is json path to value, value is a string, boolean or list[string]
*/
def parseElement(elem: JsonElement, path: String): Seq[(String, Any)] = {
elem match {
case e: JsonPrimitive =>
if (e.isBoolean) {
Seq(path -> e.getAsBoolean)
} else {
// note: gson numbers don't have a defined type so type checking doesn't work
Seq(path -> e.getAsString)
}

case e: JsonObject =>
val builder = Seq.newBuilder[(String, Any)]
e.entrySet().asScala.foreach { entry =>
builder ++= parseElement(entry.getValue, s"$path['${entry.getKey}']")
}
builder.result

case e: JsonArray =>
val list = new java.util.ArrayList[String](e.size())
var i = 0
while (i < e.size()) {
list.add(e.get(i).toString)
i += 1
}
Seq(path -> Collections.unmodifiableList(list))

case _ =>
Seq.empty // no-op
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,6 @@ class JsonConverterFactory extends AbstractConverterFactory[JsonConverter, JsonC
}
if (path.isEmpty) {
DerivedField(inferredType.name, transform)
} else if (path == JsonConverterFactory.GeoJsonGeometryPath) {
GeometryJsonField(inferredType.name, path, pathIsRoot = false, None)
} else {
inferredType.typed match {
case ObjectType.BOOLEAN =>
Expand All @@ -205,6 +203,8 @@ class JsonConverterFactory extends AbstractConverterFactory[JsonConverter, JsonC
// if type is list, that means the transform is 'identity', but we need to replace it with jsonList.
// this is due to GeoJsonParsing decoding the json array for us, above
ArrayJsonField(inferredType.name, path, pathIsRoot = false, Some(Expression("try(jsonList('string',$0),null)")))
case t if transform.isEmpty && (t == ObjectType.GEOMETRY || ObjectType.GeometrySubtypes.contains(t)) =>
GeometryJsonField(inferredType.name, path, pathIsRoot = false, None)
case _ =>
// all other types will be parsed as strings with appropriate transforms
StringJsonField(inferredType.name, path, pathIsRoot = false, transform)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1589,6 +1589,80 @@ class JsonConverterTest extends Specification {
features.map(_.getAttributes.asScala) must containTheSameElementsAs(expected)
}
}

// verifies that schema inference handles geojson-style geometry objects ("type" + "coordinates")
// that are nested inside features of an otherwise non-geojson document
"infer schema from non-geojson files with geojson points" >> {
val json =
"""{
| "DataSource": { "name": "myjson" },
| "Features": [
| {
| "id": 1,
| "name": "phil",
| "physicals": {
| "age": 32,
| "weight": 150.2
| },
| "hobbies": [ "baseball", "soccer" ],
| "languages": {
| "java": 100,
| "scala": 70
| },
| "geometry": { "type": "Point", "coordinates": [55, 56] }
| },
| {
| "id": 2,
| "name": "fred",
| "physicals": {
| "age": 33,
| "weight": 150.1
| },
| "hobbies": [ "archery", "tennis" ],
| "languages": {
| "c++": 10,
| "fortran": 50
| },
| "geometry": { "type": "Point", "coordinates": [45, 46] }
| }
| ]
|}
""".stripMargin

// declared as a def so that each use (inference, then conversion) gets a fresh stream over the same bytes
def bytes = new ByteArrayInputStream(json.getBytes(StandardCharsets.UTF_8))

// infer with an explicit feature path pointing at the elements of the 'Features' array
val inferred = new JsonConverterFactory().infer(bytes, None, Map[String, AnyRef]("featurePath" -> "$.Features[*]"))

inferred must beASuccessfulTry

// first element of the inferred result is used as the feature type, the second as the converter config (below)
val sft = inferred.get._1

// the nested 'geometry' object is expected to come back as a single Point attribute, while the other
// nested objects are expected to be flattened into one attribute per leaf (e.g. 'physicals_age')
sft.getAttributeDescriptors.asScala.map(d => (d.getLocalName, d.getType.getBinding)) mustEqual
Seq(
("id_0", classOf[Integer]),
("name", classOf[String]),
("physicals_age", classOf[Integer]),
("physicals_weight", classOf[java.lang.Double]),
("hobbies", classOf[java.util.List[_]]),
("languages_java", classOf[Integer]),
("languages_scala", classOf[Integer]),
("geometry", classOf[Point]),
("languages_c", classOf[Integer]),
("languages_fortran", classOf[Integer]),
)

// round trip: the inferred converter config must be able to parse the same document
WithClose(SimpleFeatureConverter(sft, inferred.get._2)) { converter =>
converter must not(beNull)

val features = WithClose(converter.process(bytes))(_.toList)
features must haveLength(2)

// attributes that only appear in the other feature (the per-feature 'languages' keys) are expected to be null
val expected = Seq(
Seq(1, "phil", 32, 150.2, Seq("baseball", "soccer").asJava, 100, 70, WKTUtils.read("POINT (55 56)"), null, null),
Seq(2, "fred", 33, 150.1, Seq("archery", "tennis").asJava, null, null, WKTUtils.read("POINT (45 46)"), 10, 50),
)
features.map(_.getAttributes.asScala) must containTheSameElementsAs(expected)
}
}

"infer schema from non-geojson files with feature path" >> {
val json =
"""[{ "name": "name1", "demographics": { "age": 1 }, "geom": "POINT (41 51)" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ object ObjectType extends Enumeration {
// string sub-types
val JSON = Value

val GeometrySubtypes = Seq(POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, MULTIPOLYGON, GEOMETRY_COLLECTION)

/**
* @see selectType(clazz: Class[_], metadata: java.util.Map[_, _])
*
Expand Down

0 comments on commit ef6e57c

Please sign in to comment.