-
Notifications
You must be signed in to change notification settings - Fork 434
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
GEOMESA-3426 FSDS - Support path filtering for converter queries (#3245)
Co-authored-by: Forrest Feaser <[email protected]>
- Loading branch information
Showing
19 changed files
with
366 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 change: 1 addition & 0 deletions
1
...NF/services/org.locationtech.geomesa.fs.storage.converter.pathfilter.PathFilteringFactory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
org.locationtech.geomesa.fs.storage.converter.pathfilter.DtgPathFiltering$DtgPathFilteringFactory |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
101 changes: 101 additions & 0 deletions
101
...ain/scala/org/locationtech/geomesa/fs/storage/converter/pathfilter/DtgPathFiltering.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
/*********************************************************************** | ||
* Copyright (c) 2013-2024 Commonwealth Computer Research, Inc. | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Apache License, Version 2.0 | ||
* which accompanies this distribution and is available at | ||
* http://www.opensource.org/licenses/apache2.0.php. | ||
***********************************************************************/ | ||
|
||
package org.locationtech.geomesa.fs.storage.converter.pathfilter | ||
|
||
import com.typesafe.scalalogging.LazyLogging | ||
import org.apache.hadoop.fs.PathFilter | ||
import org.geotools.api.filter.Filter | ||
import org.locationtech.geomesa.filter.Bounds.Bound | ||
import org.locationtech.geomesa.filter.{Bounds, FilterHelper} | ||
import org.locationtech.geomesa.fs.storage.api.NamedOptions | ||
|
||
import java.time.format.DateTimeFormatter | ||
import java.time.{ZoneOffset, ZonedDateTime} | ||
import java.util.regex.Pattern | ||
import scala.concurrent.duration.Duration | ||
import scala.util.control.NonFatal | ||
|
||
/**
 * Path filtering that prunes files based on a date embedded in the file name.
 *
 * The date text is taken from the first capture group of `pattern`, which must match the entire
 * file name, and is parsed with `format`. The parsed instant is widened by `buffer` on both sides,
 * and the path is kept when that window intersects any interval the query filter defines on
 * `attribute`.
 *
 * The filter fails open: a path is kept whenever it cannot be positively ruled out.
 *
 * @param attribute name of the date attribute whose intervals are extracted from the query filter
 * @param pattern regex matched against the whole file name; group 1 must capture the date text
 * @param format formatter used to parse the captured date text
 * @param buffer amount of time added before and after the date parsed from the file name
 */
class DtgPathFiltering(attribute: String, pattern: Pattern, format: DateTimeFormatter, buffer: Duration)
    extends PathFiltering with LazyLogging {

  /**
   * Creates a path filter for the given query filter.
   *
   * @param filter query filter to extract date intervals from
   * @return a path filter that accepts a path unless its buffered file name date falls outside
   *         of every extracted interval
   */
  def apply(filter: Filter): PathFilter = {
    val filterIntervals = FilterHelper.extractIntervals(filter, attribute, handleExclusiveBounds = true)
    // Computed once per query instead of once per path. None means "nothing can be pruned":
    //  - no extracted bounds: the filter does not constrain the attribute, so every file may match
    //  - non-finite buffer: it has no millisecond value (toMillis would throw) and overlaps everything
    val bufferMillis: Option[Long] =
      if (filterIntervals.isEmpty || !buffer.isFinite) { None } else { Some(buffer.toMillis) }

    path => try {
      val included = bufferMillis match {
        case None => true
        case Some(millis) =>
          val time = parseDtg(path.getName).toInstant
          val lower = ZonedDateTime.ofInstant(time.minusMillis(millis), ZoneOffset.UTC)
          val upper = ZonedDateTime.ofInstant(time.plusMillis(millis), ZoneOffset.UTC)
          val buffered = Bounds(Bound.inclusive(lower), Bound.inclusive(upper))
          filterIntervals.exists(bounds => bounds.intersects(buffered))
      }
      logger.whenDebugEnabled {
        if (included) {
          logger.debug(s"Including path ${path.getName} for filter $filter")
        } else {
          logger.debug(s"Excluding path ${path.getName} for filter $filter")
        }
      }
      included
    } catch {
      // an unparseable file name must not hide data, so keep the path and report the problem
      case NonFatal(ex) =>
        logger.warn(s"Failed to evaluate filter for path '${path.getName}'", ex)
        true
    }
  }

  /**
   * Parses the date out of a file name.
   *
   * @param name file name, without any parent directories
   * @return the date captured by group 1 of the pattern
   * @throws IllegalArgumentException if the name is null, does not fully match the pattern,
   *                                  or the pattern defines no capture group
   */
  private def parseDtg(name: String): ZonedDateTime = {
    Option(name)
      .map(pattern.matcher)
      .filter(_.matches)
      .filter(_.groupCount > 0)
      .map(_.group(1))
      .map(ZonedDateTime.parse(_, format))
      .getOrElse {
        throw new IllegalArgumentException(s"Failed to parse ${classOf[ZonedDateTime].getName} " +
          s"from file name '$name' for pattern '$pattern' and format '$format'")
      }
  }

  override def toString: String = {
    s"${this.getClass.getName}(attribute = $attribute, pattern = $pattern, format = $format, buffer = $buffer)"
  }
}
|
||
/**
 * Name, configuration keys and factory for the date-based path filtering.
 */
object DtgPathFiltering extends LazyLogging {

  // config name that selects this path filtering
  val Name = "dtg"

  // option keys read by the factory - each one is required
  object Config {
    val Attribute = "attribute"
    val Pattern = "pattern"
    val Format = "format"
    val Buffer = "buffer"
  }

  /**
   * Factory that builds a [[DtgPathFiltering]] from a config named [[Name]].
   */
  class DtgPathFilteringFactory extends PathFilteringFactory {

    /**
     * @param config named options to load from
     * @return None if the config name is not [[Name]], otherwise the configured path filtering
     * @throws IllegalArgumentException if a required option is missing
     */
    override def load(config: NamedOptions): Option[PathFiltering] = {
      if (config.name == Name) {
        // looks up a mandatory option, failing with the supplied message when it is absent
        def required(key: String)(message: => String): String = {
          val value = config.options.get(key).orNull
          require(value != null, message)
          value
        }

        // verify that every option is present before parsing any of them
        val attribute =
          required(Config.Attribute)(s"$Name path filter requires a dtg attribute config '${Config.Attribute}'")
        val patternConfig =
          required(Config.Pattern)(s"$Name path filter requires a dtg pattern config '${Config.Pattern}'")
        val formatConfig =
          required(Config.Format)(s"$Name path filter requires a dtg format config '${Config.Format}'")
        val bufferConfig =
          required(Config.Buffer)(s"$Name path filter requires a buffer duration config '${Config.Buffer}'")

        val pathFiltering =
          new DtgPathFiltering(
            attribute,
            Pattern.compile(patternConfig),
            DateTimeFormatter.ofPattern(formatConfig).withZone(ZoneOffset.UTC),
            Duration(bufferConfig))
        logger.info(s"Loaded PathFiltering: $pathFiltering")
        Some(pathFiltering)
      } else {
        None
      }
    }
  }
}
16 changes: 16 additions & 0 deletions
16
...c/main/scala/org/locationtech/geomesa/fs/storage/converter/pathfilter/PathFiltering.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
/*********************************************************************** | ||
* Copyright (c) 2013-2024 Commonwealth Computer Research, Inc. | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Apache License, Version 2.0 | ||
* which accompanies this distribution and is available at | ||
* http://www.opensource.org/licenses/apache2.0.php. | ||
***********************************************************************/ | ||
|
||
package org.locationtech.geomesa.fs.storage.converter.pathfilter | ||
|
||
import org.apache.hadoop.fs.PathFilter | ||
import org.geotools.api.filter.Filter | ||
|
||
/**
 * Turns a query filter into a Hadoop path filter.
 */
trait PathFiltering { | ||
/**
 * @param filter query filter
 * @return path filter derived from the query filter
 */
def apply(filter: Filter): PathFilter | ||
} |
26 changes: 26 additions & 0 deletions
26
...scala/org/locationtech/geomesa/fs/storage/converter/pathfilter/PathFilteringFactory.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
/*********************************************************************** | ||
* Copyright (c) 2013-2024 Commonwealth Computer Research, Inc. | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Apache License, Version 2.0 | ||
* which accompanies this distribution and is available at | ||
* http://www.opensource.org/licenses/apache2.0.php. | ||
***********************************************************************/ | ||
|
||
package org.locationtech.geomesa.fs.storage.converter.pathfilter | ||
|
||
import org.locationtech.geomesa.fs.storage.api.NamedOptions | ||
|
||
import java.util.ServiceLoader | ||
|
||
/**
 * Factory for creating a path filtering from named options.
 */
trait PathFilteringFactory { | ||
/**
 * @param config name and options describing the path filtering to create
 * @return an optional path filtering; the Option return type lets an implementation decline a config
 */
def load(config: NamedOptions): Option[PathFiltering] | ||
} |
|
||
/**
 * Entry point for loading a path filtering through the factories registered with the
 * Java `ServiceLoader`.
 */
object PathFilteringFactory {

  import scala.collection.JavaConverters._

  // service lookup is deferred until the first call to load
  private lazy val factories = ServiceLoader.load(classOf[PathFilteringFactory]).asScala.toSeq

  /**
   * Asks each factory in turn and stops at the first one that produces a path filtering.
   *
   * @param config name and options describing the path filtering to create
   * @return the path filtering from the first factory that returns one, if any
   */
  def load(config: NamedOptions): Option[PathFiltering] =
    factories.iterator.map(_.load(config)).collectFirst { case Some(filtering) => filtering }
}
File renamed without changes.
3 changes: 3 additions & 0 deletions
3
...torage-convert/src/test/resources/example-convert-test-2/2024/12/11/data-202412110600.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
ID,Name,Age,Dtg,Lon,Dat | ||
0,Harry,17,2024-12-11T01:00:00.000Z,0.0,0.0 | ||
1,Hermione,18,2024-12-11T11:00:00.000Z,0.0,0.0 |
3 changes: 3 additions & 0 deletions
3
...torage-convert/src/test/resources/example-convert-test-2/2024/12/11/data-202412111200.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
ID,Name,Age,Dtg,Lon,Dat | ||
2,Ronald,17,2024-12-11T07:00:00.000Z,0.0,0.0 | ||
3,Draco,18,2024-12-11T11:00:00.000Z,0.0,0.0 |
3 changes: 3 additions & 0 deletions
3
...torage-convert/src/test/resources/example-convert-test-2/2024/12/11/data-202412111800.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
ID,Name,Age,Dtg,Lon,Dat | ||
4,Neville,17,2024-12-11T13:00:00.000Z,0.0,0.0 | ||
5,Rubeus,43,2024-12-11T08:00:00.000Z,0.0,0.0 |
3 changes: 3 additions & 0 deletions
3
...torage-convert/src/test/resources/example-convert-test-2/2024/12/11/data-202412112330.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
ID,Name,Age,Dtg,Lon,Dat | ||
6,Severus,52,2024-12-11T19:00:00.000Z,0.0,0.0 | ||
7,Alfred,78,2024-12-11T23:00:00.000Z,0.0,0.0 |
3 changes: 3 additions & 0 deletions
3
...torage-convert/src/test/resources/example-convert-test-2/2024/12/12/data-202412120100.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
ID,Name,Age,Dtg,Lon,Dat | ||
8,Dean,17,2024-12-11T23:50:00.000Z,0.0,0.0 | ||
9,Minerva,57,2024-12-12T00:30:00.000Z,0.0,0.0 |
3 changes: 3 additions & 0 deletions
3
...torage-convert/src/test/resources/example-convert-test-2/2024/12/12/data-202412120600.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
ID,Name,Age,Dtg,Lon,Dat | ||
10,Luna,17,2024-12-12T03:00:00.000Z,0.0,0.0 | ||
11,Dudley,19,2024-12-12T05:00:00.000Z,0.0,0.0 |
Oops, something went wrong.