Skip to content

Commit

Permalink
GEOMESA-3321 Fix disjoint queries being blocked by full table scan checks
Browse files Browse the repository at this point in the history
  • Loading branch information
elahrvivaz committed Jan 3, 2024
1 parent 89ad428 commit a6c04c6
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ import org.locationtech.geomesa.process.tube.TubeSelectProcess
import org.locationtech.geomesa.utils.collection.SelfClosingIterator
import org.locationtech.geomesa.utils.conf.{GeoMesaProperties, SemanticVersion}
import org.locationtech.geomesa.utils.geotools.{FeatureUtils, SimpleFeatureTypes}
import org.locationtech.geomesa.utils.io.WithClose
import org.locationtech.geomesa.utils.io.{CloseWithLogging, WithClose}
import org.opengis.feature.simple.SimpleFeature
import org.opengis.filter.Filter
import org.specs2.matcher.MatchResult
import org.specs2.mutable.Specification
import org.specs2.runner.JUnitRunner

import java.io.File
import java.io.{File, StringWriter}
import java.util.Collections

@RunWith(classOf[JUnitRunner])
Expand Down Expand Up @@ -172,6 +172,43 @@ class HBaseDataStoreTest extends Specification with LazyLogging {
testCount(ds)
testExactCount(ds)

/**
 * Queries a merged view made of two stores with disjoint store filters while full-table scans
 * are blocked, and checks that the query still returns results.
 *
 * Both stores are configured against the same HBase catalog: one is restricted to
 * `dtg >= 2014-01-05T17:00:00Z` and the other to `dtg < 2014-01-05T17:00:00Z`. The query interval
 * ends on 2014-01-04, so it is disjoint with the first store's filter.
 *
 * @return the result of asserting that the query returns 4 features
 */
def testMergedView(): MatchResult[_] = {
  // dump the mini cluster's configuration as XML so that it can be embedded in each store's config
  val hbaseXml = {
    val writer = new StringWriter()
    MiniCluster.cluster.getConfiguration.writeXml(writer)
    writer.toString
  }
  val catalog = params(HBaseCatalogParam.key)

  val conf =
    s"""{
       | "stores" = [
       | {
       | "hbase.catalog": "$catalog",
       | "${ConfigsParam.key}": \"\"\"$hbaseXml\"\"\",
       | "geomesa.merged.store.filter": "dtg >= 2014-01-05T17:00:00Z",
       | },
       | {
       | "hbase.catalog": "$catalog",
       | "${ConfigsParam.key}": \"\"\"$hbaseXml\"\"\",
       | "geomesa.merged.store.filter": "dtg < 2014-01-05T17:00:00Z",
       | }
       | ]
       |}""".stripMargin

  val storeParams = Map("geomesa.merged.stores" -> conf).asJava
  val ds = DataStoreFinder.getDataStore(storeParams)
  ds must not(beNull)

  // block full-table scans for this thread only; the finally block undoes it
  QueryProperties.BlockFullTableScans.threadLocalValue.set("true")
  try {
    val filter = ECQL.toFilter("dtg DURING 2014-01-01T00:00:00.000Z/2014-01-04T12:00:00.000Z")
    val reader = ds.getFeatureReader(new Query(typeName, filter), Transaction.AUTO_COMMIT)
    SelfClosingIterator(reader).toList must haveLength(4)
  } finally {
    QueryProperties.BlockFullTableScans.threadLocalValue.remove()
    CloseWithLogging(ds)
  }
}

testMergedView()

ds.getFeatureSource(typeName).removeFeatures(ECQL.toFilter("INCLUDE"))

forall(Seq("INCLUDE",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ object StrategyDecider extends MethodProfiling with LazyLogging {
// corresponds to filter.exclude
// we still need to return something so that we can handle reduce steps, if needed
explain("No filter plans found - creating empty plan")
FilterPlan(Seq(FilterStrategy(new EmptyIndex(ds, sft), None, None, temporal = false, 1f)))
FilterPlan(Seq(FilterStrategy(new EmptyIndex(ds, sft), Some(Filter.EXCLUDE), None, temporal = false, 1f)))
} else if (options.lengthCompare(1) == 0) {
// only a single option, so don't bother with cost
explain(s"Filter plan: ${options.head}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import org.geotools.util.factory.Hints
import org.junit.runner.RunWith
import org.locationtech.geomesa.features.ScalaSimpleFeature
import org.locationtech.geomesa.index.TestGeoMesaDataStore
import org.locationtech.geomesa.index.conf.QueryProperties
import org.locationtech.geomesa.index.geotools.GeoMesaDataStoreTest._
import org.locationtech.geomesa.index.index.EmptyIndex
import org.locationtech.geomesa.index.index.attribute.AttributeIndex
Expand Down Expand Up @@ -165,6 +166,47 @@ class GeoMesaDataStoreTest extends Specification {
ds2.dispose()
}
}
"block queries which would cause a full table scan via sys props" in {
  val sft = SimpleFeatureTypes.createType("test",
    "name:String,age:Int,dtg:Date,*geom:Point:srid=4326;geomesa.indices.enabled='id,z3,attr:name'")

  val ds = new TestGeoMesaDataStore(true)
  ds.createSchema(sft)

  // filters that are expected to run while full-table scans are blocked (note: includes EXCLUDE)
  val allowed = Seq(
    "name = 'bob'",
    "IN('123')",
    "bbox(geom,-10,-10,10,10) AND dtg during 2020-01-01T00:00:00.000Z/2020-01-01T23:59:59.000Z",
    "bbox(geom,-10,-10,10,10) AND (dtg during 2020-01-01T00:00:00.000Z/2020-01-01T00:59:59.000Z OR dtg during 2020-01-01T12:00:00.000Z/2020-01-01T12:59:59.000Z)",
    "EXCLUDE"
  )

  // filters that are expected to be rejected while full-table scans are blocked
  val blocked = Seq(
    "INCLUDE",
    "bbox(geom,-180,-90,180,90)",
    "name ilike '%b'",
    "not IN('1')"
  )

  // runs the query against the test store and materializes every returned feature
  def results(query: Query) =
    SelfClosingIterator(ds.getFeatureReader(query, Transaction.AUTO_COMMIT)).toList

  // the block is enabled through the thread-local override; the finally block undoes it
  QueryProperties.BlockFullTableScans.threadLocalValue.set("true")
  try {
    // no features were written to the store, so every permitted query comes back empty
    foreach(allowed.map(ECQL.toFilter)) { filter =>
      results(new Query(sft.getTypeName, filter)) must beEmpty
    }

    foreach(blocked.map(ECQL.toFilter)) { filter =>
      val query = new Query(sft.getTypeName, filter)
      results(query) must throwA[RuntimeException]
      // once max features is set on the query, the same full-table scan is allowed to run
      query.setMaxFeatures(50)
      results(query) must beEmpty
    }
  } finally {
    QueryProperties.BlockFullTableScans.threadLocalValue.remove()
    ds.dispose()
  }
}
"support timestamp types with stats" in {
val sft = SimpleFeatureTypes.createType("ts", "dtg:Timestamp,*geom:Point:srid=4326")
ds.createSchema(sft)
Expand Down

0 comments on commit a6c04c6

Please sign in to comment.