From a6c04c6c61b52659c13e1015c086c10fa4b700c4 Mon Sep 17 00:00:00 2001 From: Emilio Lahr-Vivaz Date: Wed, 3 Jan 2024 15:54:35 +0000 Subject: [PATCH] GEOMESA-3321 Fix disjoint queries being blocked by full table scan checks --- .../hbase/data/HBaseDataStoreTest.scala | 41 +++++++++++++++++- .../index/planning/StrategyDecider.scala | 2 +- .../index/geotools/GeoMesaDataStoreTest.scala | 42 +++++++++++++++++++ 3 files changed, 82 insertions(+), 3 deletions(-) diff --git a/geomesa-hbase/geomesa-hbase-datastore/src/test/scala/org/locationtech/geomesa/hbase/data/HBaseDataStoreTest.scala b/geomesa-hbase/geomesa-hbase-datastore/src/test/scala/org/locationtech/geomesa/hbase/data/HBaseDataStoreTest.scala index bd3109bb02fe..525224d2814c 100644 --- a/geomesa-hbase/geomesa-hbase-datastore/src/test/scala/org/locationtech/geomesa/hbase/data/HBaseDataStoreTest.scala +++ b/geomesa-hbase/geomesa-hbase-datastore/src/test/scala/org/locationtech/geomesa/hbase/data/HBaseDataStoreTest.scala @@ -29,14 +29,14 @@ import org.locationtech.geomesa.process.tube.TubeSelectProcess import org.locationtech.geomesa.utils.collection.SelfClosingIterator import org.locationtech.geomesa.utils.conf.{GeoMesaProperties, SemanticVersion} import org.locationtech.geomesa.utils.geotools.{FeatureUtils, SimpleFeatureTypes} -import org.locationtech.geomesa.utils.io.WithClose +import org.locationtech.geomesa.utils.io.{CloseWithLogging, WithClose} import org.opengis.feature.simple.SimpleFeature import org.opengis.filter.Filter import org.specs2.matcher.MatchResult import org.specs2.mutable.Specification import org.specs2.runner.JUnitRunner -import java.io.File +import java.io.{File, StringWriter} import java.util.Collections @RunWith(classOf[JUnitRunner]) @@ -172,6 +172,43 @@ class HBaseDataStoreTest extends Specification with LazyLogging { testCount(ds) testExactCount(ds) + def testMergedView(): MatchResult[_] = { + val hbaseConf = { + val sw = new StringWriter() + MiniCluster.cluster.getConfiguration.writeXml(sw) + sw.toString + } + + val conf = + s"""{ + | "stores" = [ + | { + | "hbase.catalog": "${params(HBaseCatalogParam.key)}", + | "${ConfigsParam.key}": \"\"\"$hbaseConf\"\"\", + | "geomesa.merged.store.filter": "dtg >= 2014-01-05T17:00:00Z", + | }, + | { + | "hbase.catalog": "${params(HBaseCatalogParam.key)}", + | "${ConfigsParam.key}": \"\"\"$hbaseConf\"\"\", + | "geomesa.merged.store.filter": "dtg < 2014-01-05T17:00:00Z", + | } + | ] + |}""".stripMargin + val ds = DataStoreFinder.getDataStore(Map("geomesa.merged.stores" -> conf).asJava) + ds must not(beNull) + QueryProperties.BlockFullTableScans.threadLocalValue.set("true") + try { + val query = new Query(typeName, ECQL.toFilter("dtg DURING 2014-01-01T00:00:00.000Z/2014-01-04T12:00:00.000Z")) + val features = SelfClosingIterator(ds.getFeatureReader(query, Transaction.AUTO_COMMIT)).toList + features must haveLength(4) + } finally { + QueryProperties.BlockFullTableScans.threadLocalValue.remove() + CloseWithLogging(ds) + } + } + + testMergedView() + ds.getFeatureSource(typeName).removeFeatures(ECQL.toFilter("INCLUDE")) forall(Seq("INCLUDE", diff --git a/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/planning/StrategyDecider.scala b/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/planning/StrategyDecider.scala index 4c63bfc0cf89..6ecf60394aff 100644 --- a/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/planning/StrategyDecider.scala +++ b/geomesa-index-api/src/main/scala/org/locationtech/geomesa/index/planning/StrategyDecider.scala @@ -96,7 +96,7 @@ object StrategyDecider extends MethodProfiling with LazyLogging { // corresponds to filter.exclude // we still need to return something so that we can handle reduce steps, if needed explain("No filter plans found - creating empty plan") - FilterPlan(Seq(FilterStrategy(new EmptyIndex(ds, sft), None, None, temporal = false, 1f))) + FilterPlan(Seq(FilterStrategy(new EmptyIndex(ds, sft), Some(Filter.EXCLUDE), None, temporal = false, 1f))) } else if (options.lengthCompare(1) == 0) { // only a single option, so don't bother with cost explain(s"Filter plan: ${options.head}") diff --git a/geomesa-index-api/src/test/scala/org/locationtech/geomesa/index/geotools/GeoMesaDataStoreTest.scala b/geomesa-index-api/src/test/scala/org/locationtech/geomesa/index/geotools/GeoMesaDataStoreTest.scala index 3bd4e18497ed..088e74248e4b 100644 --- a/geomesa-index-api/src/test/scala/org/locationtech/geomesa/index/geotools/GeoMesaDataStoreTest.scala +++ b/geomesa-index-api/src/test/scala/org/locationtech/geomesa/index/geotools/GeoMesaDataStoreTest.scala @@ -17,6 +17,7 @@ import org.geotools.util.factory.Hints import org.junit.runner.RunWith import org.locationtech.geomesa.features.ScalaSimpleFeature import org.locationtech.geomesa.index.TestGeoMesaDataStore +import org.locationtech.geomesa.index.conf.QueryProperties import org.locationtech.geomesa.index.geotools.GeoMesaDataStoreTest._ import org.locationtech.geomesa.index.index.EmptyIndex import org.locationtech.geomesa.index.index.attribute.AttributeIndex @@ -165,6 +166,47 @@ class GeoMesaDataStoreTest extends Specification { ds2.dispose() } } + "block queries which would cause a full table scan via sys props" in { + val sft = SimpleFeatureTypes.createType("test", + "name:String,age:Int,dtg:Date,*geom:Point:srid=4326;geomesa.indices.enabled='id,z3,attr:name'") + + val ds = new TestGeoMesaDataStore(true) + ds.createSchema(sft) + + val valid = Seq( + "name = 'bob'", + "IN('123')", + "bbox(geom,-10,-10,10,10) AND dtg during 2020-01-01T00:00:00.000Z/2020-01-01T23:59:59.000Z", + "bbox(geom,-10,-10,10,10) AND (dtg during 2020-01-01T00:00:00.000Z/2020-01-01T00:59:59.000Z OR dtg during 2020-01-01T12:00:00.000Z/2020-01-01T12:59:59.000Z)", + "EXCLUDE" + ) + + val invalid = Seq( + "INCLUDE", + "bbox(geom,-180,-90,180,90)", + "name ilike '%b'", + "not IN('1')" + ) + + QueryProperties.BlockFullTableScans.threadLocalValue.set("true") + try { + foreach(valid.map(ECQL.toFilter)) { filter => + val query = new Query(sft.getTypeName, filter) + SelfClosingIterator(ds.getFeatureReader(query, Transaction.AUTO_COMMIT)).toList must beEmpty + } + + foreach(invalid.map(ECQL.toFilter)) { filter => + val query = new Query(sft.getTypeName, filter) + SelfClosingIterator(ds.getFeatureReader(query, Transaction.AUTO_COMMIT)).toList must throwA[RuntimeException] + // you can set max features and use a full-table scan + query.setMaxFeatures(50) + SelfClosingIterator(ds.getFeatureReader(query, Transaction.AUTO_COMMIT)).toList must beEmpty + } + } finally { + QueryProperties.BlockFullTableScans.threadLocalValue.remove() + ds.dispose() + } + } "support timestamp types with stats" in { val sft = SimpleFeatureTypes.createType("ts", "dtg:Timestamp,*geom:Point:srid=4326") ds.createSchema(sft)