feat(core): Add support for Tantivy based time series index (#1852)
New behavior:

This change adds support for the Tantivy indexing library as an alternative to Lucene for time series indexing. In several cases Tantivy has proven superior to Lucene, especially in memory usage and the predictability of memory spikes.

This feature is opt-in via a configuration setting to avoid unexpected changes during upgrade. For now, only the raw time series index is supported; downsample support may come in a future PR.

BREAKING CHANGES

Because the Tantivy code is written in Rust, building this change requires a working Rust toolchain and a C compiler. The README has been updated to reflect this.

There are no runtime breaking changes.
rfairfax authored Sep 20, 2024
1 parent c2946bd commit 26ab573
Showing 62 changed files with 10,761 additions and 1,305 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/scala.yml
@@ -4,7 +4,7 @@ on:
push:
branches: [ develop ]
pull_request:
- branches: [ develop, integration, main ]
+ branches: [ develop, integration, main, feat-index-rust ]

jobs:
test:
@@ -19,6 +19,14 @@ jobs:
with:
java-version: '11'
distribution: 'adopt'
- name: Install Rust Toolchain
uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt, clippy
target: x86_64-apple-darwin, aarch64-apple-darwin, aarch64-unknown-linux-gnu
cache-workspaces: "core/src/rust -> target"
- name: Install cargo-zigbuild
run: pip install cargo-zigbuild
- name: Run tests
run: .github/workflows/runtests.sh
- name: Coverage Reports
3 changes: 3 additions & 0 deletions .gitignore
@@ -32,4 +32,7 @@ metastore_db/
**/kafka/src/test/scala/filodb/kafka/shard*
*lib*

# Allow Rust's lib.rs since we're otherwise blocking *lib* above
!lib.rs

coordinator/src/test/resources/
2 changes: 2 additions & 0 deletions README.md
@@ -95,6 +95,8 @@ To compile the .mermaid source files to .png's, install the [Mermaid CLI](http:/
3. [Apache Cassandra](http://cassandra.apache.org/) 2.x or 3.x (We prefer using [CCM](https://github.com/pcmanus/ccm) for local testing)
- For testing, install a single node C* cluster, like this: `ccm create v39_single -v 3.9 -n 1 -s`
4. [Apache Kafka](http://kafka.apache.org/) 0.10.x or above
5. [Rust](https://www.rust-lang.org/tools/install) to build native components
6. A working C compiler for your system (GCC or Clang)

Optional:

Expand Down
26 changes: 26 additions & 0 deletions core/src/main/resources/filodb-defaults.conf
@@ -789,6 +789,27 @@ filodb {
block-memory-manager-percent = 71
}

# Settings for Tantivy backed indexes
tantivy {
# Max number of items to keep in the column cache. This speeds up retrieval of values,
# especially on fast queries. Each cached item is very small, < 1KB
column-cache-count = 1000

# Max size of the query results cache, in bytes. This can be a major performance win
# for alert-style queries that periodically run the same query over and over.
query-cache-max-bytes = 50MB

# Estimated size of an item in the query cache, in bytes. Each cached item is
# effectively a bitset with one bit per document a segment searches over; at the
# estimated 250k documents per segment, that is 250,000 / 8 = 31,250 bytes.
# This is a hint to the cache only and does not bound the max number of items.
query-cache-estimated-item-size = 31250

# Fraction of deleted docs in a segment that flags it as a candidate for a merge.
# Setting this too high leaves too many deleted documents around and increases
# query time.
deleted-doc-merge-threshold = 0.1
}

# At the cost of some extra heap memory, we can track queries holding shared lock for a long time
# and starving the exclusive access of lock for eviction
track-queries-holding-eviction-lock = true
@@ -809,6 +830,11 @@ filodb {

# Whether to add the _type_ label to all time series for the purpose of filtering
type-field-indexing-enabled = false

# The Part Key index implementation to use. Supported values:
# lucene - Lucene based index (default)
# tantivy - Tantivy based index
part-key-index-type = lucene
}

# for standalone worker cluster configuration, see akka-bootstrapper
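The new part-key-index-type setting defaults to lucene, so upgrades see no behavior change until Tantivy is selected explicitly. Below is a minimal sketch of config-driven backend selection: the setting name and its two supported values come from this change, while every other name is an illustrative assumption, not the PR's actual API.

import com.typesafe.config.ConfigFactory

// Hypothetical sketch only: types and object names below are illustrative.
sealed trait PartKeyIndexType
case object LuceneIndexType extends PartKeyIndexType
case object TantivyIndexType extends PartKeyIndexType

object PartKeyIndexType {
  // Maps the config value to a backend; unknown values fail fast
  def fromConfig(value: String): PartKeyIndexType = value.trim.toLowerCase match {
    case "lucene"  => LuceneIndexType
    case "tantivy" => TantivyIndexType
    case other     => throw new IllegalArgumentException(s"Unsupported part-key-index-type: $other")
  }
}

object SelectionExample extends App {
  // Opting in means overriding the default, e.g. part-key-index-type = tantivy
  val conf = ConfigFactory.parseString("part-key-index-type = tantivy")
  println(PartKeyIndexType.fromConfig(conf.getString("part-key-index-type"))) // TantivyIndexType
}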
32 changes: 32 additions & 0 deletions core/src/main/scala/filodb.core/Utils.scala
@@ -1,9 +1,11 @@
package filodb.core

import java.io.{File, IOException}
import java.lang.management.ManagementFactory

import com.typesafe.config.{Config, ConfigRenderOptions}
import com.typesafe.scalalogging.StrictLogging
import scala.util.{Failure, Try}

object Utils extends StrictLogging {
private val threadMbean = ManagementFactory.getThreadMXBean
@@ -37,4 +39,34 @@ object Utils extends StrictLogging {
logger.info(s"Available memory calculated or configured as $availableMem")
availableMem
}

  // Recursively delete a folder's contents, and the folder itself when deleteRoot is true
  def deleteRecursively(f: File, deleteRoot: Boolean = false): Try[Boolean] = {
    val subDirDeletion: Try[Boolean] =
      if (f.isDirectory)
        f.listFiles match {
          case xs: Array[File] if xs.nonEmpty =>
            val subDirDeletions: Array[Try[Boolean]] = xs map (file => deleteRecursively(file, deleteRoot = true))
            subDirDeletions reduce ((reduced, thisOne) => {
              thisOne match {
                // Propagates a Failure if any single deletion failed
                case scala.util.Success(_) if reduced == scala.util.Success(true) => thisOne
                case Failure(_) => thisOne
                case _ => reduced
              }
            })
          case _ => scala.util.Success(true)
        }
      else
        scala.util.Success(true)

    subDirDeletion match {
      case scala.util.Success(_) =>
        if (deleteRoot) {
          if (f.delete()) scala.util.Success(true) else Failure(new IOException(s"Unable to delete $f"))
        } else scala.util.Success(true)
      case failure @ Failure(_) => failure
    }
  }
}
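A hypothetical usage sketch of the new helper follows; the directory path and object name here are illustrative only.

import java.io.File
import scala.util.{Failure, Success}
import filodb.core.Utils

object CleanupExample extends App {
  // Illustrative path only: clear a scratch index directory's contents,
  // keeping the directory itself (deleteRoot defaults to false)
  val scratchDir = new File("/tmp/filodb-index-scratch")
  Utils.deleteRecursively(scratchDir) match {
    case Success(_)  => println(s"Cleared contents of $scratchDir")
    case Failure(ex) => println(s"Cleanup failed: ${ex.getMessage}")
  }
}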
@@ -263,7 +263,7 @@ TimeSeriesShard(ref, schemas, storeConfig, numShards, quotaSource, shardNum, buf
logger.debug(s"Creating TSPartition for ODP from part ID $id in dataset=$ref shard=$shardNum")
// If not there, then look up in Lucene and get the details
for { partKeyBytesRef <- partKeyIndex.partKeyFromPartId(id)
- unsafeKeyOffset = PartKeyLuceneIndex.bytesRefToUnsafeOffset(partKeyBytesRef.offset)
+ unsafeKeyOffset = PartKeyIndexRaw.bytesRefToUnsafeOffset(partKeyBytesRef.offset)
group = partKeyGroup(schemas.part.binSchema, partKeyBytesRef.bytes, unsafeKeyOffset, numGroups)
sch <- Option(schemas(RecordSchema.schemaID(partKeyBytesRef.bytes, unsafeKeyOffset)))
} yield {
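The only change in this hunk is the helper moving from the Lucene-specific PartKeyLuceneIndex to the index-agnostic PartKeyIndexRaw, so both backends can share it. As background, such a helper bridges a BytesRef's array-relative offset and the absolute offset used by Unsafe reads. A sketch of the idea follows; it is not necessarily FiloDB's exact implementation.

import sun.misc.Unsafe

object UnsafeOffsetSketch {
  // Illustrative only: obtain the Unsafe instance reflectively
  private val unsafe: Unsafe = {
    val f = classOf[Unsafe].getDeclaredField("theUnsafe")
    f.setAccessible(true)
    f.get(null).asInstanceOf[Unsafe]
  }

  // A BytesRef's offset indexes into its backing byte array; Unsafe
  // addressing must also skip the array object's header, which is
  // exactly the array base offset
  def bytesRefToUnsafeOffset(bytesRefOffset: Int): Long =
    unsafe.arrayBaseOffset(classOf[Array[Byte]]) + bytesRefOffset.toLong
}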
