Merge branch 'develop' into integ-merge
amolnayak311 committed Nov 1, 2024
2 parents 4450bcd + 6657340 commit 880d6e9
Showing 81 changed files with 12,610 additions and 1,329 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/scala.yml
@@ -4,7 +4,7 @@ on:
push:
branches: [ develop ]
pull_request:
branches: [ develop, integration, main ]
branches: [ develop, integration, main, feat-index-rust ]

jobs:
test:
@@ -19,6 +19,14 @@ jobs:
with:
java-version: '11'
distribution: 'adopt'
- name: Install Rust Toolchain
uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt, clippy
target: x86_64-apple-darwin, aarch64-apple-darwin, aarch64-unknown-linux-gnu
cache-workspaces: "core/src/rust -> target"
- name: Install cargo-zigbuild
run: pip install cargo-zigbuild
- name: Run tests
run: .github/workflows/runtests.sh
- name: Coverage Reports
3 changes: 3 additions & 0 deletions .gitignore
@@ -32,4 +32,7 @@ metastore_db/
**/kafka/src/test/scala/filodb/kafka/shard*
*lib*

# Allow Rust's lib.rs since we're otherwise blocking *lib* above
!lib.rs

coordinator/src/test/resources/
2 changes: 2 additions & 0 deletions README.md
@@ -95,6 +95,8 @@ To compile the .mermaid source files to .png's, install the [Mermaid CLI](http:/
3. [Apache Cassandra](http://cassandra.apache.org/) 2.x or 3.x (We prefer using [CCM](https://github.com/pcmanus/ccm) for local testing)
- For testing, install a single node C* cluster, like this: `ccm create v39_single -v 3.9 -n 1 -s`
4. [Apache Kafka](http://kafka.apache.org/) 0.10.x or above
5. [Rust](https://www.rust-lang.org/tools/install) to build native components
6. A working C compiler for your system (GCC or Clang)

Optional:


Large diffs are not rendered by default.

53 changes: 53 additions & 0 deletions core/src/main/resources/filodb-defaults.conf
@@ -331,6 +331,30 @@ filodb {
deployment-partition-name = "local"

query {
# hierarchical query experience configs
hierarchical {
# Parent logical plans for which the hierarchical query experience is enabled
# If multiple parent logical plans are enabled, they are combined with "OR" semantics in the logical plan (lp) tree
allowed-parent-logical-plans = [
"Aggregate"
]

# A few periodic series plans store the RawSeries plan. This config selects the
# logical plans that are allowed to manipulate/optimize the underlying RawSeries
allowed-periodic-series-plans-with-raw-series = [
"PeriodicSeriesWithWindowing",
"PeriodicSeries"
]

# Range functions allowed for hierarchical query experience
allowed-range-functions = [
"rate",
"increase",
]

# Aggregations allowed for hierarchical query experience
allowed-aggregation-operators = ["sum"]
}

# feature flag to enable start time inclusive in window lookback
inclusive-range = true
@@ -400,6 +424,9 @@ filodb {
# If unspecified, all workspaces are disabled by default.
workspaces-disabled-max-min = ["disabled_ws"]

# Tenant column filter used to determine whether max-min column selection is enabled
max-min-tenant-column-filter = "_ws_"

routing {
enable-remote-raw-exports = false
max-time-range-remote-raw-export = 180 minutes
@@ -786,6 +813,27 @@ filodb {
block-memory-manager-percent = 71
}

# Settings for Tantivy backed indexes
tantivy {
# Max number of items to keep in the column cache. This speeds up retrieval of values,
# especially on fast queries. Each cached item is very small, < 1KB
column-cache-count = 1000

# Max size of the query results cache, in bytes. This can be a major performance win
# for alert-type queries that periodically run the same query over and over.
query-cache-max-bytes = 50MB

# Estimated size of an item in the query cache, in bytes. This corresponds to one bit per
# document each segment searches over, estimated at 250k docs (250,000 / 8 = 31,250 bytes).
# This is a hint to the cache only and does not bound the max number of items.
query-cache-estimated-item-size = 31250

# Percentage of deleted docs in a segment that will flag the segment to be considered
# for a merge. Setting this too high will leave too many deleted documents around
# and increase query time.
deleted-doc-merge-threshold = 0.1
}

# At the cost of some extra heap memory, we can track queries holding shared lock for a long time
# and starving the exclusive access of lock for eviction
track-queries-holding-eviction-lock = true
@@ -806,6 +854,11 @@ filodb {

# Whether to add the _type_ label to all time series for the purpose of filtering
type-field-indexing-enabled = false

# The Part Key index implementation to use. Supported values:
# lucene - Lucene based index (default)
# tantivy - Tantivy based index
part-key-index-type = lucene
}

# for standalone worker cluster configuration, see akka-bootstrapper
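For context, here is a minimal sketch (not part of this commit) of how the new index settings above could be read with the Typesafe Config API. The helper name `describeIndexBackend` and the assumption that `indexConf` is already scoped to the block containing `part-key-index-type` are illustrative only, since the hunk headers do not show the full config path.

```scala
import com.typesafe.config.Config

// Illustrative sketch only: pick an index backend based on the settings shown above.
// `indexConf` is assumed to already point at the block that holds `part-key-index-type`.
def describeIndexBackend(indexConf: Config): String =
  indexConf.getString("part-key-index-type") match {
    case "tantivy" =>
      val tantivy = indexConf.getConfig("tantivy")
      val cacheCount = tantivy.getInt("column-cache-count")
      val queryCacheBytes = tantivy.getMemorySize("query-cache-max-bytes").toBytes
      s"tantivy index: column cache=$cacheCount items, query cache=$queryCacheBytes bytes"
    case _ =>
      "lucene index (default)"
  }
```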
23 changes: 23 additions & 0 deletions core/src/main/scala/filodb.core/GlobalConfig.scala
@@ -4,6 +4,8 @@ import com.typesafe.config.{Config, ConfigFactory}
import com.typesafe.scalalogging.StrictLogging
import scala.jdk.CollectionConverters._

import filodb.core.metadata.DatasetOptions

/**
* Loads the overall configuration in a specific order:
* - System properties
@@ -49,4 +51,25 @@ object GlobalConfig extends StrictLogging {
case true => Some(systemConfig.getStringList("filodb.query.workspaces-disabled-max-min").asScala.toSet)
}

// Column filter used to check whether the use of max-min columns is enabled or disabled
val maxMinTenantColumnFilter = systemConfig.getString("filodb.query.max-min-tenant-column-filter")

// default dataset-options config
val datasetOptions: Option[DatasetOptions] =
systemConfig.hasPath("filodb.partition-schema.options") match {
case false => None
case true => {
val datasetOptionsConfig = systemConfig.getConfig("filodb.partition-schema.options")
Some(DatasetOptions.fromConfig(datasetOptionsConfig))
}
}

val PromMetricLabel = "__name__"

val hierarchicalConfig: Option[Config] = {
systemConfig.hasPath("filodb.query.hierarchical") match {
case false => None
case true => Some(systemConfig.getConfig("filodb.query.hierarchical"))
}
}
}
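For illustration, a short sketch (not part of this commit) of how the optional `hierarchicalConfig` exposed above might be consumed; the check against "Aggregate" mirrors the default allow-list in filodb-defaults.conf and is an assumption about usage, not the actual call site.

```scala
import scala.jdk.CollectionConverters._

import filodb.core.GlobalConfig

// Illustrative sketch only: is "Aggregate" an allowed parent logical plan
// for the hierarchical query experience?
val aggregateAllowed: Boolean = GlobalConfig.hierarchicalConfig.exists { conf =>
  conf.getStringList("allowed-parent-logical-plans").asScala.contains("Aggregate")
}
```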
32 changes: 32 additions & 0 deletions core/src/main/scala/filodb.core/Utils.scala
@@ -1,9 +1,11 @@
package filodb.core

import java.io.{File, IOException}
import java.lang.management.ManagementFactory

import com.typesafe.config.{Config, ConfigRenderOptions}
import com.typesafe.scalalogging.StrictLogging
import scala.util.{Failure, Try}

object Utils extends StrictLogging {
private val threadMbean = ManagementFactory.getThreadMXBean
@@ -37,4 +39,34 @@ object Utils extends StrictLogging {
logger.info(s"Available memory calculated or configured as $availableMem")
availableMem
}

// Recursively delete a folder
def deleteRecursively(f: File, deleteRoot: Boolean = false): Try[Boolean] = {
val subDirDeletion: Try[Boolean] =
if (f.isDirectory)
f.listFiles match {
case xs: Array[File] if xs != null && !xs.isEmpty =>
val subDirDeletions: Array[Try[Boolean]] = xs map (f => deleteRecursively(f, true))
subDirDeletions reduce ((reduced, thisOne) => {
thisOne match {
// Ensures that even if one Failure(_) is found, the result will be a Failure(Throwable)
case scala.util.Success(_) if reduced == scala.util.Success(true) => thisOne
case Failure(_) => thisOne
case _ => reduced
}
})
case _ => scala.util.Success(true)
}
else
scala.util.Success(true)

subDirDeletion match {
case scala.util.Success(_) =>
if (deleteRoot) {
if (f.delete()) scala.util.Success(true) else Failure(new IOException(s"Unable to delete $f"))
} else scala.util.Success(true)
case right@Failure(_) => right
}

}
}
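A hedged usage sketch (not part of this commit) for the new `deleteRecursively` helper; the scratch-directory path is hypothetical.

```scala
import java.io.File

import scala.util.{Failure, Success}

import filodb.core.Utils

// Illustrative sketch only: clear out a hypothetical scratch directory but keep the root.
Utils.deleteRecursively(new File("/tmp/filodb-index-scratch"), deleteRoot = false) match {
  case Success(_)  => println("scratch directory cleaned")
  case Failure(ex) => println(s"cleanup failed: ${ex.getMessage}")
}
```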
@@ -52,7 +52,14 @@ class RawIndexBootstrapper(colStore: ColumnStore) {
colStore.scanPartKeys(ref, shardNum)
.map { pk =>
val partId = assignPartId(pk)
index.addPartKey(pk.partKey, partId, pk.startTime, pk.endTime)()
// -1 is returned if we skipped the part key for any reason, such as
// unknown schema or memory issues
//
// assignPartId will log these cases, so no extra logging is needed
// here
if (partId != -1) {
index.addPartKey(pk.partKey, partId, pk.startTime, pk.endTime)()
}
}
.countL
.map { count =>
@@ -263,7 +263,7 @@ TimeSeriesShard(ref, schemas, storeConfig, numShards, quotaSource, shardNum, buf
logger.debug(s"Creating TSPartition for ODP from part ID $id in dataset=$ref shard=$shardNum")
// If not there, then look up in Lucene and get the details
for { partKeyBytesRef <- partKeyIndex.partKeyFromPartId(id)
unsafeKeyOffset = PartKeyLuceneIndex.bytesRefToUnsafeOffset(partKeyBytesRef.offset)
unsafeKeyOffset = PartKeyIndexRaw.bytesRefToUnsafeOffset(partKeyBytesRef.offset)
group = partKeyGroup(schemas.part.binSchema, partKeyBytesRef.bytes, unsafeKeyOffset, numGroups)
sch <- Option(schemas(RecordSchema.schemaID(partKeyBytesRef.bytes, unsafeKeyOffset)))
} yield {