# timeseries-dev-source.conf
dataset = "prometheus"
# Name of schema used for this dataset stream. See filodb.schemas in filodb-defaults or any other server conf
schema = "prom-counter"
# Should not change once dataset has been set up on the server and data has been persisted to cassandra
num-shards = 4
# Deprecated in favor of the min-num-nodes-in-cluster setting in the FiloDB server config.
# To be removed eventually; there is no reason to set this per dataset.
min-num-nodes = 2
# Factory that creates the IngestionStream for this dataset; here, the Kafka ingestion stream factory
sourcefactory = "filodb.kafka.KafkaIngestionStreamFactory"
# Per dataset overrides. Optional, used only if the store is cassandra.
# This config enables us to configure datasets that can use different keyspaces.
# cassandra {
# keyspace = "filodb_prom"
# downsample-keyspace = "filodb_prom_downsample"
# }
sourceconfig {
# Required FiloDB configurations
filo-topic-name = "timeseries-dev"
# Standard Kafka configurations may also be set here, e.g. bootstrap.servers below.
# bootstrap.servers accepts both the standard Kafka value of a comma-separated
# string and a Typesafe list of String values
# EXCEPT: do not populate value.deserializer; the Kafka message format is fixed by FiloDB to RecordContainers
bootstrap.servers = "localhost:9092"
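# As an illustration (hypothetical broker names), bootstrap.servers could equally be written as a Typesafe list:
# bootstrap.servers = [ "broker-1:9092", "broker-2:9092" ]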
group.id = "filo-db-timeseries-ingestion"
# For tests - do not shut down and release memory after ingestion stops (with status IngestionStopped)
shutdown-ingest-after-stopped = true
# Values controlling in-memory store chunking, flushing, etc.
store {
# Interval over which ALL time series in a shard are flushed. This interval is further divided by groups-per-shard
flush-interval = 1h
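# Illustrative: with flush-interval = 1h and groups-per-shard = 20 (set below), one flush group
# is flushed roughly every 3 minutes (60 min / 20 groups)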
# TTL for on-disk / C* data. Data older than this may be purged.
disk-time-to-live = 24 hours
# maximum userTime range allowed in a single chunk. This needs to be longer than flush interval
# to ensure that normal flushes result in only one chunk.
# If not specified, max-chunk-time = 1.1 * flush-interval
# max-chunk-time = 66 minutes
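# Maximum number of samples to hold in a single chunk (a count of items, unlike the byte-sized blob buffer below)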
max-chunks-size = 400
# Write buffer size, in bytes, for blob columns (histograms, UTF8Strings). Since these are variable data types,
# we need a maximum size, not a maximum number of items.
max-blob-buffer-size = 15000
# Number of bytes of offheap mem to allocate to chunk storage in each shard. Ex. 1000MB, 1G, 2GB
# Assuming ~5 bytes per sample, this should be roughly (# samples per time series) * (# time series) * 5 bytes
shard-mem-size = 512MB
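# Illustrative sizing only: 100,000 time series * 1,000 samples each * 5 bytes/sample = 500 MB,
# roughly the 512MB configured here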
# Maximum number of write buffers to retain in each shard's WriteBufferPool. Any extra buffers are released
# back to native memory, which helps memory reuse.
# max-buffer-pool-size = 10000
# Number of time series to evict at a time.
# num-partitions-to-evict = 1000
# Number of subgroups within each shard. Persistence to a ChunkSink occurs one subgroup at a time, as does
# recovery from failure. This many batches of flushes must occur to cover persistence of every partition
groups-per-shard = 20
# Use a "MultiPartitionScan" or Cassandra MULTIGET for on-demand paging. Might improve performance.
multi-partition-odp = false
# Amount of parallelism during on-demand paging
# demand-paging-parallelism = 4
# Number of retries for IngestionSource/Kafka initialization
# failure-retries = 3
# Amount of time to delay before retrying
# retry-delay = 15s
# Capacity of Bloom filter used to track evicted partitions.
# Tune this based on how much time series churn is expected before a FiloDB node
# will be restarted for upgrade/maintenance. Do not take into account churn created by
# time series that are purged due to retention. When a time series has not ingested for the retention
# period, it is purged, not evicted. Purged PartKeys are not added to the Bloom filter.
#
# To calculate Bloom Filter size:
# console> BloomFilter[String](5000000, falsePositiveRate = 0.01).numberOfBits
# res9: Long = 47925292
# That's about 6 MB (for a 5,000,000 capacity)
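# For the 50000 capacity configured here, the same formula gives roughly 480,000 bits, i.e. about 60 KB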
evicted-pk-bloom-filter-capacity = 50000
# Uncomment to log, at DEBUG level, ingested samples matching these filters on Partition Key columns
# Only works for StringColumn fields in the partition key. Scope may be expanded later.
# trace-filters = {
# metric = "bad-metric-to-log"
# }
# Limits the maximum amount of data a single leaf query can scan
max-data-per-shard-query = 50 MB
# Set to true to enable metering of time series. Used for rate-limiting
metering-enabled = true
# Approximate resolution of ingested data, in milliseconds. This is used to estimate/cap the number of samples
# that will be scanned during a query execution. Default is 60000 (60 seconds).
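# Illustrative estimate: at 60000 ms resolution, a 1-hour query range over a single time series
# works out to about 3,600,000 / 60,000 = 60 samples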
ingest-resolution-millis = 60000
# Set to true to enable processing of duplicate samples. Default behavior is to drop duplicates.
accept-duplicate-samples = false
}
downsample {
# Resolutions for downsampled data ** in ascending order **
resolutions = [ 1 minute, 5 minutes ]
# Retention of downsampled data for the corresponding resolution
ttls = [ 30 days, 183 days ]
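# Illustrative only (not part of this config): adding a coarser resolution requires a matching ttls entry,
# e.g. resolutions = [ 1 minute, 5 minutes, 1 hour ] paired with ttls = [ 30 days, 183 days, 365 days ]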
# Raw schemas from which to downsample
raw-schema-names = [ "gauge", "untyped", "prom-counter", "prom-histogram", "delta-counter", "delta-histogram"]
}
}