# timeseries-dev-source.conf
dataset = "prometheus"
# Name of schema used for this dataset stream. See filodb.schemas in filodb-defaults or any other server conf
schema = "prom-counter"
# Should not change once dataset has been set up on the server and data has been persisted to cassandra
num-shards = 4
# Deprecated in favor of the min-num-nodes-in-cluster setting in the FiloDB server config.
# To be removed eventually; there is no reason to set this per dataset.
min-num-nodes = 2
# Factory that creates the IngestionStream for this dataset; here, the Kafka ingestion stream factory
sourcefactory = "filodb.kafka.KafkaIngestionStreamFactory"
# Per dataset overrides. Optional, used only if the store is cassandra.
# This config enables us to configure datasets that can use different keyspaces.
# cassandra {
# keyspace = "filodb_prom"
# downsample-keyspace = "filodb_prom_downsample"
# }
sourceconfig {
# Required FiloDB configurations
filo-topic-name = "timeseries-dev"
# Standard Kafka configurations may also be set here, e.g. bootstrap.servers below.
# bootstrap.servers accepts both the standard Kafka value of a comma-separated
# string and a Typesafe list of String values
# EXCEPT: do not populate value.deserializer; the Kafka message format is fixed by FiloDB to RecordContainers
bootstrap.servers = "localhost:9092"
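# As an illustration (hypothetical broker names), bootstrap.servers could equally be written as a Typesafe list:
# bootstrap.servers = [ "broker-1:9092", "broker-2:9092" ]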
group.id = "filo-db-timeseries-ingestion"
# For tests - do not shut down and release memory after ingestion stops (with status IngestionStopped)
shutdown-ingest-after-stopped = true
# Values controlling in-memory store chunking, flushing, etc.
store {
# Interval over which ALL time series in a shard are flushed. This interval is further divided by groups-per-shard
flush-interval = 1h
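# Illustrative: with flush-interval = 1h and groups-per-shard = 20 (set below), one flush group
# is flushed roughly every 3 minutes (60 min / 20 groups)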
# TTL for on-disk / C* data. Data older than this may be purged.
disk-time-to-live = 24 hours
# maximum userTime range allowed in a single chunk. This needs to be longer than flush interval
# to ensure that normal flushes result in only one chunk.
# If not specified, max-chunk-time = 1.1 * flush-interval
# max-chunk-time = 66 minutes
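# Maximum number of samples to hold in a single chunk (a count of items, unlike the byte-sized blob buffer below)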
max-chunks-size = 400
# Write buffer size, in bytes, for blob columns (histograms, UTF8Strings). Since these are variable data types,
# we need a maximum size, not a maximum number of items.
max-blob-buffer-size = 15000
# Number of bytes of offheap mem to allocate to chunk storage in each shard. Ex. 1000MB, 1G, 2GB
# Assuming ~5 bytes per sample, this should be roughly (# samples per time series) * (# time series) * 5 bytes
shard-mem-size = 512MB
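# Illustrative sizing only: 100,000 time series * 1,000 samples each * 5 bytes/sample = 500 MB,
# roughly the 512MB configured here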
# Maximum number of write buffers to retain in each shard's WriteBufferPool. Any extra buffers are released
# back to native memory, which helps memory reuse.
# max-buffer-pool-size = 10000
# Number of time series to evict at a time.
# num-partitions-to-evict = 1000
# Number of subgroups within each shard. Persistence to a ChunkSink occurs one subgroup at a time, as does
# recovery from failure. This many batches of flushes must occur to cover persistence of every partition
groups-per-shard = 20
# Use a "MultiPartitionScan" or Cassandra MULTIGET for on-demand paging. Might improve performance.
multi-partition-odp = false
# Amount of parallelism during on-demand paging
# demand-paging-parallelism = 4
# Number of retries for IngestionSource/Kafka initialization
# failure-retries = 3
# Amount of time to delay before retrying
# retry-delay = 15s
# Capacity of Bloom filter used to track evicted partitions.
# Tune this based on how much time series churn is expected before a FiloDB node
# will be restarted for upgrade/maintenance. Do not take into account churn created by
# time series that are purged due to retention. When a time series has not ingested for the retention
# period, it is purged, not evicted. Purged PartKeys are not added to the Bloom filter.
#
# To calculate Bloom Filter size:
# console> BloomFilter[String](5000000, falsePositiveRate = 0.01).numberOfBits
# res9: Long = 47925292
# That's about 6 MB (for a 5,000,000 capacity)
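# For the 50000 capacity configured here, the same formula gives roughly 480,000 bits, i.e. about 60 KB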
evicted-pk-bloom-filter-capacity = 50000
# Uncomment to log, at DEBUG level, ingested samples matching these filters on Partition Key columns
# Only works for StringColumn fields in the partition key. Scope may be expanded later.
# trace-filters = {
# metric = "bad-metric-to-log"
# }
# Limits the maximum amount of data a single leaf query can scan
max-data-per-shard-query = 50 MB
# Set to true to enable metering of time series. Used for rate-limiting
metering-enabled = true
# Approximate resolution of ingested data, in milliseconds. This is used to estimate/cap the number of samples
# that will be scanned during a query execution. Default is 60000 (60 seconds).
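# Illustrative estimate: at 60000 ms resolution, a 1-hour query range over a single time series
# works out to about 3,600,000 / 60,000 = 60 samples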
ingest-resolution-millis = 60000
# Set to true to enable processing of duplicate samples. Default behavior is to drop duplicates.
accept-duplicate-samples = false
}
downsample {
# Resolutions for downsampled data ** in ascending order **
resolutions = [ 1 minute, 5 minutes ]
# Retention of downsampled data for the corresponding resolution
ttls = [ 30 days, 183 days ]
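# Illustrative only (not part of this config): adding a coarser resolution requires a matching ttls entry,
# e.g. resolutions = [ 1 minute, 5 minutes, 1 hour ] paired with ttls = [ 30 days, 183 days, 365 days ]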
# Raw schemas from which to downsample
raw-schema-names = [ "gauge", "untyped", "prom-counter", "prom-histogram", "delta-counter", "delta-histogram"]
}
}