Skip to content

Commit

Permalink
feat(downsample): support export to multiple paths in Iceberg format (#…
Browse files Browse the repository at this point in the history
…1720)

Adds support for export from the downsampler job to multiple destinations in the Iceberg format.
Support for export of CSV-formatted data is removed.

---------

Co-authored-by: nikitag55 <[email protected]>
  • Loading branch information
alextheimer and nikitag55 authored Feb 26, 2024
1 parent feb8a6e commit 745eb47
Show file tree
Hide file tree
Showing 6 changed files with 438 additions and 293 deletions.
49 changes: 20 additions & 29 deletions core/src/main/resources/filodb-defaults.conf
Original file line number Diff line number Diff line change
Expand Up @@ -588,22 +588,20 @@ filodb {

data-export {
enabled = false

# Spark SaveMode / options.
save-mode = "error"
format = "csv"
options = {
"header": "true"
}
parallelism = 10
# Catalog under Unified Catalog
catalog = ""
# Database under Catalog
database = ""
# Table format
format = "iceberg"

# Describe the sequence of labels that compose a rule-group's key.
# A time-series should match at most one key.
key-labels = ["_ws_"]
# These labels will be dropped from every exported row.
drop-labels = ["_ws_", "drop-label1"]

bucket = "file://<path-to-file>"

# Each row's labels are compared against all rule-group keys. If a match is found,
# the row's labels are compared *sequentially* against each of the group's rules until
# a rule meets both of the following criteria:
Expand All @@ -615,6 +613,19 @@ filodb {
groups = [
{
key = ["ws-foo"]
# Iceberg table name
table = "ws-foo"
# Table path
table-path = "s3a://<bucket>/<directory>/<catalog>/<database>/ws-foo"
# Adds additional dynamic label-based columns to the table.
# E.g., _ws_ is the label key in the time series used to populate the column "workspace".
# Similarly, _ns_ is the label key in the time series used to populate the column "namespace".
label-column-mapping = [
"_ws_", "workspace",
"_ns_", "namespace"
]
# Partition the Iceberg table by any of the columns from label-column-mapping.
partition-by-columns = ["namespace"]
rules = [
{
allow-filters = [
Expand All @@ -629,26 +640,6 @@ filodb {
]
}
]

# Specifies how to generate a path to an exported time-series.
# The sequence of key-value pairs describe a directory path, where the
# time-series are exported to the final directory.
# For example, if the path-spec is:
# path-spec = [
# "key1", "value1",
# "key2", "value2"
# ]
# then the exported time-series will be stored at the path:
# ~/key1=value1/key2=value2/<file>
# All {{label-name}} strings will be replaced with the time-series label's value.
# All <<time-spec>> strings will be replaced with the result of formatting
# the export window's end time with the time-spec string.
path-spec = [
"ws", "{{_ws_}}-suffix1",
"year", "<<YYYY>>-suffix2",
"api", "v1",
"ns", "{{_ns_}}-suffix3"
]
}
}

Expand Down
Loading

0 comments on commit 745eb47

Please sign in to comment.