diff --git a/.gitignore b/.gitignore index 026d910..9e12a2c 100644 --- a/.gitignore +++ b/.gitignore @@ -97,7 +97,6 @@ Backup of *.doc* # NodeJS # --------------------------------------------------- # Logs -logs *.log npm-debug.log* yarn-debug.log* diff --git a/modules/broker/rabbitmq/README.md b/modules/broker/rabbitmq/README.md index 08204da..899311c 100644 --- a/modules/broker/rabbitmq/README.md +++ b/modules/broker/rabbitmq/README.md @@ -100,7 +100,7 @@ The following labels are automatically added to exported targets. The following example will scrape all rabbitmq instances in cluster. -```river +```alloy import.git "rabbitmq" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -136,7 +136,7 @@ prometheus.remote_write "local" { The following example will scrape rabbitmq for metrics on the local machine. -```river +```alloy import.git "rabbitmq" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/cloud/grafana/cloud/README.md b/modules/cloud/grafana/cloud/README.md index 9f45fb0..92512b6 100644 --- a/modules/cloud/grafana/cloud/README.md +++ b/modules/cloud/grafana/cloud/README.md @@ -49,7 +49,7 @@ The token must have permissions to read stack information. The setup of these pe ### `stack` -```river +```alloy import.git "grafana_cloud" { repository = "https://github.com/grafana/alloy-modules.git" revision = "main" diff --git a/modules/collector/agent/README.md b/modules/collector/agent/README.md index be0b3ac..0b7b302 100644 --- a/modules/collector/agent/README.md +++ b/modules/collector/agent/README.md @@ -99,7 +99,7 @@ The following labels are automatically added to exported targets. The following example will scrape all agents in cluster. 
-```river +```alloy import.git "agent" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -135,7 +135,7 @@ prometheus.remote_write "local" { The following example will scrape the agent for metrics on the local machine. -```river +```alloy import.git "agent" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/collector/push-gateway/README.md b/modules/collector/push-gateway/README.md index 8020710..4ce7618 100644 --- a/modules/collector/push-gateway/README.md +++ b/modules/collector/push-gateway/README.md @@ -67,7 +67,7 @@ The following labels are automatically added to exported targets. The following example will scrape all push-gateway in cluster. -```river +```alloy import.git "push_gateway" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/databases/kv/etcd/README.md b/modules/databases/kv/etcd/README.md index 12c8245..3c488cf 100644 --- a/modules/databases/kv/etcd/README.md +++ b/modules/databases/kv/etcd/README.md @@ -98,7 +98,7 @@ The following labels are automatically added to exported targets. The following example will scrape all etcd instances in cluster. -```river +```alloy import.git "etcd" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -134,7 +134,7 @@ prometheus.remote_write "local" { The following example will scrape etcd for metrics on the local machine. -```river +```alloy import.git "etcd" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/databases/kv/memcached/README.md b/modules/databases/kv/memcached/README.md index 1b7142c..bae89de 100644 --- a/modules/databases/kv/memcached/README.md +++ b/modules/databases/kv/memcached/README.md @@ -99,7 +99,7 @@ The following labels are automatically added to exported targets. The following example will scrape all memcached instances in cluster. 
-```river +```alloy import.git "memcached" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -135,7 +135,7 @@ prometheus.remote_write "local" { The following example will scrape memcached for metrics on the local machine. -```river +```alloy import.git "memcached" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/databases/kv/redis/README.md b/modules/databases/kv/redis/README.md index fa1dc1a..42e8200 100644 --- a/modules/databases/kv/redis/README.md +++ b/modules/databases/kv/redis/README.md @@ -99,7 +99,7 @@ The following labels are automatically added to exported targets. The following example will scrape all redis instances in cluster. -```river +```alloy import.git "redis" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -135,7 +135,7 @@ prometheus.remote_write "local" { The following example will scrape redis for metrics on the local machine. -```river +```alloy import.git "redis" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/databases/sql/mysql/README.md b/modules/databases/sql/mysql/README.md index 5601046..ac5cc72 100644 --- a/modules/databases/sql/mysql/README.md +++ b/modules/databases/sql/mysql/README.md @@ -99,7 +99,7 @@ The following labels are automatically added to exported targets. The following example will scrape all mysql instances in cluster. -```river +```alloy import.git "mysql" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -135,7 +135,7 @@ prometheus.remote_write "local" { The following example will scrape mysql for metrics on the local machine. 
-```river +```alloy import.git "mysql" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/databases/sql/postgres/README.md b/modules/databases/sql/postgres/README.md index 5b1d375..50bf182 100644 --- a/modules/databases/sql/postgres/README.md +++ b/modules/databases/sql/postgres/README.md @@ -99,7 +99,7 @@ The following labels are automatically added to exported targets. The following example will scrape all postgres instances in cluster. -```river +```alloy import.git "postgres" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -135,7 +135,7 @@ prometheus.remote_write "local" { The following example will scrape postgres for metrics on the local machine. -```river +```alloy import.git "postgres" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/databases/timeseries/loki/README.md b/modules/databases/timeseries/loki/README.md index af97b46..9511a49 100644 --- a/modules/databases/timeseries/loki/README.md +++ b/modules/databases/timeseries/loki/README.md @@ -98,7 +98,7 @@ The following labels are automatically added to exported targets. The following example will scrape all Loki instances in cluster. -```river +```alloy import.git "loki" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -134,7 +134,7 @@ prometheus.remote_write "local" { The following example will scrape Loki for metrics on the local machine. -```river +```alloy import.git "loki" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/databases/timeseries/mimir/README.md b/modules/databases/timeseries/mimir/README.md index 54532af..0181820 100644 --- a/modules/databases/timeseries/mimir/README.md +++ b/modules/databases/timeseries/mimir/README.md @@ -99,7 +99,7 @@ The following labels are automatically added to exported targets. The following example will scrape all Mimir instances in cluster. 
-```river +```alloy import.git "mimir" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -135,7 +135,7 @@ prometheus.remote_write "local" { The following example will scrape Mimir for metrics on the local machine. -```river +```alloy import.git "mimir" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/databases/timeseries/pyroscope/README.md b/modules/databases/timeseries/pyroscope/README.md index daa25f8..c68d4a3 100644 --- a/modules/databases/timeseries/pyroscope/README.md +++ b/modules/databases/timeseries/pyroscope/README.md @@ -99,7 +99,7 @@ The following labels are automatically added to exported targets. The following example will scrape all pyroscope instances in cluster. -```river +```alloy import.git "pyroscope" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -135,7 +135,7 @@ prometheus.remote_write "local" { The following example will scrape pyroscope for metrics on the local machine. -```river +```alloy import.git "pyroscope" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/databases/timeseries/tempo/README.md b/modules/databases/timeseries/tempo/README.md index c1501de..fffa676 100644 --- a/modules/databases/timeseries/tempo/README.md +++ b/modules/databases/timeseries/tempo/README.md @@ -99,7 +99,7 @@ The following labels are automatically added to exported targets. The following example will scrape all Tempo instances in cluster. -```river +```alloy import.git "tempo" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -135,7 +135,7 @@ prometheus.remote_write "local" { The following example will scrape Tempo for metrics on the local machine. 
-```river +```alloy import.git "tempo" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/kubernetes/annotations/README.md b/modules/kubernetes/annotations/README.md index e1e95d7..e045a3f 100644 --- a/modules/kubernetes/annotations/README.md +++ b/modules/kubernetes/annotations/README.md @@ -1,11 +1,11 @@ -# Kubernetes Annotation Modules +# Kubernetes Metric Annotation Module **Modules:** -- [`metrics.river`](#metricsriver) -- [`probes.river`](#probesriver) +- [`metrics.alloy`](#metricsalloy) +- [`probes.alloy`](#probesalloy) -## `metrics.river` +## `metrics.alloy` This module is meant to be used to automatically scrape targets based on a certain role and set of annotations. This module can be consumed multiple times with different roles. The supported roles are: @@ -143,8 +143,8 @@ The following labels are automatically added to exported targets. | :---------------- | :------- | :---------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | | `targets` | _yes_ | `list(map(string))` | List of targets to scrape | | `forward_to` | _yes_ | `list(MetricsReceiver)` | Must be a where scraped should be forwarded to | -| `keep_metrics` | _no_ | [see code](module.river#L228) | A regular expression of metrics to keep | -| `drop_metrics` | _no_ | [see code](module.river#L235) | A regular expression of metrics to drop | +| `keep_metrics` | _no_ | [see code](module.alloy#L228) | A regular expression of metrics to keep | +| `drop_metrics` | _no_ | [see code](module.alloy#L235) | A regular expression of metrics to drop | | `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets | | `scrape_timeout` | _no_ | `10s` | How long before a scrape times out | | `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. 
This should be at least 2x-5x your largest scrape target or samples appended rate. | @@ -156,7 +156,7 @@ N/A --- -## `probes.river` +## `probes.alloy` This module is meant to be used to automatically scrape targets based on a certain role and set of annotations. This module can be consumed multiple times with different roles. The supported roles are: @@ -267,8 +267,8 @@ The following labels are automatically added to exported targets. | :---------------- | :------- | :---------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | | `targets` | _yes_ | `list(map(string))` | List of targets to scrape | | `forward_to` | _yes_ | `list(MetricsReceiver)` | Must be a where scraped should be forwarded to | -| `keep_metrics` | _no_ | [see code](module.river#L228) | A regular expression of metrics to keep | -| `drop_metrics` | _no_ | [see code](module.river#L235) | A regular expression of metrics to drop | +| `keep_metrics` | _no_ | [see code](module.alloy#L228) | A regular expression of metrics to keep | +| `drop_metrics` | _no_ | [see code](module.alloy#L235) | A regular expression of metrics to drop | | `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets | | `scrape_timeout` | _no_ | `10s` | How long before a scrape times out | | `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. | @@ -286,11 +286,11 @@ N/A The following example will scrape all metric annotation instances in cluster. 
-```river +```alloy import.git "metric_annotations" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" - path = "modules/kubernetes/annotations/metrics.river" + path = "modules/kubernetes/annotations/metrics.alloy" pull_frequency = "15m" } @@ -324,11 +324,11 @@ prometheus.remote_write "local" { The following example will scrape all probe annotation instances in cluster. -```river +```alloy import.git "probe_annotations" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" - path = "modules/kubernetes/annotations/probes.river" + path = "modules/kubernetes/annotations/probes.alloy" pull_frequency = "15m" } diff --git a/modules/kubernetes/annotations/logs/README.md b/modules/kubernetes/annotations/logs/README.md new file mode 100644 index 0000000..df23b86 --- /dev/null +++ b/modules/kubernetes/annotations/logs/README.md @@ -0,0 +1,492 @@ +# Kubernetes Log Annotations Modules + +Annotations offer a versatile and powerful means to tailor log ingestion and processing, adapting log management to meet particular needs +and specifications. They grant users the ability to selectively engage in specific log processing behaviors, circumventing the need for +unique configurations or customizations within the agent's setup. These annotations are accessible at the component level, allowing for +selective implementation. This ensures that only annotations relevant to the user's requirements are activated, optimizing processing +efficiency by excluding unnecessary annotations. 
+ +The following pod annotations are supported: + +| Annotation                          | Type               | Component                                 | Description                                                                                                                                                                                                             | +| :---------------------------------- | :----------------- | :---------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `logs.grafana.com/ingest`           | Boolean String     | [pods](#pods)                             | Allow a pod to declare its logs should be dropped, the default behavior is to ingest all logs                                                                                                                           | +| `logs.grafana.com/tenant`           | Regular Expression | [pods](#pods)                             | Allow a pod to override the tenant for its logs.                                                                                                                                                                        | +| `logs.grafana.com/drop-info`        | Boolean String     | [drop_levels](#drop_levels)               | Determines if `info` logs should be dropped (default is `false`), but a pod can override this temporarily or permanently.                                                                                               | +| `logs.grafana.com/drop-debug`       | Boolean String     | [drop_levels](#drop_levels)               | Determines if `debug` logs should be dropped (default is `true`), but a pod can override this temporarily or permanently.                                                                                               | +| `logs.grafana.com/drop-trace`       | Boolean String     | [drop_levels](#drop_levels)               | Determines if `trace` logs should be dropped (default is `true`), but a pod can override this temporarily or permanently.                                                                                               | +| `logs.grafana.com/decolorize`       | Boolean String     | [decolorize](#decolorize)                 | Determines if [`stage.decolorize`](https://grafana.com/docs/alloy/latest/reference/components/loki.process/#stagedecolorize-block) should be used to remove escape characters.                                          | +| `logs.grafana.com/scrub-nulls`      | Boolean String     | [json_scrub_nulls](#json_scrub_nulls)     | Determines if keys with null values should be dropped from json, reducing the size of the log message.                                                                                                                  | +| `logs.grafana.com/scrub-empties`    | Boolean String     | [json_scrub_empties](#json_scrub_empties) | Determines if keys with empty values (`"", [], {}`) should be dropped from json, reducing the size of the log message. 
| +| `logs.grafana.com/embed-pod` | Boolean String | [embed_pod](#embed_pod) | Whether or not to inject the name of the pod to the end of the log message i.e. `__pod=agent-logs-grafana-agent-jrqms`. | +| `logs.grafana.com/mask-credit-card` | Boolean String | [mask](#mask) | Whether or not to mask credit cards in the log line, if true the data will be masked as `**CC*REDACTED**` | +| `logs.grafana.com/mask-ssn` | Boolean String | [mask](#mask) | Whether or not to mask SSNs in the log line, if true the data will be masked as `**SSN*REDACTED**` | +| `logs.grafana.com/mask-email` | Boolean String | [mask](#mask) | Whether or not to mask emails in the log line, if true the data will be masked as`**EMAIL*REDACTED**` | +| `logs.grafana.com/mask-ipv4` | Boolean String | [mask](#mask) | Whether or not to mask IPv4 addresses in the log line, if true the data will be masked as`**IPV4*REDACTED**` | +| `logs.grafana.com/mask-ipv6` | Boolean String | [mask](#mask) | Whether or not to mask IPv6 addresses in the log line, if true the data will be masked as `**IPV6*REDACTED**` | +| `logs.grafana.com/mask-phone` | Boolean String | [mask](#mask) | Whether or not to mask phone numbers in the log line, if true the data will be masked as `**PHONE*REDACTED**` | +| `logs.grafana.com/mask-luhn` | Boolean String | [mask](#mask) | Whether or not to mask value which match the [Luhn Algorithm](https://en.wikipedia.org/wiki/Luhn_algorithm) in the log line, if true the data will be masked as `**LUHN*REDACTED**` | +| `logs.grafana.com/trim` | Boolean String | [trim](#trim) | Whether or not to trim the log line using [`strings.Trim`](https://pkg.go.dev/strings#Trim) | +| `logs.grafana.com/dedup-spaces` | Boolean String | [dedup_spaces](#dedup_spaces) | Determines if instances of 2 or more spaces should be replaced with a single space | +| `logs.grafana.com/sample` | Boolean String | [sample](#sample) | Determines if logs from the pod should be sampled, using 
[`stage.sample`](https://grafana.com/docs/alloy/latest/reference/components/loki.process/#stagesampling-block), at a given rate between 0-1 (.25) by default | + +--- + +## `logs.alloy` + +### `pods` + +--- + +## `drop.alloy` + +### `drop_levels` + +Handles the dropping of log messages based on a determined log level. This can help reduce the overall number of log messages/volume while still allowing applications to log verbose messages. The following annotations are supported: + +- `logs.grafana.com/drop-trace` +- `logs.grafana.com/drop-debug` +- `logs.grafana.com/drop-info` + +#### Arguments + +| Name | Required | Default | Description | +| :------------ | :------- | :-------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(LogsReceiver)` | Must be a where scraped should be forwarded to | +| `annotation` | _no_ | `logs.grafana.com` | The annotation namespace to use | +| `trace_value` | _no_ | `"true"` | The regular expression to use to determine if trace logs should be dropped, if you want to drop trace by default without setting the annotations everywhere use `".*"` or `"true\|"` | +| `trace_level` | _no_ | `"(?i)(trace?\|trc)"` | The regular expression to use to match trace logs level label value | +| `debug_value` | _no_ | `"true"` | The regular expression to use to determine if debug logs should be dropped, if you want to drop debug by default without setting the annotations everywhere use `".*"` or `"true\|"` | +| `debug_level` | _no_ | `(?i)(debug?\|dbg)` | The regular expression to use to match debug logs level label value | +| `info_value` | _no_ | `"true"` | The regular expression to use to determine if info logs should be dropped, if you want to drop info by default without setting the annotations everywhere use `".*"` or `"true\|"` | +| `info_level` | _no_ | `(?i)(info?)` | 
The regular expression to use to match info logs level label value | + +#### Exports + +| Name | Type | Description | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string` | The value passed into the `annotation` argument | +| `receiver` | `LogsReceiver` | The `loki.process` receiver for the module | + +--- + +## `embed.alloy` + +### `embed_pod` + +Loki supports [Structured Metadata](https://grafana.com/docs/loki/latest/get-started/labels/structured-metadata/) which is the ideal solution to embedding information without adding additional labels. However, if this is not possible then the next best solution is to embed the name of the pod at the end of the log line. The module accounts for json or raw text, and supports the following annotation: + +- `logs.grafana.com/embed-pod` + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(LogsReceiver)` | Must be a where scraped should be forwarded to | +| `annotation` | _no_ | `logs.grafana.com` | The annotation namespace to use | +| `embed_pod_value` | _no_ | `"true"` | The regular expression to use to determine if pod should be embedded or not, if you want to embed the pod by default without setting the annotations everywhere use `".*"` or `"true\|"` | +| `embed_pod_key` | _no_ | `"__pod"` | The key to use to embed the pod name into the log message | + +#### Exports + +| Name | Type | Description | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string` | The value passed into the `annotation` argument | +| `receiver` | `LogsReceiver` | The `loki.process` receiver for the module | + +--- + +## `json.alloy` + +### 
`json_scrub_empties` + +JSON is great because it offers a flexible storage schema, where the "schema" is stored next to the value. However, this flexibility comes at a cost; JSON is not an efficient storage mechanism because the "schema" is repeatedly stored next to each value. This can lead to unnecessary and extra bytes, especially when values are empty or defaulted, such as an empty string `""`, an empty object `{}`, or an empty array `[]`. When a value is empty, both the property and the value can be +removed to optimize storage. The following annotation supports this functionality: + +- `logs.grafana.com/scrub-empties` + +#### Arguments + +| Name | Required | Default | Description | +| :-------------------- | :------- | :------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `forward_to` | _yes_ | `list(LogsReceiver)` | Must be a where scraped should be forwarded to | +| `annotation` | _no_ | `logs.grafana.com` | The annotation namespace to use | +| `scrub_empties_value` | _no_ | `"true"` | The regular expression to use to determine if logs should have json empties scrubbed, if you want to scrub empties by default without setting the annotations everywhere use `".*"` or `"true\|"` | + +#### Exports + +| Name | Type | Description | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string` | The value passed into the `annotation` argument | +| `receiver` | `LogsReceiver` | The `loki.process` receiver for the module | + +### `json_scrub_nulls` + +Similar to `scrub-empties`, scrubbing `null` values from JSON logs can be beneficial for the same reasons, offering similar cost benefits. Removing `null` values can help reduce the storage size by eliminating unnecessary JSON entries. 
The following annotation supports this optimization: + +- `logs.grafana.com/scrub-nulls` + +#### Arguments + +| Name | Required | Default | Description | +| :------------------ | :------- | :------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(LogsReceiver)` | Must be a where scraped should be forwarded to | +| `annotation` | _no_ | `logs.grafana.com` | The annotation namespace to use | +| `scrub_nulls_value` | _no_ | `"true"` | The regular expression to use to determine if logs should have json nulls scrubbed, if you want to scrub nulls by default without setting the annotations everywhere use `".*"` or `"true\|"` | + +#### Exports + +| Name | Type | Description | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string` | The value passed into the `annotation` argument | +| `receiver` | `LogsReceiver` | The `loki.process` receiver for the module | + +--- + +## `mask.alloy` + +### `mask_luhn` + +Supports detecting and masking strings within log lines that match the [Luhn Algorithm](https://en.wikipedia.org/wiki/Luhn_algorithm). 
+ +- `logs.grafana.com/mask-luhn` + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :-------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(LogsReceiver)` | Must be a where scraped should be forwarded to | +| `annotation` | _no_ | `logs.grafana.com` | The annotation namespace to use | +| `mask_luhn_value` | _no_ | `"(?i)true"` | The regular expression to use to determine if logs should have luhn values masked, if you want to mask luhn by default without setting the annotations everywhere use `".*"` or `"true\|"` | +| `min_length` | _no_ | `13` | The minimum length of a Luhn match to mask | +| `replace_text` | _no_ | `"**LUHN*REDACTED**"` | The replacement text to use to for Luhn matches | + +#### Exports + +| Name | Type | Description | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string` | The value passed into the `annotation` argument | +| `receiver` | `LogsReceiver` | The `loki.process` receiver for the module | + +### `mask_credit_card` + +Supports detecting and masking strings within log lines that match various credit card formats. 
+ +- `logs.grafana.com/mask-credit-card` + +#### Arguments + +| Name | Required | Default | Description | +| :----------------------- | :------- | :------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(LogsReceiver)` | Must be a where scraped should be forwarded to | +| `annotation` | _no_ | `logs.grafana.com` | The annotation namespace to use | +| `mask_credit_card_value` | _no_ | `"(?i)true"` | The regular expression to use to determine if logs should have credit card values masked, if you want to mask credit cards by default without setting the annotations everywhere use `".*"` or `"true\|"` | +| `replace_text` | _no_ | `"**CC*REDACTED**"` | The replacement text to use to for Credit Card matches | + +#### Exports + +| Name | Type | Description | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string` | The value passed into the `annotation` argument | +| `receiver` | `LogsReceiver` | The `loki.process` receiver for the module | + +### `mask_email` + +Supports detecting and masking strings within log lines that match various email formats. 
+ +- `logs.grafana.com/mask-email` + +#### Arguments + +| Name               | Required | Default                | Description                                                                                                                                                                               | +| :----------------- | :------- | :--------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to`       | _yes_    | `list(LogsReceiver)`   | Must be a where scraped should be forwarded to                                                                                                                                            | +| `annotation`       | _no_     | `logs.grafana.com`     | The annotation namespace to use                                                                                                                                                           | +| `mask_email_value` | _no_     | `"(?i)true"`           | The regular expression to use to determine if logs should have emails masked, if you want to mask emails by default without setting the annotations everywhere use `".*"` or `"true\|"`   | +| `replace_text`     | _no_     | `"**EMAIL*REDACTED**"` | The replacement text to use for Email matches                                                                                                                                             | + +#### Exports + +| Name         | Type           | Description                                     | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string`       | The value passed into the `annotation` argument | +| `receiver`   | `LogsReceiver` | The `loki.process` receiver for the module      | + +### `mask_ipv4` + +Supports detecting and masking strings within log lines that match IPv4 formats. 
+ +- `logs.grafana.com/mask-ipv4` + +#### Arguments + +| Name              | Required | Default               | Description                                                                                                                                                                                        | +| :---------------- | :------- | :-------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `forward_to`      | _yes_    | `list(LogsReceiver)`  | Must be a where scraped should be forwarded to                                                                                                                                                     | +| `annotation`      | _no_     | `logs.grafana.com`    | The annotation namespace to use                                                                                                                                                                    | +| `mask_ipv4_value` | _no_     | `"(?i)true"`          | The regular expression to use to determine if logs should have IPv4 values masked, if you want to mask IPv4 values by default without setting the annotations everywhere use `".*"` or `"true\|"`   | +| `replace_text`    | _no_     | `"**IPv4*REDACTED**"` | The replacement text to use for IPv4 matches                                                                                                                                                       | + +#### Exports + +| Name         | Type           | Description                                     | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string`       | The value passed into the `annotation` argument | +| `receiver`   | `LogsReceiver` | The `loki.process` receiver for the module      | + +### `mask_ipv6` + +Supports detecting and masking strings within log lines that match IPv6 formats. 
+ +- `logs.grafana.com/mask-ipv6` + +#### Arguments + +| Name              | Required | Default               | Description                                                                                                                                                                                        | +| :---------------- | :------- | :-------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `forward_to`      | _yes_    | `list(LogsReceiver)`  | Must be a where scraped should be forwarded to                                                                                                                                                     | +| `annotation`      | _no_     | `logs.grafana.com`    | The annotation namespace to use                                                                                                                                                                    | +| `mask_ipv6_value` | _no_     | `"(?i)true"`          | The regular expression to use to determine if logs should have IPv6 values masked, if you want to mask IPv6 values by default without setting the annotations everywhere use `".*"` or `"true\|"`   | +| `replace_text`    | _no_     | `"**IPv6*REDACTED**"` | The replacement text to use for IPv6 matches                                                                                                                                                       | + +#### Exports + +| Name         | Type           | Description                                     | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string`       | The value passed into the `annotation` argument | +| `receiver`   | `LogsReceiver` | The `loki.process` receiver for the module      | + +### `mask_phone` + +Supports detecting and masking strings within log lines that match phone number formats. 
+ +- `logs.grafana.com/mask-phone` + +#### Arguments + +| Name | Required | Default | Description | +| :----------------- | :------- | :--------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(LogsReceiver)` | Must be a list(LogsReceiver) where scraped logs should be forwarded to | +| `annotation` | _no_ | `logs.grafana.com` | The annotation namespace to use | +| `mask_phone_value` | _no_ | `"(?i)true"` | The regular expression to use to determine if logs should have phone numbers masked, if you want to mask phone numbers by default without setting the annotations everywhere use `".*"` or `"true\|"` | +| `replace_text` | _no_ | `"**PHONE*REDACTED**"` | The replacement text to use for phone number matches | + +#### Exports + +| Name | Type | Description | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string` | The value passed into the `annotation` argument | +| `receiver` | `LogsReceiver` | The `loki.process` receiver for the module | + +### `mask_ssn` + +Supports detecting and masking strings within log lines that match social security number formats. 
+ +- `logs.grafana.com/mask-ssn` + +#### Arguments + +| Name | Required | Default | Description | +| :--------------- | :------- | :------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(LogsReceiver)` | Must be a list(LogsReceiver) where scraped logs should be forwarded to | +| `annotation` | _no_ | `logs.grafana.com` | The annotation namespace to use | +| `mask_ssn_value` | _no_ | `"(?i)true"` | The regular expression to use to determine if logs should have SSNs masked, if you want to mask SSNs by default without setting the annotations everywhere use `".*"` or `"true\|"` | +| `replace_text` | _no_ | `"**SSN*REDACTED**"` | The replacement text to use for SSN matches | + +#### Exports + +| Name | Type | Description | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string` | The value passed into the `annotation` argument | +| `receiver` | `LogsReceiver` | The `loki.process` receiver for the module | + +--- + +## `utils.alloy` + +### `decolorize` + +Supports the removal of ANSI color codes from the log lines, thus making it easier to parse logs and reducing bytes. 
+ +- `logs.grafana.com/decolorize` + +#### Arguments + +| Name | Required | Default | Description | +| :----------------- | :------- | :------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(LogsReceiver)` | Must be a where scraped should be forwarded to | +| `annotation` | _no_ | `logs.grafana.com` | The annotation namespace to use | +| `decolorize_value` | _no_ | `"(?i)true"` | The regular expression to use to determine if logs should be decolorized, if you want to decolorize by default without setting the annotations everywhere use `".*"` or `"true\|"` | + +#### Exports + +| Name | Type | Description | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string` | The value passed into the `annotation` argument | +| `receiver` | `LogsReceiver` | The `loki.process` receiver for the module | + +### `trim` + +Supports the removal of any leading or trailing whitespace from the log lines. 
+ +- `logs.grafana.com/trim` + +#### Arguments + +| Name | Required | Default | Description | +| :----------- | :------- | :------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(LogsReceiver)` | Must be a list(LogsReceiver) where scraped logs should be forwarded to | +| `annotation` | _no_ | `logs.grafana.com` | The annotation namespace to use | +| `trim_value` | _no_ | `"(?i)true"` | The regular expression to use to determine if leading or trailing whitespace should be trimmed, if you want to trim whitespace by default without setting the annotations everywhere use `".*"` or `"true\|"` | + +#### Exports + +| Name | Type | Description | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string` | The value passed into the `annotation` argument | +| `receiver` | `LogsReceiver` | The `loki.process` receiver for the module | + +### `dedup_spaces` + +Supports replacing two or more spaces with a single space. 
+ +- `logs.grafana.com/dedup-spaces` + +#### Arguments + +| Name | Required | Default | Description | +| :----------- | :------- | :------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(LogsReceiver)` | Must be a list(LogsReceiver) where scraped logs should be forwarded to | +| `annotation` | _no_ | `logs.grafana.com` | The annotation namespace to use | +| `trim_value` | _no_ | `"(?i)true"` | The regular expression to use to determine if multiple spaces should be replaced with a single space or not, if you want to always dedup use `".*"` or `"true\|"` | + +#### Exports + +| Name | Type | Description | +| :----------- | :------------- | :---------------------------------------------- | +| `annotation` | `string` | The value passed into the `annotation` argument | +| `receiver` | `LogsReceiver` | The `loki.process` receiver for the module | + +### `sampling` + +Supports sampling of logs at a given rate + +- `logs.grafana.com/sampling` + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :-------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(LogsReceiver)` | Must be a list(LogsReceiver) where scraped logs should be forwarded to | +| `annotation` | _no_ | `logs.grafana.com` | The annotation namespace to use | +| `sampling_value` | _no_ | `"(?i)true"` | The regular expression to use to determine if logs should be sampled or not, if you want to always sample use `".*"` or `"true\|"` | +| `sampling_rate` | _no_ | `0.25` | The sampling rate in a range of [0, 1] | +| `sampling_reason` | _no_ | `annotation_sampling` | The sampling reason | + +#### Exports + +| Name | Type | Description | +| :----------- | :------------- | 
:---------------------------------------------- | +| `annotation` | `string` | The value passed into the `annotation` argument | +| `receiver` | `LogsReceiver` | The `loki.process` receiver for the module | + +--- + +## Usage + +```alloy +import.git "log_utils" { + repository = "https://github.com/grafana/alloy-modules.git" + revision = "main" + path = "modules/utils/logs/" +} + +import.git "k8s_logs" { + repository = "https://github.com/grafana/alloy-modules.git" + revision = "main" + path = "modules/kubernetes/core/logs.alloy" +} + +import.git "log_annotations" { + repository = "https://github.com/grafana/alloy-modules.git" + revision = "main" + path = "modules/kubernetes/annotations/logs.alloy" +} + +log_annotations.pods "targets" { + annotation = "logs.grafana.com" +} + +k8s_logs.from_worker "default" { + targets = log_annotations.pods.targets.output + forward_to = [log_annotations.decolorize.default.receiver] +} + +log_annotations.decolorize "default" { + forward_to = [log_utils.default_level.default.receiver] + annotation = "logs.grafana.com" +} + +log_utils.default_level "default" { + forward_to = [log_utils.normalize_level.default.receiver] +} + +log_utils.normalize_level "default" { + forward_to = [ + log_utils.pre_process_metrics.default.receiver, + log_annotations.drop_levels.default.receiver, + ] +} + +log_utils.pre_process_metrics "default" {} + +log_annotations.drop_levels "default" { + forward_to = [log_annotations.mask.default.receiver] + annotation = "logs.agent.grafana.com" +} + +log_annotations.mask "default" { + forward_to = [log_annotations.trim.default.receiver] + annotation = "logs.agent.grafana.com" +} + +log_annotations.trim "default" { + forward_to = [log_annotations.dedup_spaces.default.receiver] + annotation = "logs.agent.grafana.com" +} + +log_annotations.dedup_spaces "default" { + forward_to = [log_utils.structured_metadata.default.receiver] + annotation = "logs.agent.grafana.com" +} + +log_utils.structured_metadata "default" { + 
forward_to = [log_utils.keep_labels.default.receiver] +} + +log_utils.keep_labels "default" { + forward_to = [ + log_utils.post_process_metrics.default.receiver, + loki.write.local.receiver, + ] +} + +log_utils.post_process_metrics "default" {} + +loki.write "local" { + endpoint { + url = env("LOGS_PRIMARY_URL") + + basic_auth { + username = env("LOGS_PRIMARY_TENANT") + password = env("LOGS_PRIMARY_TOKEN") + } + } + + external_labels = { + "cluster" = coalesce(env("CLUSTER_NAME"), env("CLUSTER"), ""), + "env" = coalesce(env("ENV"), ""), + "region" = coalesce(env("REGION"), ""), + } +} +``` diff --git a/modules/kubernetes/annotations/logs/drop.alloy b/modules/kubernetes/annotations/logs/drop.alloy new file mode 100644 index 0000000..1375b80 --- /dev/null +++ b/modules/kubernetes/annotations/logs/drop.alloy @@ -0,0 +1,113 @@ +/* + Drop Levels + This component is used to drop logs based on their log level. + The following log levels are supported: + - Trace + - Debug + - Info + + Annotations: + logs.grafana.com/drop-trace: true + logs.grafana.com/drop-debug: true + logs.grafana.com/drop-info: true +*/ +declare "drop_levels" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. 
metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "trace_value" { + comment = "The regular expression to use to determine if trace logs should be dropped, if you want to drop trace by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "true" + optional = true + } + + argument "trace_level" { + comment = "The regular expression to use to match trace logs level label value (default: (?i)(trace?|trc))" + default = "(?i)(trace?|trc)" + optional = true + } + + argument "debug_value" { + comment = "The regular expression to use to determine if debug logs should be dropped, if you want to drop debug by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "true" + optional = true + } + + argument "debug_level" { + comment = "The regular expression to use to match debug logs level label value (default: (?i)(debug?|dbg))" + default = "(?i)(debug?|dbg)" + optional = true + } + + argument "info_value" { + comment = "The regular expression to use to determine if info logs should be dropped, if you want to drop info by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "true" + optional = true + } + + argument "info_level" { + comment = "The regular expression to use to match info logs level label value (default: (?i)(info?))" + default = "(?i)(info?)" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." 
+ default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.drop_level.receiver + } + + loki.process "drop_level" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/drop-trace annotation, if not set or set to true then drop + // any log message with level=trace + stage.match { + pipeline_name = "pipeline for annotation ||" + argument.annotation.value + "/drop-trace: true" + selector = "{" + argument.__sd_annotation.value + "_drop_trace=~\"" + argument.trace_value.value + "\"" + ",level=~\"" + argument.trace_level.value + "\"}" + action = "drop" + drop_counter_reason = "trace" + } + + // check logs.grafana.com/drop-debug annotation, if not set or set to true then drop + // any log message with level=debug + stage.match { + pipeline_name = "pipeline for annotation ||" + argument.annotation.value + "/drop-debug: true" + selector = "{" + argument.__sd_annotation.value + "_drop_debug=~\"" + argument.debug_value.value + "\"" + ",level=~\"" + argument.debug_level.value + "\"}" + action = "drop" + drop_counter_reason = "debug" + } + + // check logs.grafana.com/drop-info annotation, if not set or set to true then drop + // any log message with level=info + stage.match { + pipeline_name = "pipeline for annotation ||" + argument.annotation.value + "/drop-info: true" + selector = "{" + argument.__sd_annotation.value + "_drop_info=~\"" + argument.info_value.value + "\"" + ",level=~\"" + argument.info_level.value + "\"}" + action = "drop" + drop_counter_reason = "info" + } + + } +} diff --git a/modules/kubernetes/annotations/logs/embed.alloy b/modules/kubernetes/annotations/logs/embed.alloy new file mode 100644 index 0000000..28bd055 --- /dev/null +++ b/modules/kubernetes/annotations/logs/embed.alloy @@ -0,0 +1,99 @@ +/* + Embed Pod + This component is used to embed the pod name 
into the log message. + This can be useful for debugging purposes, but it should not be a label in Loki due to the high cardinality it would create. + + Annotations: + logs.grafana.com/embed-pod: true +*/ +declare "embed_pod" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "embed_pod_value" { + comment = "The regular expression to use to determine if pod should be embedded or not, if you want to embed the pod by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "true" + optional = true + } + + argument "embed_pod_key" { + comment = "The key to use to embed the pod name into the log message (default: __pod)" + default = "__pod" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." 
+ default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.embed_pod.receiver + } + + loki.process "embed_pod" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/embed-pod annotation, if true embed the name of the pod to the end of the log line + // this can reduce the overall cardinality, by not using a label of "pod", individual pods can still be searched + // using a line selector i.e. __pod=your-pod-name + stage.match { + selector = "{" + argument.__sd_annotation.value + "_embed_pod=~\"(?i)true\"}" + pipeline_name = "pipeline for annotation || " + argument.annotation.value + "/embed-pod: true" + + // embed as json property + stage.match { + selector = "{" + argument.__sd_annotation.value + "_embed_pod=~\"(?i)true\"} |~ \"^\\s*{(.|\n)+}\\s*$\"" + // render a new label called log_line, and add the name of the pod to the end of the log message + // knowing the pod name can be valuable for debugging, but it should not be a label in Loki due + // to the high cardinality it would create. + // note: .Entry is a special key that is used to reference the current line + stage.replace { + expression = "\\}$" + replace = "" + } + stage.template { + source = "log_line" + template = "{{ .Entry }},\"" + argument.embed_pod_key.value + "\":\"{{ .pod }}\"}" + } + } + + // embed as text property + stage.match { + selector = "{" + argument.__sd_annotation.value + "_embed_pod=~\"(?i)true\"} !~ \"^\\s*{(.|\n)+}\\s*$\"" + // render a new label called log_line, and add the name of the pod to the end of the log message + // knowing the pod name can be valuable for debugging, but it should not be a label in Loki due + // to the high cardinality it would create. 
+ // note: .Entry is a special key that is used to reference the current line + stage.template { + source = "log_line" + template = "{{ .Entry }} " + argument.embed_pod_key.value + "={{ .pod }}" + } + } + + // reset the output to the log_line + stage.output { + source = "log_line" + } + } + } +} diff --git a/modules/kubernetes/annotations/logs/json.alloy b/modules/kubernetes/annotations/logs/json.alloy new file mode 100644 index 0000000..bda7444 --- /dev/null +++ b/modules/kubernetes/annotations/logs/json.alloy @@ -0,0 +1,132 @@ +/* + JSON Scrub Empties + This component is used to scrub empty values from JSON logs. + + Annotations: + logs.grafana.com/scrub-empties: true +*/ +declare "json_scrub_empties" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "scrub_empties_value" { + comment = "The regular expression to use to determine if logs should have json empties scrubbed, if you want to scrub empties by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "(?i)true" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." 
+ default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.json_scrub_empties.receiver + } + + loki.process "json_scrub_empties" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/scrub-empties annotation, if true remove any json property whose value is set to + // an empty string "", empty object {} or empty array [] is removed + // this can reduce the overall # of bytes sent and stored in Loki + stage.match { + pipeline_name = "pipeline for annotation || " + argument.annotation.value + "/scrub-empties: true" + selector = "{" + argument.__sd_annotation.value + "_scrub_empties=~\"" + argument.scrub_empties_value.value + "\"} |~ \"^\\s*{(.|\n)+}\\s*$\"" + + // remove null properties + stage.replace { + // unescaped regex: (\s*,\s*("[^"]+"\s*:\s*(\[\s*\]|\{\s*\}|"\s*"))|("[^"]+"\s*:\s*(\[\s*\]|\{\s*\}|"\s*"))\s*,\s*) + expression = "(\\s*,\\s*(\"[^\"]+\"\\s*:\\s*(\\[\\s*\\]|\\{\\s*\\}|\"\\s*\"))|(\"[^\"]+\"\\s*:\\s*(\\[\\s*\\]|\\{\\s*\\}|\"\\s*\"))\\s*,\\s*)" + replace = "" + } + } + + } +} + +/* + JSON Scrub Nulls + This component is used to scrub nulls from JSON logs. + + Annotations: + logs.grafana.com/scrub-nulls: true + +*/ +declare "json_scrub_nulls" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. 
metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "scrub_nulls_value" { + comment = "The regular expression to use to determine if logs should have json nulls scrubbed, if you want to scrub nulls by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "(?i)true" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." + default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.json_scrub_nulls.receiver + } + + loki.process "json_scrub_nulls" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/scrub-nulls annotation, if true remove any json property whose value is set to null + // this can reduce the overall # of bytes sent and stored in Loki + stage.match { + pipeline_name = "pipeline for annotation || " + argument.annotation.value + "/scrub-nulls: true" + selector = "{" + argument.__sd_annotation.value + "_scrub_nulls=~\"" + argument.scrub_nulls_value.value + "\"} |~ \"^\\s*{(.|\n)+}\\s*$\"" + + // remove null properties + stage.replace { + // unescaped regex: (\s*,\s*("[^"]+"\s*:\s*null)|("[^"]+"\s*:\s*null)\s*,\s*) + expression = "(\\s*,\\s*(\"[^\"]+\"\\s*:\\s*null)|(\"[^\"]+\"\\s*:\\s*null)\\s*,\\s*)" + replace = "" + } + } + + } +} diff --git a/modules/kubernetes/annotations/logs/logs.alloy b/modules/kubernetes/annotations/logs/logs.alloy new file mode 100644 index 
0000000..fe0d003 --- /dev/null +++ b/modules/kubernetes/annotations/logs/logs.alloy @@ -0,0 +1,202 @@ +/* + Pods component to find eligible log targets + + Annotations: + logs.grafana.com/ingest: true + logs.grafana.com/tenant: "primary" +*/ +declare "pods" { + // arguments for kubernetes discovery + argument "namespaces" { + comment = "The namespaces to look for targets in (default: [\"kube-system\"] is all namespaces)" + optional = true + } + + argument "field_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [])" + optional = true + } + + argument "label_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [\"k8s-app=konnectivity-agent\"])" + optional = true + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, this module should be invoked multiple times + // i.e. metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: metrics.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "tenant" { + comment = "The tenant to write metrics to. This does not have to be the tenantId, this is the value to look for in the logs.agent.grafana.com/tenant annotation, and this can be a regex." + optional = true + default = ".*" + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." 
+ default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + // export the discovered targets + export "output" { + value = discovery.relabel.log_annotations.output + } + + // export the annotation argument + export "annotation" { + value = coalesce(argument.annotation.value, "logs.grafana.com") + } + + // find all pods + discovery.kubernetes "log_annotations" { + role = "pod" + + selectors { + role = "pod" + field = join(coalesce(argument.field_selectors.value, []), ",") + label = join(coalesce(argument.label_selectors.value, []), ",") + } + + namespaces { + names = coalesce(argument.namespaces.value, []) + } + } + + // apply relabelings + discovery.relabel "log_annotations" { + targets = discovery.kubernetes.log_annotations.targets + + // allow pods to declare their logs to be ingested or not, the default is true + // i.e. logs.grafana.com/ingest: false + rule { + action = "keep" + source_labels = [ + "__meta_kubernetes_pod_annotation_" + argument.__sd_annotation.value + "_ingest", + ] + regex = "^(true|)$" + } + + // allow pods to declare what tenant their logs should be written to, the following annotation is supported: + // logs.grafana.com/tenant: "primary" + rule { + action = "keep" + source_labels = [ + "__meta_kubernetes_pod_annotation_" + argument.__sd_annotation.value + "_tenant", + ] + regex = "^(" + argument.tenant.value + ")$" + } + + // set the instance label as the name of the worker node the pod is on + rule { + action = "replace" + source_labels = ["__meta_kubernetes_pod_node_name"] + target_label = "instance" + } + + // set the namespace label + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + // set the pod label + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + // set the container label + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + // 
set a workload label + rule { + source_labels = [ + "__meta_kubernetes_pod_controller_kind", + "__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + // remove the hash from the ReplicaSet + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", + "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + // set the version if specified as metadata labels "version:" or "app.kubernetes.io/version:" or "app_version:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_version", + "__meta_kubernetes_pod_label_version", + "__meta_kubernetes_pod_label_app_version", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "version" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + + // set the job label to be namespace / friendly pod name + rule { + action = "replace" + source_labels = [ + "workload", + "__meta_kubernetes_namespace", + ] + regex = ".+\\/(.+);(.+)" + replacement = "$2/$1" + target_label = "job" + } + } + +} diff --git a/modules/kubernetes/annotations/logs/mask.alloy 
b/modules/kubernetes/annotations/logs/mask.alloy new file mode 100644 index 0000000..5226a3a --- /dev/null +++ b/modules/kubernetes/annotations/logs/mask.alloy @@ -0,0 +1,466 @@ +/* + Luhn Masking + https://en.wikipedia.org/wiki/Luhn_algorithm + Annotation: logs.grafana.com/mask-luhn: true +*/ +declare "mask_luhn" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "mask_luhn_value" { + comment = "The regular expression to use to determine if logs should have luhn values masked, if you want to mask luhn by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "(?i)true" + optional = true + } + + argument "min_length" { + comment = "The minimum length of a Luhn match to mask (default: 13)" + default = 13 + optional = true + } + + argument "replace_text" { + comment = "The replacement text to use to for Luhn matches (default: **LUHN*REDACTED**)" + default = "**LUHN*REDACTED**" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." 
+ default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.mask_luhn.receiver + } + + loki.process "mask_luhn" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/mask-luhn annotation, if true the data will be masked as **LUHN*REDACTED** + stage.match { + pipeline_name = "pipeline for annotation || " + argument.annotation.value + "/mask-luhn: true" + selector = "{" + argument.__sd_annotation.value + "_mask_luhn=~\"" + argument.mask_luhn_value.value + "\"}" + + stage.luhn { + min_length = argument.min_length.value + replacement = argument.replace_text.value + } + } + } +} + +/* + Credit Card Masking + Annotation: logs.grafana.com/mask-credit-card: true +*/ +declare "mask_credit_card" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. 
metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "mask_credit_card_value" { + comment = "The regular expression to use to determine if logs should have credit card values masked, if you want to mask credit cards by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "(?i)true" + optional = true + } + + argument "replace_text" { + comment = "The replacement text to use to for Credit Card matches (default: **CC*REDACTED**)" + default = "**CC*REDACTED**" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." + default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.mask_credit_card.receiver + } + + loki.process "mask_credit_card" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/mask-credit-card annotation, if true the data will be masked as **CC*REDACTED** + // Formats: + // Visa: 4[0-9]{15} + // MasterCard: 5[1-5][0-9]{14} + // American Express: 3[47][0-9]{13} + // Discover: 6[0-9]{15} + // JCB: 3[51-55][0-9]{14} + stage.match { + pipeline_name = "pipeline for annotation || " + argument.annotation.value + "/mask-credit-card: true" + selector = "{" + argument.__sd_annotation.value + "_mask_credit_card=~\"" + argument.mask_credit_card_value.value + "\"}" + + stage.replace { + // unescaped regex: (4[0-9]{15}|5[1-5][0-9]{14}|3[47][0-9]{13}|6[0-9]{15}|3[51-55][0-9]{14}) + expression = 
"(4[0-9]{15}|5[1-5][0-9]{14}|3[47][0-9]{13}|6[0-9]{15}|3[51-55][0-9]{14})" + replace = argument.replace_text.value + } + } + } +} + +/* + Email Masking + Annotation: logs.grafana.com/mask-email: true +*/ +declare "mask_email" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "mask_email_value" { + comment = "The regular expression to use to determine if logs should have emails masked, if you want to mask emails by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "(?i)true" + optional = true + } + + argument "replace_text" { + comment = "The replacement text to use to for Email matches (default: **EMAIL*REDACTED**)" + default = "**EMAIL*REDACTED**" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." 
+ default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.mask_email.receiver + } + + loki.process "mask_email" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/mask-email annotation, if true the data will be masked as **EMAIL*REDACTED** + stage.match { + pipeline_name = "pipeline for annotation || " + argument.annotation.value + "/mask-email: true" + selector = "{" + argument.__sd_annotation.value + "_mask_email=~\"" + argument.mask_email_value.value + "\"}" + + stage.replace { + // unescaped regex: ([\w\.=-]+@[\w\.-]+\.[\w]{2,64}) + expression = "([\\w\\.=-]+@[\\w\\.-]+\\.[\\w]{2,64})" + replace = argument.replace_text.value + } + } + } +} + +/* + IPv4 Masking + Annotation: logs.grafana.com/mask-ipv4: true +*/ +declare "mask_ipv4" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. 
metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "mask_ipv4_value" { + comment = "The regular expression to use to determine if logs should have IPv4 values masked, if you want to mask IPv4 values by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "(?i)true" + optional = true + } + + argument "replace_text" { + comment = "The replacement text to use to for IPv4 matches (default: **IPv4*REDACTED**)" + default = "**IPv4*REDACTED**" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." + default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.mask_ipv4.receiver + } + + loki.process "mask_ipv4" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/mask-ipv4 annotation, if true the data will be masked as **IPV4*REDACTED** + stage.match { + pipeline_name = "pipeline for annotation || " + argument.annotation.value + "/mask-ipv4: true" + selector = "{" + argument.__sd_annotation.value + "_mask_ipv4=~\"" + argument.mask_ipv4_value.value + "\"}" + + stage.replace { + // unescaped regex: ((\b25[0-5]|\b2[0-4][0-9]|\b[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}) + expression = "((\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3})" + replace = argument.replace_text.value + } + } + } +} + +/* + IPv6 Masking + Annotation: logs.grafana.com/mask-ipv6: true +*/ +declare "mask_ipv6" { + 
argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "mask_ipv6_value" { + comment = "The regular expression to use to determine if logs should have IPv6 values masked, if you want to mask IPv6 values by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "(?i)true" + optional = true + } + + argument "replace_text" { + comment = "The replacement text to use to for IPv6 matches (default: **IPV6*REDACTED**)" + default = "**IPV6*REDACTED**" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." 
+ default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.mask_ipv6.receiver + } + + loki.process "mask_ipv6" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/mask-ipv6 annotation, if true the data will be masked as **IPV6*REDACTED** + stage.match { + pipeline_name = "pipeline for annotation || " + argument.annotation.value + "/mask-ipv6: true" + selector = "{" + argument.__sd_annotation.value + "_mask_ipv6=~\"" + argument.mask_ipv6_value.value + "\"}" + + stage.replace { + // unescaped regex: (([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])) + expression = 
"(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))" + replace = argument.replace_text.value + } + } + } +} + +/* + Phone Masking + Annotation: logs.grafana.com/mask-phone: true +*/ +declare "mask_phone" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. 
metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "mask_phone_value" { + comment = "The regular expression to use to determine if logs should have phone numbers masked, if you want to mask phone numbers by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "(?i)true" + optional = true + } + + argument "replace_text" { + comment = "The replacement text to use to for Text matches (default: **PHONE*REDACTED**)" + default = "**PHONE*REDACTED**" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." + default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.mask_phone.receiver + } + + loki.process "mask_phone" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/mask-phone annotation, if true the data will be masked as **PHONE*REDACTED** + stage.match { + pipeline_name = "pipeline for annotation || " + argument.annotation.value + "/mask-phone: true" + selector = "{" + argument.__sd_annotation.value + "_mask_phone=~\"" + argument.mask_phone_value.value + "\"}" + + stage.replace { + // unescaped regex: ([\+]?[(]?[0-9]{3}[)]?[-\s\.]?[0-9]{3}[-\s\.]?[0-9]{4,6}) + expression = "([\\+]?[(]?[0-9]{3}[)]?[-\\s\\.]?[0-9]{3}[-\\s\\.]?[0-9]{4,6})" + replace = argument.replace_text.value + } + } + } +} + +/* + SSN Masking + Annotation: logs.grafana.com/mask-ssn: true +*/ +declare "mask_ssn" { + argument "forward_to" { + comment = "Must be a 
list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "mask_ssn_value" { + comment = "The regular expression to use to determine if logs should have SSNs masked, if you want to mask SSNs by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "(?i)true" + optional = true + } + + argument "replace_text" { + comment = "The replacement text to use to for SSN matches (default: **SSN*REDACTED**)" + default = "**SSN*REDACTED**" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." 
+ default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.mask_ssn.receiver + } + + loki.process "mask_ssn" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/mask-ssn annotation, if true the data will be masked as **SSN*REDACTED** + stage.match { + pipeline_name = "pipeline for annotation || " + argument.annotation.value + "/mask-ssn: true" + selector = "{" + argument.__sd_annotation.value + "_mask_ssn=~\"" + argument.mask_ssn_value.value + "\"}" + + stage.replace { + // unescaped regex: ([0-9]{3}-[0-9]{2}-[0-9]{4}) + expression = "([0-9]{3}-[0-9]{2}-[0-9]{4})" + replace = argument.replace_text.value + } + } + } +} diff --git a/modules/kubernetes/annotations/logs/utils.alloy b/modules/kubernetes/annotations/logs/utils.alloy new file mode 100644 index 0000000..71c4454 --- /dev/null +++ b/modules/kubernetes/annotations/logs/utils.alloy @@ -0,0 +1,259 @@ +/* + Decolorize + This component is used to remove color codes from logs. + + Annotations: + logs.grafana.com/decolorize: true +*/ +declare "decolorize" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. 
metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "decolorize_value" { + comment = "The regular expression to use to determine if logs should be decolorized, if you want to decolorize by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "(?i)true" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." + default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.decolorize.receiver + } + + loki.process "decolorize" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/decolorize annotation, if true remove ANSI color + // codes from the log line + stage.match { + selector = "{" + argument.__sd_annotation.value + "_decolorize=~\"" + argument.decolorize_value.value + "\"}" + + stage.decolorize {} + } + + } +} + +/* + Trim + This component is used to trim leading and trailing whitespace from log lines. + + Annotations: + logs.grafana.com/trim: true +*/ +declare "trim" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. 
metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "trim_value" { + comment = "The regular expression to use to determine if whitespace should be embedded or not, if you want to embed the pod by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "true" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." + default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.trim.receiver + } + + loki.process "trim" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/trim annotation, if true trim any leading/trailing whitespace log line + stage.match { + selector = "{" + argument.__sd_annotation.value + "_trim=~\"" + argument.trim_value.value + "\"}" + pipeline_name = "pipeline for annotation || " + argument.annotation.value + "/trim: true" + + stage.template { + source = "trimmed_line" + template = "{{ Trim .Entry }}" + } + + stage.output { + source = "trimmed_line" + } + } + } +} + +/* + Dedup Spaces + This component is used to deduplicate spaces in log lines. 
+ + Annotations: + logs.grafana.com/dedup-spaces: true +*/ +declare "dedup_spaces" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "dedup_value" { + comment = "The regular expression to use to determine if multiple spaces should be replaced with a single space or not, if you want to always dedup use '.*' or 'true|' (default: true)" + default = "true" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." 
+ default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.dedup_spaces.receiver + } + + loki.process "dedup_spaces" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/trim annotation, if true trim any leading/trailing whitespace log line + stage.match { + selector = "{" + argument.__sd_annotation.value + "_dedup_spaces=~\"" + argument.dedup_value.value + "\"}" + pipeline_name = "pipeline for annotation || " + argument.annotation.value + "/dedup-spaces: true" + + stage.replace { + expression = "(\\s{2,})" + replace = " " + } + } + } + +} + +/* + Sampling + This component is used to sample logs + + Annotations: + logs.grafana.com/sampling: true +*/ +declare "sampling" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, This component should be invoked multiple times + // i.e. 
metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: logs.grafana.com)" + default = "logs.grafana.com" + optional = true + } + + argument "sampling_value" { + comment = "The regular expression to use to determine if the log should be sampled or not, if you want to sample the pod by default without setting the annotations everywhere use '.*' or 'true|' (default: true)" + default = "true" + optional = true + } + + argument "sampling_rate" { + comment = "The sampling rate in a range of [0, 1] (default: 0.25)" + optional = true + default = 0.25 + } + + argument "sampling_reason" { + comment = "The sampling reason (default: annotation_sampling)" + optional = true + default = "annotation_sampling" + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." 
+ default = replace(replace(replace(coalesce(argument.annotation.value, "logs.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + export "annotation" { + value = argument.annotation.value + } + + export "receiver" { + value = loki.process.sampling.receiver + } + + loki.process "sampling" { + forward_to = argument.forward_to.value + + // check logs.grafana.com/sampling annotation, if true the logs will be sampled at the specified rate + stage.match { + selector = "{" + argument.__sd_annotation.value + "_sampling=~\"" + argument.sampling_value.value + "\"}" + pipeline_name = "pipeline for annotation || " + argument.annotation.value + "/sampling: true" + + stage.sampling { + rate = argument.sampling_rate.value + drop_counter_reason = argument.sampling_reason.value + } + } + } + +} diff --git a/modules/kubernetes/annotations/metrics.alloy b/modules/kubernetes/annotations/metrics.alloy index 2445aa7..f037cfa 100644 --- a/modules/kubernetes/annotations/metrics.alloy +++ b/modules/kubernetes/annotations/metrics.alloy @@ -109,7 +109,7 @@ declare "kubernetes" { argument "annotation" { // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ - // k8s selectors d not support a logical OR, if multiple types of annotations are needed, this module should be invoked multiple times + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, this module should be invoked multiple times // i.e. metrics.grafana.com, then again for prometheus.io comment = "The annotation namespace to use (default: metrics.grafana.com)" default = "metrics.grafana.com" @@ -117,7 +117,7 @@ declare "kubernetes" { } argument "tenant" { - comment = "The tenant to write metrics to. This does not have to be the tenantId, this is the value to look for in the logs.agent.grafana.com/tenant annotation, and this can be a regex." + comment = "The tenant to write metrics to. 
This does not have to be the tenantId, this is the value to look for in the metrics.grafana.com/tenant annotation, and this can be a regex." optional = true default = ".*" } @@ -151,7 +151,7 @@ declare "kubernetes" { } // annotations service discovery - discovery.kubernetes "annotations" { + discovery.kubernetes "metric_annotations" { role = coalesce(argument.role.value, "endpoints") selectors { @@ -165,8 +165,8 @@ declare "kubernetes" { } } - discovery.relabel "annotations" { - targets = discovery.kubernetes.annotations.targets + discovery.relabel "metric_annotations" { + targets = discovery.kubernetes.metric_annotations.targets /**************************************************************************************************************** * Handle Targets to Keep or Drop @@ -480,7 +480,7 @@ declare "kubernetes" { } export "output" { - value = discovery.relabel.annotations.output + value = discovery.relabel.metric_annotations.output } } diff --git a/modules/kubernetes/annotations/probes.alloy b/modules/kubernetes/annotations/probes.alloy index 92402ed..109d154 100644 --- a/modules/kubernetes/annotations/probes.alloy +++ b/modules/kubernetes/annotations/probes.alloy @@ -78,13 +78,13 @@ declare "kubernetes" { argument "label_selectors" { // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ - comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=grafana-agent\"])" + comment = "The label selectors to use to find matching targets (default: [])" optional = true } argument "annotation" { // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ - // k8s selectors d not support a logical OR, if multiple types of annotations are needed, this module should be invoked multiple times + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, this module should be invoked multiple times // i.e. 
probes.grafana.com, then again for prometheus.io comment = "The annotation namespace to use (default: probes.grafana.com)" default = "probes.grafana.com" @@ -112,7 +112,7 @@ declare "kubernetes" { } // annotations service discovery - discovery.kubernetes "probes" { + discovery.kubernetes "probe_annotations" { role = coalesce(argument.role.value, "service") selectors { @@ -127,7 +127,7 @@ declare "kubernetes" { } discovery.relabel "probes" { - targets = discovery.kubernetes.probes.targets + targets = discovery.kubernetes.probe_annotations.targets /**************************************************************************************************************** * Handle Targets to Keep or Drop @@ -403,7 +403,7 @@ declare "kubernetes" { } export "output" { - value = discovery.relabel.probes.output + value = discovery.relabel.probe_annotations.output } } diff --git a/modules/kubernetes/cert-manager/README.md b/modules/kubernetes/cert-manager/README.md index 26178a9..256ff1a 100644 --- a/modules/kubernetes/cert-manager/README.md +++ b/modules/kubernetes/cert-manager/README.md @@ -74,7 +74,7 @@ The following labels are automatically added to exported targets. The following example will scrape all cert_manager instances in cluster. -```river +```alloy import.git "cert_manager" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -110,7 +110,7 @@ prometheus.remote_write "local" { The following example will scrape cert_manager for metrics on the local machine. -```river +```alloy import.git "cert_manager" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/kubernetes/core/README.md b/modules/kubernetes/core/README.md index bd8b770..2e6193d 100644 --- a/modules/kubernetes/core/README.md +++ b/modules/kubernetes/core/README.md @@ -244,7 +244,7 @@ The following labels are automatically added to exported targets. The following example will scrape all agents in cluster. 
-```river +```alloy import.git "k8s" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/kubernetes/core/logs.alloy b/modules/kubernetes/core/logs.alloy new file mode 100644 index 0000000..8a8413a --- /dev/null +++ b/modules/kubernetes/core/logs.alloy @@ -0,0 +1,164 @@ +declare "from_worker" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + export "receiver" { + value = loki.process.parse.receiver + } + + discovery.relabel "worker_logs" { + targets = argument.targets.value + + // set the __path__, this is automatically translated as a label of filename (which should be dropped or normalized) + // DO NOT delete this line as it is needed to tail the pod logs on the node + rule { + action = "replace" + separator = "/" + source_labels = [ + "__meta_kubernetes_pod_uid", + "__meta_kubernetes_pod_container_name", + ] + replacement = "/var/log/pods/*$1/*.log" + target_label = "__path__" + } + + // set the __host__ + rule { + action = "replace" + source_labels = ["__meta_kubernetes_pod_node_name"] + target_label = "__host__" + } + + // as a result of kubernetes service discovery for pods, all of the meta data information is exposed in labels + // __meta_kubernetes_pod_*, including __meta_kubernetes_pod_container_id which can be used to determine what + // the pods container runtime is, docker (docker://...) or containerd (containerd://...) this will inform us + // which parsing stage to use. However, any labels that begin with __* are not passed to loki.process + // (pipeline) stages. Use a relabeling stage to set a label that can be used a LogQL selector in the stage + // below so parsing can be automatically determined, then drop the label from the loki.process stage. 
+ // set the container runtime as a label + rule { + action = "replace" + source_labels = ["__meta_kubernetes_pod_container_id"] + regex = "^(\\S+):\\/\\/.+$" + replacement = "$1" + target_label = "tmp_container_runtime" + } + + // make all labels on the pod available to the pipeline as labels, + // they are omitted before write via labelallow unless explicitly set + rule { + action = "labelmap" + regex = "__meta_kubernetes_pod_label_(.+)" + } + + // make all annotations on the pod available to the pipeline as labels, + // they are omitted before write via labelallow unless explicitly set + rule { + action = "labelmap" + regex = "__meta_kubernetes_pod_annotation_(.+)" + } + } + + // find eligible files on the worker + local.file_match "pods" { + path_targets = discovery.relabel.worker_logs.output + } + + // tail the files + loki.source.file "pods" { + targets = local.file_match.pods.targets + forward_to = [loki.process.parse.receiver] + } + + // parse the log based on the container runtime + loki.process "parse" { + forward_to = argument.forward_to.value + /******************************************************************************* + * Container Runtime Parsing + ********************************************************************************/ + // if the label tmp_container_runtime from above is containerd parse using cri + stage.match { + selector = "{tmp_container_runtime=~\"containerd|cri-o\"}" + // the cri processing stage extracts the following k/v pairs: log, stream, time, flags + stage.cri {} + + // Set the extract flags and stream values as labels + stage.labels { + values = { + flags = "", + stream = "", + } + } + } + + // if the label tmp_container_runtime from above is docker parse using docker + stage.match { + selector = "{tmp_container_runtime=\"docker\"}" + // the docker processing stage extracts the following k/v pairs: log, stream, time + stage.docker {} + + // Set the extract stream value as a label + stage.labels { + values = { + stream = 
"", + } + } + } + + // drop the temporary container runtime label as it is no longer needed + stage.label_drop { + values = ["tmp_container_runtime"] + } + } + +} + +// declare "api" { +// +// } + +declare "filename_normalize" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + export "receiver" { + value = loki.process.normalize_filename.receiver + } + + loki.process "normalize_filename" { + forward_to = argument.forward_to.value + + /******************************************************************************* + * Normalize Filename + ******************************************************************************* + Normalize the filename, the label "filename" is automatically created from discovered files in the matching path based on the + __path__ label from the relabel_configs. This has extremely high cardinality, it can be useful for a pod with multiple + containers/sidecars to know where the log came from but we can greatly reduce the cardinality. + Example: + Filename: /var/log/pods/agents_agent-logs-grafana-agent-k8hpm_5cafa323-a7ed-4703-9220-640d3e44a5e3/config-reloader/0.log + Becomes: /var/log/pods/agents/agent-logs-grafana-agent/config-reloader.log + */ + stage.regex { + // unescaped regex: ^(?P\/([^\/_]+\/)+)[^\/]+\/(?P[^\/]+)\/[0-9]+\.log + expression = "^(?P\\/([^\\/_]+\\/)+)[^\\/]+\\/(?P[^\\/]+)\\/[0-9]+\\.log" + source = "filename" + } + + stage.template { + source = "normalized_filename" + template = "{{ .path }}{{ .job }}/{{ .container_folder }}.log" + } + + stage.labels { + values = { + filename = "normalized_filename", + } + } + } +} diff --git a/modules/kubernetes/konnectivity-agent/README.md b/modules/kubernetes/konnectivity-agent/README.md index 72c5c7c..3eb5dd4 100644 --- a/modules/kubernetes/konnectivity-agent/README.md +++ b/modules/kubernetes/konnectivity-agent/README.md @@ -72,7 +72,7 @@ The following labels are automatically added to exported targets. 
The following example will scrape all konnectivity instances in cluster. -```river +```alloy import.git "konnectivity" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/kubernetes/kube-state-metrics/README.md b/modules/kubernetes/kube-state-metrics/README.md index 8934465..223961b 100644 --- a/modules/kubernetes/kube-state-metrics/README.md +++ b/modules/kubernetes/kube-state-metrics/README.md @@ -67,7 +67,7 @@ The following labels are automatically added to exported targets. The following example will scrape all kube-state-metrics in cluster. -```river +```alloy import.git "ksm" { repository = "https://github.com/grafana/ksm-modules.git" revision = "main" diff --git a/modules/kubernetes/opencost/README.md b/modules/kubernetes/opencost/README.md index 8f61fcb..ac287f0 100644 --- a/modules/kubernetes/opencost/README.md +++ b/modules/kubernetes/opencost/README.md @@ -97,7 +97,7 @@ The following labels are automatically added to exported targets. The following example will scrape all opencost instances in cluster. -```river +```alloy import.git "opencost" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/networking/consul/README.md b/modules/networking/consul/README.md index 6f84285..8bd5b3c 100644 --- a/modules/networking/consul/README.md +++ b/modules/networking/consul/README.md @@ -98,7 +98,7 @@ The following labels are automatically added to exported targets. The following example will scrape all consul instances in cluster. -```river +```alloy import.git "consul" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -134,7 +134,7 @@ prometheus.remote_write "local" { The following example will scrape consul for metrics on the local machine. 
-```river +```alloy import.git "consul" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/networking/haproxy/README.md b/modules/networking/haproxy/README.md index 49c8f02..6c75cbd 100644 --- a/modules/networking/haproxy/README.md +++ b/modules/networking/haproxy/README.md @@ -99,7 +99,7 @@ The following labels are automatically added to exported targets. The following example will scrape all haproxy instances in cluster. -```river +```alloy import.git "haproxy" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -135,7 +135,7 @@ prometheus.remote_write "local" { The following example will scrape haproxy for metrics on the local machine. -```river +```alloy import.git "haproxy" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/source-control/gitlab/README.md b/modules/source-control/gitlab/README.md index a8a12a6..9a78a00 100644 --- a/modules/source-control/gitlab/README.md +++ b/modules/source-control/gitlab/README.md @@ -97,7 +97,7 @@ The following labels are automatically added to exported targets. The following example will scrape all gitlab instances in cluster. -```river +```alloy import.git "gitlab" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -133,7 +133,7 @@ prometheus.remote_write "local_primary" { The following example will scrape gitlab for metrics on the local machine. -```river +```alloy import.git "gitlab" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/system/node-exporter/README.md b/modules/system/node-exporter/README.md index 9d908bb..0c77a9a 100644 --- a/modules/system/node-exporter/README.md +++ b/modules/system/node-exporter/README.md @@ -99,7 +99,7 @@ The following labels are automatically added to exported targets. The following example will scrape all node_exporter instances in cluster. 
-```river +```alloy import.git "node_exporter" { repository = "https://github.com/node_exporter/agent-modules.git" revision = "main" @@ -135,7 +135,7 @@ prometheus.remote_write "local" { The following example will scrape node_exporter for metrics on the local machine. -```river +```alloy import.git "node_exporter" { repository = "https://github.com/node_exporter/agent-modules.git" revision = "main" diff --git a/modules/ui/grafana/README.md b/modules/ui/grafana/README.md index 7f66aa4..831254a 100644 --- a/modules/ui/grafana/README.md +++ b/modules/ui/grafana/README.md @@ -99,7 +99,7 @@ The following labels are automatically added to exported targets. The following example will scrape all Grafana instances in cluster. -```river +```alloy import.git "grafana" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" @@ -135,7 +135,7 @@ prometheus.remote_write "local" { The following example will scrape grafana for metrics on the local machine. -```river +```alloy import.git "grafana" { repository = "https://github.com/grafana/flow-modules.git" revision = "main" diff --git a/modules/utils/logs/log-levels.alloy b/modules/utils/logs/log-levels.alloy new file mode 100644 index 0000000..e6d7f76 --- /dev/null +++ b/modules/utils/logs/log-levels.alloy @@ -0,0 +1,225 @@ +declare "default_level" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "default_level" { + comment = "The default log level to use if one is not set (default: unknown)" + optional = true + default = "unknown" + } + + export "receiver" { + value = loki.process.level_default.receiver + } + + loki.process "level_default" { + forward_to = argument.forward_to.value + + /******************************************************************************* + * Log-Level Parsing + ********************************************************************************/ + // default level to unknown + stage.static_labels { + 
values = { + level = argument.default_level.value, + } + } + + // default log_type to unknown + stage.static_labels { + values = { + log_type = "unknown", + } + } + + // check to see if the log line matches the klog format (https://github.com/kubernetes/klog) + stage.match { + // unescaped regex: ([IWED][0-9]{4}\s+[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+) + selector = "{level=\"" + argument.default_level.value + "\"} |~ \"([IWED][0-9]{4}\\\\s+[0-9]{2}:[0-9]{2}:[0-9]{2}\\\\.[0-9]+)\"" + + // extract log level, klog uses a single letter code for the level followed by the month and day i.e. I0119 + stage.regex { + expression = "((?P[A-Z])[0-9])" + } + + // if the extracted level is I set INFO + stage.replace { + source = "level" + expression = "(I)" + replace = "INFO" + } + + // if the extracted level is W set WARN + stage.replace { + source = "level" + expression = "(W)" + replace = "WARN" + } + + // if the extracted level is E set ERROR + stage.replace { + source = "level" + expression = "(E)" + replace = "ERROR" + } + + // if the extracted level is D set DEBUG + stage.replace { + source = "level" + expression = "(D)" + replace = "DEBUG" + } + + // set the log_type + stage.static_labels{ + values = { + log_type = "klog", + } + } + + // set the extracted level to be a label + stage.labels { + values = { + level = "", + } + } + } + + // check to see if the log line matches the zerolog format + stage.match { + // unescaped regex: ^.+(TRC|DBG|INF|WRN|ERR|FTL|PNC)[^=]+(\w+=("[^"]*"|\S+))(\s+(\w+=("[^"]*"|\S+)))*\s*$ + selector = "{level=\"" + argument.default_level.value + "\"} |~ \"^.+(TRC|DBG|INF|WRN|ERR|FTL|PNC)[^=]+(\\\\w+=(\\\"[^\\\"]*\\\"|\\\\S+))(\\\\s+(\\\\w+=(\\\"[^\\\"]*\\\"|\\\\S+)))*\\\\s*$\"" + + // set the log_type + stage.static_labels{ + values = { + log_type = "zerolog", + } + } + + // extract the level from the log + // unescaped regex: (?P[0-9]{4}-[0-9]{2}-[0-9]{2}(T|\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]+[^ ]*\s+)(?P(TRC|DBG|INF|WRN|ERR|FTL|PNC)).+ + stage.regex
{ + expression = "(?P[0-9]{4}-[0-9]{2}-[0-9]{2}(T|\\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]+[^ ]*\\s+)(?P(TRC|DBG|INF|WRN|ERR|FTL|PNC)).+" + } + + // set the extracted level to be a label + stage.labels { + values = { + level = "", + } + } + } + + // if the level is still unknown, do one last attempt at detecting it based on common levels + stage.match { + selector = "{level=\"" + argument.default_level.value + "\"}" + + // unescaped regex: (?i)(?:"(?:level|loglevel|levelname|lvl|levelText|SeverityText)":\s*"|\s*(?:level|loglevel|levelText|lvl)="?|\s+\[?)(?P(DEBUG?|DBG|INFO?(RMATION)?|WA?RN(ING)?|ERR(OR)?|CRI?T(ICAL)?|FATAL|FTL|NOTICE|TRACE|TRC|PANIC|PNC|ALERT|EMERGENCY))("|\s+|-|\s*\]) + stage.regex { + expression = "(?i)(?:\"(?:level|loglevel|levelname|lvl|levelText|SeverityText)\":\\s*\"|\\s*(?:level|loglevel|levelText|lvl)=\"?|\\s+\\[?)(?P(DEBUG?|DBG|INFO?(RMATION)?|WA?RN(ING)?|ERR(OR)?|CRI?T(ICAL)?|FATAL|FTL|NOTICE|TRACE|TRC|PANIC|PNC|ALERT|EMERGENCY))(\"|\\s+|-|\\s*\\])" + } + + // set the extracted level to be a label + stage.labels { + values = { + level = "", + } + } + } + } +} + +declare "normalize_level" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "transform" { + comment = "The transformation to apply to the level can be 'ToLower' or 'ToUpper' (default: ToLower)" + optional = true + default = "ToLower" + } + + export "receiver" { + value = loki.process.level_normalize.receiver + } + + loki.process "level_normalize" { + forward_to = argument.forward_to.value + + /******************************************************************************* + * Log-Level Normalization + ********************************************************************************/ + // normalize trace level, handles TRC, TRAC, or TRACE + stage.replace { + source = "level" + expression = "(?i)(trace?|trc)\\d*" + replace = "trace" + } + + // normalize debug level, handles DBG, DEBU, DEBUG, DEBUG1,
DEBUG2, DEBUG3, DEBUG4, DEBUG5, etc. + stage.replace { + source = "level" + expression = "(?i)(debug?|dbg)\\d*" + replace = "debug" + } + + // normalize info level handles INF, INFO, INFORMATION, or INFORMATIONAL + stage.replace { + source = "level" + expression = "(?i)(info?(mation(al)?)?)" + replace = "info" + } + + // normalize the warning level handles WRN, WARN or WARNING + stage.replace { + source = "level" + expression = "(?i)(wa?rn(ing)?)" + replace = "warning" + } + + // normalize the error level handles ERR or ERROR + stage.replace { + source = "level" + expression = "(?i)(err(or)?)" + replace = "error" + } + + // normalize the fatal level handles FTL or FATAL + stage.replace { + source = "level" + expression = "(?i)(fatal|ftl)" + replace = "fatal" + } + + // normalize the critical level handles CRIT or CRITICAL + stage.replace { + source = "level" + expression = "(?i)(crit(ical)?)" + replace = "critical" + } + + // normalize the panic level handles PNC or PANIC + stage.replace { + source = "level" + expression = "(?i)(panic|pnc)" + replace = "critical" + } + + // the level value could be anything fatal, notice, alert, emergency, there are no combinations / abbreviations to normalize for these + // but we can still convert to lower or upper case + stage.template { + source = "level" + template = "{{ " + argument.transform.value + " .Value }}" + } + + // set the extracted level to be a label + stage.labels { + values = { + level = "", + } + } + } +} diff --git a/modules/utils/logs/logs-metrics.alloy b/modules/utils/logs/logs-metrics.alloy new file mode 100644 index 0000000..043202a --- /dev/null +++ b/modules/utils/logs/logs-metrics.alloy @@ -0,0 +1,129 @@ +declare "pre_process_metrics" { + argument "keep_labels" { + optional = true + comment = "List of labels to keep before the metrics are generated" + default = [ + "app", + "cluster", + "component", + "env", + "level", + "namespace", + "region", + "service", + "squad", + "team", + "workload", + ] + } + + 
argument "prefix" { + comment = "the prefix to use with the metric names (default: log_)" + optional = true + default = "log_" + } + + argument "suffix" { + comment = "the suffix to use with the metric names (default: _pre_total)" + optional = true + default = "_pre_total" + } + + export "receiver" { + value = loki.process.pre_process_metrics.receiver + } + + loki.process "pre_process_metrics" { + forward_to = [] + + stage.label_keep { + values = argument.keep_labels.value + } + + stage.metrics { + metric.counter { + name = "lines" + argument.suffix.value + description = "total number of log lines ingested before processing" + prefix = argument.prefix.value + match_all = true + action = "inc" + max_idle_duration = "24h" + } + } + + stage.metrics { + metric.counter { + name = "bytes" + argument.suffix.value + description = "total number of log bytes ingested before processing" + prefix = argument.prefix.value + match_all = true + count_entry_bytes = true + action = "add" + max_idle_duration = "24h" + } + } + } +} + +declare "post_process_metrics" { + argument "keep_labels" { + optional = true + comment = "List of labels to keep before the metrics are generated" + default = [ + "app", + "cluster", + "component", + "env", + "level", + "namespace", + "region", + "service", + "squad", + "team", + "workload", + ] + } + + argument "prefix" { + comment = "the prefix to use with the metric names (default: log_)" + optional = true + default = "log_" + } + + argument "suffix" { + comment = "the suffix to use with the metric names (default: _total)" + optional = true + default = "_total" + } + + export "receiver" { + value = loki.process.post_process_metrics.receiver + } + + loki.process "post_process_metrics" { + forward_to = [] + + stage.metrics { + metric.counter { + name = "lines" + argument.suffix.value + description = "total number of log lines ingested after processing" + prefix = argument.prefix.value + match_all = true + action = "inc" + max_idle_duration = "24h" + }
+ } + + stage.metrics { + metric.counter { + name = "bytes" + argument.suffix.value + description = "total number of log bytes ingested after processing" + prefix = argument.prefix.value + match_all = true + count_entry_bytes = true + action = "add" + max_idle_duration = "24h" + } + } + } +} diff --git a/modules/utils/logs/utils.alloy b/modules/utils/logs/utils.alloy new file mode 100644 index 0000000..7160842 --- /dev/null +++ b/modules/utils/logs/utils.alloy @@ -0,0 +1,82 @@ +declare "structured_metadata" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "metadata" { + optional = true + } + + export "receiver" { + value = loki.process.structured_metadata.receiver + } + + /* + As all of the pod labels and annotations we transformed into labels in the previous relabelings to make + them available to the pipeline processing we need to ensure they are not automatically created in Loki. + This would result in an extremely high number of labels and values severely impacting query performance. + Not every log has to contain these labels, but this list should reflect the set of labels that you want + to explicitly allow.
+ */ + loki.process "structured_metadata" { + forward_to = argument.forward_to.value + + stage.structured_metadata { + values = coalesce(argument.metadata.value, { + filename = "filename", + instance = "instance", + log_type = "log_type", + version = "version", + helm_chart = "helm_sh_chart", + pod = "pod", + }) + } + + } +} + +declare "keep_labels" { + argument "forward_to" { + comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + } + + argument "keep_labels" { + optional = true + comment = "List of labels to keep before the log message is written to Loki" + default = [ + "app", + "cluster", + "component", + "container", + "env", + "job", + "level", + "namespace", + "region", + "service", + "squad", + "team", + "workload", + ] + } + + export "receiver" { + value = loki.process.keep_labels.receiver + } + + /* + As all of the pod labels and annotations we transformed into labels in the previous relabelings to make + them available to the pipeline processing we need to ensure they are not automatically created in Loki. + This would result in an extremely high number of labels and values severely impacting query performance. + Not every log has to contain these labels, but this list should reflect the set of labels that you want + to explicitly allow. + */ + loki.process "keep_labels" { + forward_to = argument.forward_to.value + + stage.label_keep { + values = argument.keep_labels.value + } + + } +}