From c50a43a20627b5b5792fa664031b32c5b644d98e Mon Sep 17 00:00:00 2001 From: Chris Mark Date: Wed, 27 Mar 2024 22:14:30 +0200 Subject: [PATCH] [pkg/stanza] Add regexp based tests & examples for filter operator (#31698) **Description:** This PR adds some additional unit tests and examples to cover the regexp based filter-in/filter-out functionalities of the [filter operator](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/stanza/docs/operators/filter.md). This is important functionality identified during a comparison of the `filelogreceiver`'s capabilities with those of [Filebeat](https://github.com/elastic/beats/tree/main/filebeat#filebeat). [Filebeat](https://github.com/elastic/beats/tree/main/filebeat#filebeat)'s [filestream input](https://github.com/elastic/beats/tree/main/filebeat/input/filestream) supports this filtering with its [include_lines](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-filestream.html#filebeat-input-filestream-include-lines) and [exclude_lines](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-filestream.html#filebeat-input-filestream-exclude-lines) options. The added tests and examples are to ensure the parity of this capability. **Link to tracking Issue:** **Testing:** - [FilterOutRegexp](https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/31698/files#diff-01d783f62fa2a61d9fbfe9342f0ea20629604563c550cee56b0fa0ba179556a1R50) - [FilterInRegexp](https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/31698/files#diff-01d783f62fa2a61d9fbfe9342f0ea20629604563c550cee56b0fa0ba179556a1R60) **Documentation:** Added the [Filtering log messages based on content](https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/31698/files#diff-7156c0db24790afae5276c0908dd0e02ecc7c23947607996d50008b8e4ab574dR1) example. PS: I'm not sure if this change technically requires a changelog entry. If so, please let me know. 
Signed-off-by: ChrsMark --- examples/logline-filtering/README.md | 13 ++++++++++++ .../otel-col-config-filter-in-logs.yaml | 17 ++++++++++++++++ .../otel-col-config-filter-out-logs.yaml | 17 ++++++++++++++++ .../transformer/filter/filter_test.go | 20 +++++++++++++++++++ 4 files changed, 67 insertions(+) create mode 100644 examples/logline-filtering/README.md create mode 100644 examples/logline-filtering/otel-col-config-filter-in-logs.yaml create mode 100644 examples/logline-filtering/otel-col-config-filter-out-logs.yaml diff --git a/examples/logline-filtering/README.md b/examples/logline-filtering/README.md new file mode 100644 index 000000000000..9ae695a33ae1 --- /dev/null +++ b/examples/logline-filtering/README.md @@ -0,0 +1,13 @@ +## Filtering log messages based on content + +Filelog receiver provides support for filtering logs based on their content. This can be achieved by using +the [filter operator](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/stanza/docs/operators/filter.md), +configured with matching regular expressions. + +With this happening at the collection point, a lot of resources at the destination backend +can be saved since no additional processing would need to take place. + +A full configuration example on how to filter out logs that start with the `INFO:` pattern is +provided in the [example config](./otel-col-config-filter-out-logs.yaml). 
+A full configuration example on how to only collect logs that start with the `WARN:` pattern is provided in +the [example config](./otel-col-config-filter-in-logs.yaml) \ No newline at end of file diff --git a/examples/logline-filtering/otel-col-config-filter-in-logs.yaml b/examples/logline-filtering/otel-col-config-filter-in-logs.yaml new file mode 100644 index 000000000000..a03f7b08b36f --- /dev/null +++ b/examples/logline-filtering/otel-col-config-filter-in-logs.yaml @@ -0,0 +1,17 @@ +receivers: + filelog: + include: [/var/log/busybox/simple.log] + operators: + - type: filter + expr: 'body not matches "^WARN:"' + +service: + pipelines: + logs: + receivers: [filelog] + exporters: [otlp/custom] + processors: [] + +exporters: + otlp/custom: + endpoint: http://0.0.0.0:4242 diff --git a/examples/logline-filtering/otel-col-config-filter-out-logs.yaml b/examples/logline-filtering/otel-col-config-filter-out-logs.yaml new file mode 100644 index 000000000000..ff19b50c6889 --- /dev/null +++ b/examples/logline-filtering/otel-col-config-filter-out-logs.yaml @@ -0,0 +1,17 @@ +receivers: + filelog: + include: [/var/log/busybox/simple.log] + operators: + - type: filter + expr: 'body matches "^INFO:"' + +service: + pipelines: + logs: + receivers: [filelog] + exporters: [otlp/custom] + processors: [] + +exporters: + otlp/custom: + endpoint: http://0.0.0.0:4242 diff --git a/pkg/stanza/operator/transformer/filter/filter_test.go b/pkg/stanza/operator/transformer/filter/filter_test.go index 5f9eed013b8f..ced34d202e11 100644 --- a/pkg/stanza/operator/transformer/filter/filter_test.go +++ b/pkg/stanza/operator/transformer/filter/filter_test.go @@ -46,6 +46,26 @@ func TestTransformer(t *testing.T) { `body.message == "test_message"`, false, }, + { + "FilterOutRegexp", + &entry.Entry{ + Body: map[string]any{ + "message": "INFO: this is an info message", + }, + }, + `body.message matches "^INFO:"`, + true, + }, + { + "FilterInRegexp", + &entry.Entry{ + Body: map[string]any{ + "message": 
"WARN: this is a warning message", + }, + }, + `body.message not matches "^WARN:"`, + false, + }, { "MatchAttribute", &entry.Entry{