From 18cc174da13c664f2a528321f5227e3791d6836e Mon Sep 17 00:00:00 2001 From: George Robinson Date: Thu, 11 Jan 2024 10:31:48 +0000 Subject: [PATCH 1/7] UTF-8 in Mimir --- cmd/mimir/config-descriptor.json | 11 ++ cmd/mimir/help-all.txt.tmpl | 2 + .../configuration-parameters/index.md | 8 ++ pkg/alertmanager/alertmanager_config.go | 93 +++++++++++++ pkg/alertmanager/alertmanager_config_test.go | 129 ++++++++++++++++++ pkg/alertmanager/api.go | 3 + pkg/alertmanager/multitenant.go | 15 ++ pkg/mimir/modules.go | 12 +- 8 files changed, 269 insertions(+), 4 deletions(-) create mode 100644 pkg/alertmanager/alertmanager_config.go create mode 100644 pkg/alertmanager/alertmanager_config_test.go diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 96426a111ec..509c44a4e45 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -13618,6 +13618,17 @@ "fieldFlag": "alertmanager.enable-state-cleanup", "fieldType": "boolean", "fieldCategory": "advanced" + }, + { + "kind": "field", + "name": "utf8_strict_mode", + "required": false, + "desc": "Enable UTF-8 strict mode. Allows UTF-8 in the matchers for routes and inhibition rules, in silences, and in the labels for alerts. It is recommended to check both alertmanager_matchers_disagree and alertmanager_matchers_incompatible metrics before using this mode as otherwise some tenant configurations might fail to load.", + "fieldValue": null, + "fieldDefaultValue": false, + "fieldFlag": "alertmanager.utf8-strict-mode", + "fieldType": "boolean", + "fieldCategory": "advanced" } ], "fieldValue": null, diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index 7ccd20392a2..119c2944c03 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -273,6 +273,8 @@ Usage of ./cmd/mimir/mimir: Directory to store Alertmanager state and temporarily configuration files. The content of this directory is not required to be persisted between restarts unless Alertmanager replication has been disabled. (default "./data-alertmanager/") -alertmanager.storage.retention duration How long should we store stateful data (notification logs and silences). For notification log entries, refers to how long should we keep entries before they expire and are deleted. For silences, refers to how long should tenants view silences after they expire and are deleted. (default 120h0m0s) + -alertmanager.utf8-strict-mode + Enable UTF-8 strict mode. Allows UTF-8 in the matchers for routes and inhibition rules, in silences, and in the labels for alerts. It is recommended to check both alertmanager_matchers_disagree and alertmanager_matchers_incompatible metrics before using this mode as otherwise some tenant configurations might fail to load. -alertmanager.web.external-url string The URL under which Alertmanager is externally reachable (eg. could be different than -http.alertmanager-http-prefix in case Alertmanager is served via a reverse proxy). This setting is used both to configure the internal requests router and to generate links in alert templates. If the external URL has a path portion, it will be used to prefix all HTTP endpoints served by Alertmanager, both the UI and API. (default http://localhost:8080/alertmanager) -api.skip-label-name-validation-header-enabled diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index b5c24b67b8c..ed1724ec240 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -2251,6 +2251,14 @@ alertmanager_client: # removed for any tenant that does not have a configuration. # CLI flag: -alertmanager.enable-state-cleanup [enable_state_cleanup: | default = true] + +# (advanced) Enable UTF-8 strict mode. Allows UTF-8 in the matchers for routes +# and inhibition rules, in silences, and in the labels for alerts. It is +# recommended to check both alertmanager_matchers_disagree and +# alertmanager_matchers_incompatible metrics before using this mode as otherwise +# some tenant configurations might fail to load. +# CLI flag: -alertmanager.utf8-strict-mode +[utf8_strict_mode: | default = false] ``` ### alertmanager_storage diff --git a/pkg/alertmanager/alertmanager_config.go b/pkg/alertmanager/alertmanager_config.go new file mode 100644 index 00000000000..a486ff91c31 --- /dev/null +++ b/pkg/alertmanager/alertmanager_config.go @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// This file contains code for the migration from classic mode to UTF-8 strict +// mode in the Alertmanager. It is intended to be used to gather data about +// configurations that are incompatible with the UTF-8 matchers parser, so +// action can be taken to fix those configurations before enabling the mode. + +package alertmanager + +import ( + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/prometheus/alertmanager/matchers/compat" + "gopkg.in/yaml.v3" + + "github.com/grafana/mimir/pkg/alertmanager/alertspb" +) + +// matchersConfig is a simplified version of an Alertmanager configuration +// containing just the configuration options that are matchers. matchersConfig +// is used to validate that existing configurations are forwards compatible with +// the new UTF-8 parser in Alertmanager (see the matchers/parse package). +type matchersConfig struct { + Route *matchersRoute `yaml:"route,omitempty" json:"route,omitempty"` + InhibitRules []*matchersInhibitionRule `yaml:"inhibit_rules,omitempty" json:"inhibit_rules,omitempty"` +} + +type matchersRoute struct { + Matchers []string `yaml:"matchers,omitempty" json:"matchers,omitempty"` + Routes []*matchersRoute `yaml:"routes,omitempty" json:"routes,omitempty"` +} + +type matchersInhibitionRule struct { + SourceMatchers []string `yaml:"source_matchers,omitempty" json:"source_matchers,omitempty"` + TargetMatchers []string `yaml:"target_matchers,omitempty" json:"target_matchers,omitempty"` +} + +// validateMatchersInConfigDesc validates that a configuration is forwards compatible with the +// UTF-8 matchers parser (matchers/parse). It does so via loading the same configuration a second +// time but instead via the fallback parser which emits logs and metrics about incompatible inputs +// and disagreement. If no incompatible inputs or disagreement are found, then the Alertmanager +// can be switched to the UTF-8 strict mode. Otherwise, configurations should be fixed before +// enabling the mode. +func validateMatchersInConfigDesc(logger log.Logger, metrics *compat.Metrics, origin string, cfg alertspb.AlertConfigDesc) { + // Do not add origin to the logger as it's added in the compat package. + logger = log.With(logger, "user", cfg.User) + parseFn := compat.FallbackMatchersParser(logger, metrics) + matchersCfg := matchersConfig{} + if err := yaml.Unmarshal([]byte(cfg.RawConfig), &matchersCfg); err != nil { + level.Warn(logger).Log("msg", "Failed to load configuration in validateMatchersInConfigDesc", "origin", origin, "err", err) + return + } + validateRoute(logger, parseFn, origin, matchersCfg.Route, cfg.User) + validateInhibitionRules(logger, parseFn, origin, matchersCfg.InhibitRules, cfg.User) +} + +func validateRoute(logger log.Logger, parseFn compat.ParseMatchers, origin string, r *matchersRoute, user string) { + if r == nil { + // This shouldn't be possible, but if somehow a tenant does have a nil route this prevents + // a nil pointer dereference and a subsequent panic. + return + } + for _, m := range r.Matchers { + // If parseFn returns an error then the input is invalid in both classic mode and UTF-8 + // strict mode. The alertmanager_matchers_invalid metric will be incremented in the compat + // package, and all occurrences of disagreement and incompatible inputs will be logged. + // However, invalid inputs are not logged there, so we log it here just in case we need + // this information. Though, in general, we are not concerned about inputs that are invalid + // in both modes. + if _, err := parseFn(m, origin); err != nil { + level.Debug(logger).Log("msg", "Invalid matcher in route", "input", m, "origin", origin, "err", err) + } + } + for _, route := range r.Routes { + validateRoute(logger, parseFn, origin, route, user) + } +} + +func validateInhibitionRules(logger log.Logger, parseFn compat.ParseMatchers, origin string, rules []*matchersInhibitionRule, _ string) { + for _, r := range rules { + for _, m := range r.SourceMatchers { + if _, err := parseFn(m, origin); err != nil { + // See comments from ValidateRoute. + level.Debug(logger).Log("msg", "Invalid matcher in inhibition rule source matchers", "input", m, "origin", origin, "err", err) + } + } + for _, m := range r.TargetMatchers { + if _, err := parseFn(m, origin); err != nil { + // See comments from ValidateRoute. + level.Debug(logger).Log("msg", "Invalid matcher in inhibition rule target matchers", "input", m, "origin", origin, "err", err) + } + } + } +} diff --git a/pkg/alertmanager/alertmanager_config_test.go b/pkg/alertmanager/alertmanager_config_test.go new file mode 100644 index 00000000000..beb97510cca --- /dev/null +++ b/pkg/alertmanager/alertmanager_config_test.go @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package alertmanager + +import ( + "testing" + + "github.com/go-kit/log" + "github.com/prometheus/alertmanager/matchers/compat" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/require" + + "github.com/grafana/mimir/pkg/alertmanager/alertspb" +) + +func TestValidateMatchersInConfigDesc(t *testing.T) { + tests := []struct { + name string + config alertspb.AlertConfigDesc + total float64 + disagreeTotal float64 + incompatibleTotal float64 + invalidTotal float64 + }{{ + name: "config is accepted", + config: alertspb.AlertConfigDesc{ + RawConfig: ` +route: + routes: + - matchers: + - foo=bar +inhibit_rules: + - source_matchers: + - bar=baz + - target_matchers: + - baz=qux +`, + }, + total: 3, + }, { + name: "config contains invalid input", + config: alertspb.AlertConfigDesc{ + RawConfig: ` +route: + routes: + - matchers: + - foo!bar +inhibit_rules: + - source_matchers: + - bar!baz + - target_matchers: + - baz!qux +`, + }, + total: 3, + invalidTotal: 3, + }, { + name: "config is accepted in matchers/parse but not pkg/labels", + config: alertspb.AlertConfigDesc{ + RawConfig: ` +route: + routes: + - matchers: + - foo🙂=bar +inhibit_rules: + - source_matchers: + - bar🙂=baz + - target_matchers: + - baz🙂=qux +`, + }, + total: 3, + }, { + name: "config is accepted in pkg/labels but not matchers/parse", + config: alertspb.AlertConfigDesc{ + RawConfig: ` +route: + routes: + - matchers: + - foo= +inhibit_rules: + - source_matchers: + - bar= + - target_matchers: + - baz= +`, + }, + total: 3, + incompatibleTotal: 3, + }, { + name: "config contains disagreement", + config: alertspb.AlertConfigDesc{ + RawConfig: ` +route: + routes: + - matchers: + - foo="\xf0\x9f\x99\x82" +inhibit_rules: + - source_matchers: + - bar="\xf0\x9f\x99\x82" + - target_matchers: + - baz="\xf0\x9f\x99\x82" +`, + }, + total: 3, + disagreeTotal: 3, + }} + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + m := compat.NewMetrics(prometheus.NewRegistry()) + validateMatchersInConfigDesc(log.NewNopLogger(), m, "test", test.config) + requireMetric(t, test.total, m.Total) + requireMetric(t, test.disagreeTotal, m.DisagreeTotal) + requireMetric(t, test.incompatibleTotal, m.IncompatibleTotal) + requireMetric(t, test.invalidTotal, m.InvalidTotal) + }) + } +} + +func requireMetric(t *testing.T, expected float64, m *prometheus.GaugeVec) { + if expected == 0 { + require.Equal(t, 0, testutil.CollectAndCount(m)) + } else { + require.Equal(t, 1, testutil.CollectAndCount(m)) + require.Equal(t, expected, testutil.ToFloat64(m)) + } +} diff --git a/pkg/alertmanager/api.go b/pkg/alertmanager/api.go index f0947ba4c7b..65638f243f5 100644 --- a/pkg/alertmanager/api.go +++ b/pkg/alertmanager/api.go @@ -20,6 +20,7 @@ import ( "github.com/grafana/dskit/tenant" "github.com/pkg/errors" "github.com/prometheus/alertmanager/config" + "github.com/prometheus/alertmanager/matchers/compat" "github.com/prometheus/alertmanager/template" commoncfg "github.com/prometheus/common/config" "gopkg.in/yaml.v3" @@ -186,6 +187,8 @@ func (am *MultitenantAlertmanager) DeleteUserConfig(w http.ResponseWriter, r *ht // Partially copied from: https://github.com/prometheus/alertmanager/blob/8e861c646bf67599a1704fc843c6a94d519ce312/cli/check_config.go#L65-L96 func validateUserConfig(logger log.Logger, cfg alertspb.AlertConfigDesc, limits Limits, user string) error { + validateMatchersInConfigDesc(logger, compat.RegisteredMetrics, "api", cfg) + // We don't have a valid use case for empty configurations. If a tenant does not have a // configuration set and issue a request to the Alertmanager, we'll a) upload an empty // config and b) immediately start an Alertmanager instance for them if a fallback diff --git a/pkg/alertmanager/multitenant.go b/pkg/alertmanager/multitenant.go index 2800c832202..53c2694b264 100644 --- a/pkg/alertmanager/multitenant.go +++ b/pkg/alertmanager/multitenant.go @@ -32,6 +32,7 @@ import ( "github.com/prometheus/alertmanager/cluster/clusterpb" amconfig "github.com/prometheus/alertmanager/config" "github.com/prometheus/alertmanager/featurecontrol" + "github.com/prometheus/alertmanager/matchers/compat" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "golang.org/x/time/rate" @@ -91,6 +92,11 @@ type MultitenantAlertmanagerConfig struct { // Allow disabling of full_state object cleanup. EnableStateCleanup bool `yaml:"enable_state_cleanup" category:"advanced"` + + // Enable UTF-8 strict mode. This means Alertmanager uses the matchers/parse parser + // to parse configurations and API requests, instead of pkg/labels. Use this mode + // once you are confident that your configuration is forwards compatible. + UTF8StrictMode bool `yaml:"utf8_strict_mode" category:"advanced"` } const ( @@ -120,6 +126,8 @@ func (cfg *MultitenantAlertmanagerConfig) RegisterFlags(f *flag.FlagSet, logger cfg.ShardingRing.RegisterFlags(f, logger) f.DurationVar(&cfg.PeerTimeout, "alertmanager.peer-timeout", defaultPeerTimeout, "Time to wait between peers to send notifications.") + + f.BoolVar(&cfg.UTF8StrictMode, "alertmanager.utf8-strict-mode", false, "Enable UTF-8 strict mode. Allows UTF-8 in the matchers for routes and inhibition rules, in silences, and in the labels for alerts. It is recommended to check both alertmanager_matchers_disagree and alertmanager_matchers_incompatible metrics before using this mode as otherwise some tenant configurations might fail to load.") } // Validate config and returns error on failure @@ -670,6 +678,13 @@ func (am *MultitenantAlertmanager) setConfig(cfg alertspb.AlertConfigDesc) error var userTemplateDir = filepath.Join(am.getTenantDirectory(cfg.User), templatesDir) var pathsToRemove = make(map[string]struct{}) + // Instead of using "config" as the origin, as in Prometheus Alertmanager, we use "tenant". + // The reason for this that the config.Load function uses the origin "config", + // which is correct, but Mimir uses config.Load to validate both API requests and tenant + // configurations. This means metrics from API requests are confused with metrics from + // tenant configurations. To avoid this confusion, we use a different origin. + validateMatchersInConfigDesc(am.logger, compat.RegisteredMetrics, "tenant", cfg) + // List existing files to keep track of the ones to be removed if oldTemplateFiles, err := os.ReadDir(userTemplateDir); err == nil { for _, file := range oldTemplateFiles { diff --git a/pkg/mimir/modules.go b/pkg/mimir/modules.go index ffbeba88477..090406f9160 100644 --- a/pkg/mimir/modules.go +++ b/pkg/mimir/modules.go @@ -883,10 +883,14 @@ func (t *Mimir) initRuler() (serv services.Service, err error) { } func (t *Mimir) initAlertManager() (serv services.Service, err error) { - // Initialize the compat package in classic mode. This has the same behavior as if - // the compat package was not initialized, but with debug logs when a configuration - // is loaded or a new configuration is submitted. - features, err := featurecontrol.NewFlags(util_log.Logger, featurecontrol.FeatureClassicMode) + mode := featurecontrol.FeatureClassicMode + if t.Cfg.Alertmanager.UTF8StrictMode { + level.Debug(util_log.Logger).Log("msg", "Starting Alertmanager in UTF-8 strict mode") + mode = featurecontrol.FeatureUTF8StrictMode + } else { + level.Debug(util_log.Logger).Log("msg", "Starting Alertmanager in classic mode") + } + features, err := featurecontrol.NewFlags(util_log.Logger, mode) util_log.CheckFatal("initializing Alertmanager feature flags", err) compat.InitFromFlags(util_log.Logger, compat.RegisteredMetrics, features) From 8f483ca784f0709c8e5a51a3052e700d807e73ee Mon Sep 17 00:00:00 2001 From: George Robinson Date: Wed, 24 Jan 2024 19:53:13 +0000 Subject: [PATCH 2/7] Add integration tests for classic and UTF-8 mode --- integration/alertmanager_test.go | 157 +++++++++++++++++++++++++++++++ integration/configs.go | 22 +++++ 2 files changed, 179 insertions(+) diff --git a/integration/alertmanager_test.go b/integration/alertmanager_test.go index b819bd44193..e4bf48dd9f5 100644 --- a/integration/alertmanager_test.go +++ b/integration/alertmanager_test.go @@ -126,6 +126,163 @@ func TestAlertmanager(t *testing.T) { require.Equal(t, "Accept-Encoding", res.Header.Get("Vary")) } +// This test asserts that in classic mode it is not possible to upload configurations, +// create silences, or post alerts that contain UTF-8 on the left hand side of label +// matchers or label names. It can be deleted when the -alertmanager.utf8-strict-mode +// flag is removed. +func TestAlertmanagerClassicMode(t *testing.T) { + s, err := e2e.NewScenario(networkName) + require.NoError(t, err) + defer s.Close() + + consul := e2edb.NewConsul() + minio := e2edb.NewMinio(9000, alertsBucketName) + require.NoError(t, s.StartAndWaitReady(consul, minio)) + + require.NoError(t, uploadAlertmanagerConfig(minio, alertsBucketName, "user-1", mimirAlertmanagerUserConfigYaml)) + + alertmanager := e2emimir.NewAlertmanager( + "alertmanager", + mergeFlags( + AlertmanagerFlags(), + AlertmanagerS3Flags(), + AlertmanagerShardingFlags(consul.NetworkHTTPEndpoint(), 1), + map[string]string{"-alertmanager.utf8-strict-mode": "false"}, + ), + ) + require.NoError(t, s.StartAndWaitReady(alertmanager)) + require.NoError(t, alertmanager.WaitSumMetrics(e2e.Equals(1), "cortex_alertmanager_config_last_reload_successful")) + require.NoError(t, alertmanager.WaitSumMetrics(e2e.Greater(0), "cortex_alertmanager_config_hash")) + + c, err := e2emimir.NewClient("", "", alertmanager.HTTPEndpoint(), "", "user-1") + require.NoError(t, err) + + ctx, cancelFunc := context.WithTimeout(context.Background(), 15*time.Second) + defer cancelFunc() + + // Should be able to use classic config, but not UTF-8 configuration. + require.NoError(t, c.SetAlertmanagerConfig(ctx, mimirAlertmanagerUserClassicConfigYaml, nil)) + require.EqualError(t, c.SetAlertmanagerConfig(ctx, mimirAlertmanagerUserUTF8ConfigYaml, nil), "setting config failed with status 400 and error error validating Alertmanager config: bad matcher format: bar🙂=baz\n") + + // Should be able to create a silence with classic matchers, but not UTF-8 matchers. + silenceID, err := c.CreateSilence(ctx, types.Silence{ + Matchers: amlabels.Matchers{ + {Name: "foo", Value: "bar"}, + }, + Comment: "This is a test silence.", + StartsAt: time.Now(), + EndsAt: time.Now().Add(time.Minute), + }) + require.NoError(t, err) + require.NotEmpty(t, silenceID) + + silenceID, err = c.CreateSilence(ctx, types.Silence{ + Matchers: amlabels.Matchers{ + {Name: "bar🙂", Value: "baz"}, + }, + Comment: "This is a test silence.", + StartsAt: time.Now(), + EndsAt: time.Now().Add(time.Minute), + }) + require.EqualError(t, err, "creating the silence failed with status 400 and error \"silence invalid: invalid label matcher 0: invalid label name \\\"bar🙂\\\"\"\n") + require.Empty(t, silenceID) + + // Should be able to post alerts with classic labels but not UTF-8 labels. + require.NoError(t, c.SendAlertToAlermanager(ctx, &model.Alert{ + Labels: model.LabelSet{ + "foo": "bar", + }, + StartsAt: time.Now(), + EndsAt: time.Now().Add(time.Minute), + })) + require.EqualError(t, c.SendAlertToAlermanager(ctx, &model.Alert{ + Labels: model.LabelSet{ + "bar🙂": "baz", + }, + StartsAt: time.Now(), + EndsAt: time.Now().Add(time.Minute), + }), "sending alert failed with status 400 and error \"invalid label set: invalid name \\\"bar🙂\\\"\"\n") +} + +// This test asserts that in UTF-8 strict mode it is possible to upload configurations, +// create silences, and post alerts that contain UTF-8 on the left hand side of label +// matchers and label names. It is the opposite of TestAlertmanagerClassicMode. It should +// be merged with the TestAlertmanager test when the -alertmanager.utf8-strict-mode flag +// is removed. +func TestAlertmanagerUTF8StrictMode(t *testing.T) { + s, err := e2e.NewScenario(networkName) + require.NoError(t, err) + defer s.Close() + + consul := e2edb.NewConsul() + minio := e2edb.NewMinio(9000, alertsBucketName) + require.NoError(t, s.StartAndWaitReady(consul, minio)) + + require.NoError(t, uploadAlertmanagerConfig(minio, alertsBucketName, "user-1", mimirAlertmanagerUserConfigYaml)) + + alertmanager := e2emimir.NewAlertmanager( + "alertmanager", + mergeFlags( + AlertmanagerFlags(), + AlertmanagerS3Flags(), + AlertmanagerShardingFlags(consul.NetworkHTTPEndpoint(), 1), + map[string]string{"-alertmanager.utf8-strict-mode": "true"}, + ), + ) + require.NoError(t, s.StartAndWaitReady(alertmanager)) + require.NoError(t, alertmanager.WaitSumMetrics(e2e.Equals(1), "cortex_alertmanager_config_last_reload_successful")) + require.NoError(t, alertmanager.WaitSumMetrics(e2e.Greater(0), "cortex_alertmanager_config_hash")) + + c, err := e2emimir.NewClient("", "", alertmanager.HTTPEndpoint(), "", "user-1") + require.NoError(t, err) + + ctx, cancelFunc := context.WithTimeout(context.Background(), 15*time.Second) + defer cancelFunc() + + // Should be able to use classic and UTF-8 configurations without error. + require.NoError(t, c.SetAlertmanagerConfig(ctx, mimirAlertmanagerUserClassicConfigYaml, nil)) + require.NoError(t, c.SetAlertmanagerConfig(ctx, mimirAlertmanagerUserUTF8ConfigYaml, nil)) + + // Should be able to create a silence with both classic matchers and UTF-8 matchers. + silenceID, err := c.CreateSilence(ctx, types.Silence{ + Matchers: amlabels.Matchers{ + {Name: "foo", Value: "bar"}, + }, + Comment: "This is a test silence.", + StartsAt: time.Now(), + EndsAt: time.Now().Add(time.Minute), + }) + require.NoError(t, err) + require.NotEmpty(t, silenceID) + + silenceID, err = c.CreateSilence(ctx, types.Silence{ + Matchers: amlabels.Matchers{ + {Name: "bar🙂", Value: "baz"}, + }, + Comment: "This is a test silence.", + StartsAt: time.Now(), + EndsAt: time.Now().Add(time.Minute), + }) + require.NoError(t, err) + require.NotEmpty(t, silenceID) + + // Should be able to post alerts with both classic labels and UTF-8 labels. + require.NoError(t, c.SendAlertToAlermanager(ctx, &model.Alert{ + Labels: model.LabelSet{ + "foo": "bar", + }, + StartsAt: time.Now(), + EndsAt: time.Now().Add(time.Minute), + })) + require.NoError(t, c.SendAlertToAlermanager(ctx, &model.Alert{ + Labels: model.LabelSet{ + "bar🙂": "baz", + }, + StartsAt: time.Now(), + EndsAt: time.Now().Add(time.Minute), + })) +} + func TestAlertmanagerV1Deprecated(t *testing.T) { s, err := e2e.NewScenario(networkName) require.NoError(t, err) diff --git a/integration/configs.go b/integration/configs.go index b83c0840f9d..e5aba64aba2 100644 --- a/integration/configs.go +++ b/integration/configs.go @@ -55,6 +55,28 @@ receivers: - name: "example_receiver" ` + mimirAlertmanagerUserClassicConfigYaml = `route: + receiver: test + group_by: [foo] + routes: + - matchers: + - foo=bar + - bar=baz +receivers: + - name: test +` + + mimirAlertmanagerUserUTF8ConfigYaml = `route: + receiver: test + group_by: [bar🙂] + routes: + - matchers: + - foo=bar + - bar🙂=baz +receivers: + - name: test +` + mimirRulerUserConfigYaml = `groups: - name: rule interval: 100s From d5b45eca04847e8bd11bc32c8fe53591a0a9fb3e Mon Sep 17 00:00:00 2001 From: George Robinson Date: Wed, 24 Jan 2024 20:06:17 +0000 Subject: [PATCH 3/7] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76c5ba2d0c3..7208a4e95b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Grafana Mimir +* [CHANGE] Alertmanager: CLI flag `-utf8-strict-mode` has been added, and its default value is set to `true`. #6898 * [CHANGE] Alertmanager: Deprecates the `v1` API. All `v1` API endpoints now respond with a JSON deprecation notice and a status code of `410`. All endpoints have a `v2` equivalent. The list of endpoints is: #7103 * `/api/v1/alerts` * `/api/v1/receivers` From 29c8e34d80f3a95245f42b064801ec484a1ac4c1 Mon Sep 17 00:00:00 2001 From: George Robinson Date: Wed, 24 Jan 2024 22:39:17 +0000 Subject: [PATCH 4/7] Add integration tests for matchers/compat metrics --- integration/alertmanager_test.go | 82 ++++++++++++++++++++++++++++++++ integration/configs.go | 30 ++++++++++++ 2 files changed, 112 insertions(+) diff --git a/integration/alertmanager_test.go b/integration/alertmanager_test.go index e4bf48dd9f5..27d5a1258eb 100644 --- a/integration/alertmanager_test.go +++ b/integration/alertmanager_test.go @@ -283,6 +283,88 @@ func TestAlertmanagerUTF8StrictMode(t *testing.T) { })) } +// This test asserts that the correct metrics are incremented when configurations are uploaded, +// including configurations with disagreement, incompatible and invalid matchers. It can be deleted +// when the -alertmanager.utf8-strict-mode flag is removed. +func TestAlertmanagerMatchersMetrics(t *testing.T) { + s, err := e2e.NewScenario(networkName) + require.NoError(t, err) + defer s.Close() + + consul := e2edb.NewConsul() + minio := e2edb.NewMinio(9000, alertsBucketName) + require.NoError(t, s.StartAndWaitReady(consul, minio)) + + // Upload the default configuration for two users. + require.NoError(t, uploadAlertmanagerConfig(minio, alertsBucketName, "user-1", mimirAlertmanagerUserConfigYaml)) + require.NoError(t, uploadAlertmanagerConfig(minio, alertsBucketName, "user-2", mimirAlertmanagerUserConfigYaml)) + + alertmanager := e2emimir.NewAlertmanager( + "alertmanager", + mergeFlags( + AlertmanagerFlags(), + AlertmanagerS3Flags(), + AlertmanagerShardingFlags(consul.NetworkHTTPEndpoint(), 1), + ), + ) + require.NoError(t, s.StartAndWaitReady(alertmanager)) + require.NoError(t, alertmanager.WaitSumMetrics(e2e.Equals(2), "cortex_alertmanager_config_last_reload_successful")) + require.NoError(t, alertmanager.WaitSumMetrics(e2e.Greater(0), "cortex_alertmanager_config_hash")) + + c1, err := e2emimir.NewClient("", "", alertmanager.HTTPEndpoint(), "", "user-1") + require.NoError(t, err) + c2, err := e2emimir.NewClient("", "", alertmanager.HTTPEndpoint(), "", "user-2") + require.NoError(t, err) + + // The metrics should all be zero as no configurations contain matchers. + metricNames := []string{ + "alertmanager_matchers_parse", + "alertmanager_matchers_disagree", + "alertmanager_matchers_incompatible", + "alertmanager_matchers_invalid", + } + metrics, err := alertmanager.SumMetrics(metricNames, e2e.SkipMissingMetrics) + require.NoError(t, err) + require.Equal(t, []float64{0, 0, 0, 0}, metrics) + + ctx, cancelFunc := context.WithTimeout(context.Background(), 15*time.Second) + defer cancelFunc() + + // Upload a configuration for user1. + require.NoError(t, c1.SetAlertmanagerConfig(ctx, mimirAlertmanagerUserClassicConfigYaml, nil)) + metrics, err = alertmanager.SumMetrics(metricNames, e2e.SkipMissingMetrics) + require.NoError(t, err) + // The sum for alertmanager_matchers_parse should be 4 as there are two matchers for origin=api + // and another two matchers for origin=config. + require.Equal(t, []float64{4, 0, 0, 0}, metrics) + + // Upload a configuration for user2. + require.NoError(t, c2.SetAlertmanagerConfig(ctx, mimirAlertmanagerUserClassicConfigYaml, nil)) + metrics, err = alertmanager.SumMetrics(metricNames, e2e.SkipMissingMetrics) + require.NoError(t, err) + // The sum for alertmanager_matchers_parse should be 8 as there are two matchers for origin=api + // and another two matchers for origin=config, and 4 from the previous sum. + require.Equal(t, []float64{8, 0, 0, 0}, metrics) + + // Upload a configuration with disagreement. + require.NoError(t, c2.SetAlertmanagerConfig(ctx, mimirAlertmanagerDisagreementConfigYaml, nil)) + metrics, err = alertmanager.SumMetrics(metricNames, e2e.SkipMissingMetrics) + require.NoError(t, err) + require.Equal(t, []float64{10, 1, 0, 0}, metrics) + + // Upload a configuration with incompatible matchers. + require.NoError(t, c2.SetAlertmanagerConfig(ctx, mimirAlertmanagerIncompatibleConfigYaml, nil)) + metrics, err = alertmanager.SumMetrics(metricNames, e2e.SkipMissingMetrics) + require.NoError(t, err) + require.Equal(t, []float64{12, 1, 1, 0}, metrics) + + // Upload a configuration with invalid matchers. + require.EqualError(t, c2.SetAlertmanagerConfig(ctx, mimirAlertmanagerInvalidConfigYaml, nil), "setting config failed with status 400 and error error validating Alertmanager config: bad matcher format: \n") + metrics, err = alertmanager.SumMetrics(metricNames, e2e.SkipMissingMetrics) + require.NoError(t, err) + require.Equal(t, []float64{14, 1, 1, 2}, metrics) +} + func TestAlertmanagerV1Deprecated(t *testing.T) { s, err := e2e.NewScenario(networkName) require.NoError(t, err) diff --git a/integration/configs.go b/integration/configs.go index e5aba64aba2..9e2aa3e1e03 100644 --- a/integration/configs.go +++ b/integration/configs.go @@ -77,6 +77,36 @@ receivers: - name: test ` + mimirAlertmanagerDisagreementConfigYaml = `route: + receiver: test + group_by: [foo] + routes: + - matchers: + - foo="\xf0\x9f\x99\x82" +receivers: + - name: test +` + + mimirAlertmanagerIncompatibleConfigYaml = `route: + receiver: test + group_by: [foo] + routes: + - matchers: + - foo= +receivers: + - name: test +` + + mimirAlertmanagerInvalidConfigYaml = `route: + receiver: test + group_by: [foo] + routes: + - matchers: + - foo=,, +receivers: + - name: test +` + mimirRulerUserConfigYaml = `groups: - name: rule interval: 100s From 9e4962c4040b462e8e0f395dd112c4e09ed74c64 Mon Sep 17 00:00:00 2001 From: George Robinson Date: Thu, 25 Jan 2024 10:47:55 +0000 Subject: [PATCH 5/7] Feedback on CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7208a4e95b1..91a350f2cb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### Grafana Mimir -* [CHANGE] Alertmanager: CLI flag `-utf8-strict-mode` has been added, and its default value is set to `true`. #6898 +* [FEATURE] Alertmanager: Added `-alertmanager.utf8-strict-mode` to control support for any UTF-8 character as part of Alertmanager configuration/API matchers and labels. It's default value is set to `false`. #6898 * [CHANGE] Alertmanager: Deprecates the `v1` API. All `v1` API endpoints now respond with a JSON deprecation notice and a status code of `410`. All endpoints have a `v2` equivalent. The list of endpoints is: #7103 * `/api/v1/alerts` * `/api/v1/receivers` From dd88340057f581eefb4140f3e14d8baec1b1dfd7 Mon Sep 17 00:00:00 2001 From: George Robinson Date: Thu, 25 Jan 2024 10:50:11 +0000 Subject: [PATCH 6/7] Change utf8-strict-mode to experimental --- cmd/mimir/config-descriptor.json | 2 +- cmd/mimir/help-all.txt.tmpl | 2 +- .../sources/mimir/configure/configuration-parameters/index.md | 4 ++-- pkg/alertmanager/multitenant.go | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 509c44a4e45..54e5ea585be 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -13628,7 +13628,7 @@ "fieldDefaultValue": false, "fieldFlag": "alertmanager.utf8-strict-mode", "fieldType": "boolean", - "fieldCategory": "advanced" + "fieldCategory": "experimental" } ], "fieldValue": null, diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index 119c2944c03..231dbbeaa59 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -274,7 +274,7 @@ Usage of ./cmd/mimir/mimir: -alertmanager.storage.retention duration How long should we store stateful data (notification logs and silences). For notification log entries, refers to how long should we keep entries before they expire and are deleted. For silences, refers to how long should tenants view silences after they expire and are deleted. (default 120h0m0s) -alertmanager.utf8-strict-mode - Enable UTF-8 strict mode. Allows UTF-8 in the matchers for routes and inhibition rules, in silences, and in the labels for alerts. It is recommended to check both alertmanager_matchers_disagree and alertmanager_matchers_incompatible metrics before using this mode as otherwise some tenant configurations might fail to load. + [experimental] Enable UTF-8 strict mode. Allows UTF-8 in the matchers for routes and inhibition rules, in silences, and in the labels for alerts. It is recommended to check both alertmanager_matchers_disagree and alertmanager_matchers_incompatible metrics before using this mode as otherwise some tenant configurations might fail to load. -alertmanager.web.external-url string The URL under which Alertmanager is externally reachable (eg. could be different than -http.alertmanager-http-prefix in case Alertmanager is served via a reverse proxy). This setting is used both to configure the internal requests router and to generate links in alert templates. If the external URL has a path portion, it will be used to prefix all HTTP endpoints served by Alertmanager, both the UI and API. (default http://localhost:8080/alertmanager) -api.skip-label-name-validation-header-enabled diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index ed1724ec240..75916baaeda 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -2252,8 +2252,8 @@ alertmanager_client: # CLI flag: -alertmanager.enable-state-cleanup [enable_state_cleanup: | default = true] -# (advanced) Enable UTF-8 strict mode. Allows UTF-8 in the matchers for routes -# and inhibition rules, in silences, and in the labels for alerts. It is +# (experimental) Enable UTF-8 strict mode. Allows UTF-8 in the matchers for +# routes and inhibition rules, in silences, and in the labels for alerts. It is # recommended to check both alertmanager_matchers_disagree and # alertmanager_matchers_incompatible metrics before using this mode as otherwise # some tenant configurations might fail to load. diff --git a/pkg/alertmanager/multitenant.go b/pkg/alertmanager/multitenant.go index 53c2694b264..750f8d3d969 100644 --- a/pkg/alertmanager/multitenant.go +++ b/pkg/alertmanager/multitenant.go @@ -96,7 +96,7 @@ type MultitenantAlertmanagerConfig struct { // Enable UTF-8 strict mode. This means Alertmanager uses the matchers/parse parser // to parse configurations and API requests, instead of pkg/labels. Use this mode // once you are confident that your configuration is forwards compatible. - UTF8StrictMode bool `yaml:"utf8_strict_mode" category:"advanced"` + UTF8StrictMode bool `yaml:"utf8_strict_mode" category:"experimental"` } const ( From 187717b0d10acafad2ee4b58bdfa5e5bbaa45019 Mon Sep 17 00:00:00 2001 From: George Robinson Date: Thu, 25 Jan 2024 10:51:08 +0000 Subject: [PATCH 7/7] Change log level from Debug to Info --- pkg/mimir/modules.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/mimir/modules.go b/pkg/mimir/modules.go index 090406f9160..6f81896f968 100644 --- a/pkg/mimir/modules.go +++ b/pkg/mimir/modules.go @@ -885,10 +885,10 @@ func (t *Mimir) initRuler() (serv services.Service, err error) { func (t *Mimir) initAlertManager() (serv services.Service, err error) { mode := featurecontrol.FeatureClassicMode if t.Cfg.Alertmanager.UTF8StrictMode { - level.Debug(util_log.Logger).Log("msg", "Starting Alertmanager in UTF-8 strict mode") + level.Info(util_log.Logger).Log("msg", "Starting Alertmanager in UTF-8 strict mode") mode = featurecontrol.FeatureUTF8StrictMode } else { - level.Debug(util_log.Logger).Log("msg", "Starting Alertmanager in classic mode") + level.Info(util_log.Logger).Log("msg", "Starting Alertmanager in classic mode") } features, err := featurecontrol.NewFlags(util_log.Logger, mode) util_log.CheckFatal("initializing Alertmanager feature flags", err)