diff --git a/config/notifiers.go b/config/notifiers.go index fe28ca05c4..74c74a92a0 100644 --- a/config/notifiers.go +++ b/config/notifiers.go @@ -535,6 +535,10 @@ type WebhookConfig struct { // Alerts exceeding this threshold will be truncated. Setting this to 0 // allows an unlimited number of alerts. MaxAlerts uint64 `yaml:"max_alerts" json:"max_alerts"` + + // Timeout is the maximum time allowed to invoke the webhook. Setting this to 0 + // does not impose a timeout. + Timeout time.Duration `yaml:"timeout" json:"timeout"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. diff --git a/docs/configuration.md b/docs/configuration.md index 89403ca8f9..f2a4f6b197 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1594,6 +1594,13 @@ url_file: # above this threshold are truncated. When leaving this at its default value of # 0, all alerts are included. [ max_alerts: | default = 0 ] + +# The maximum time to wait for a webhook request to complete, before failing the +# request and allowing it to be retried. The default value of 0s indicates that +# no timeout should be applied. +# NOTE: This will have no effect if set higher than the group_interval. +[ timeout: | default = 0s ] + ``` The Alertmanager diff --git a/notify/webhook/webhook.go b/notify/webhook/webhook.go index 1b5546ddbe..41b9f497dc 100644 --- a/notify/webhook/webhook.go +++ b/notify/webhook/webhook.go @@ -112,8 +112,17 @@ func (n *Notifier) Notify(ctx context.Context, alerts ...*types.Alert) (bool, er url = strings.TrimSpace(string(content)) } + if n.conf.Timeout > 0 { + postCtx, cancel := context.WithTimeoutCause(ctx, n.conf.Timeout, fmt.Errorf("configured webhook timeout reached (%s)", n.conf.Timeout)) + defer cancel() + ctx = postCtx + } + resp, err := notify.PostJSON(ctx, n.client, url, &buf) if err != nil { + if ctx.Err() != nil { + err = fmt.Errorf("%w: %w", err, context.Cause(ctx)) + } return true, notify.RedactURL(err) } defer notify.Drain(resp) diff --git a/test/with_api_v2/acceptance/send_test.go b/test/with_api_v2/acceptance/send_test.go index 4e13d37ca5..fa190f44c1 100644 --- a/test/with_api_v2/acceptance/send_test.go +++ b/test/with_api_v2/acceptance/send_test.go @@ -464,3 +464,52 @@ receivers: t.Log(co.Check()) } + +func TestWebhookTimeout(t *testing.T) { + t.Parallel() + + // This integration test uses an extended group_interval to check that + // the webhook level timeout has the desired effect, and that notification + // sending is retried in this case. + conf := ` +route: + receiver: "default" + group_by: [alertname] + group_wait: 1s + group_interval: 1m + repeat_interval: 1m + +receivers: +- name: "default" + webhook_configs: + - url: 'http://%s' + timeout: 500ms +` + + at := NewAcceptanceTest(t, &AcceptanceOpts{ + Tolerance: 150 * time.Millisecond, + }) + + co := at.Collector("webhook") + wh := NewWebhook(t, co) + + wh.Func = func(ts float64) bool { + // Make some webhook requests slow enough to hit the webhook + // timeout, but not so slow as to hit the dispatcher timeout. + if ts < 3 { + time.Sleep(time.Second) + return true + } + return false + } + + am := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 1) + + am.Push(At(1), Alert("alertname", "test1")) + + co.Want(Between(3, 4), Alert("alertname", "test1").Active(1)) + + at.Run() + + t.Log(co.Check()) +}