Fix zeroed EndsAt timestamp
This commit fixes a bug where firing alerts had zeroed EndsAt
timestamps in notifications. This bug was introduced in another
PR (#1686) to fix issues where alerts are resolved during a flush.

(Commit message from George Robinson)

Update the acceptance tests as the webhook now has a non-zero EndsAt
timestamp for firing alerts. models.GettableAlert doesn't have a
`status` field, which means the current test is unable to differentiate
between firing and resolved alerts.

Signed-off-by: Holger Hans Peter Freyther <[email protected]>
zecke committed Jun 17, 2024
1 parent d987ddd commit 7903fc8
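
The effect of the fix can be read off the acceptance-test updates below: a firing alert pushed without an explicit end time is now expected to reach the webhook with EndsAt equal to StartsAt plus the configured resolve_timeout, instead of a zero timestamp, while alerts pushed with an explicit end keep that end. A minimal sketch of that expectation; expectedEndsAt is a hypothetical helper, not part of Alertmanager or its test framework:

package main

import (
	"fmt"
	"time"
)

// expectedEndsAt mirrors the expectation encoded by the updated acceptance
// tests: an alert pushed with an explicit end time keeps it, while a firing
// alert without one is reported with StartsAt + resolve_timeout instead of a
// zeroed EndsAt. Hypothetical helper, not part of Alertmanager or its tests.
func expectedEndsAt(startsAt, explicitEnd time.Time, resolveTimeout time.Duration) time.Time {
	if !explicitEnd.IsZero() {
		return explicitEnd
	}
	return startsAt.Add(resolveTimeout)
}

func main() {
	start := time.Unix(1, 0)
	// firing alert with no explicit end: EndsAt is non-zero (start + resolve_timeout)
	fmt.Println(expectedEndsAt(start, time.Time{}, 5*time.Minute))
	// alert pushed with an explicit end (e.g. Active(1, 2.6)): the explicit end wins
	fmt.Println(expectedEndsAt(start, start.Add(1600*time.Millisecond), 5*time.Minute))
}
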
Showing 15 changed files with 121 additions and 103 deletions.
12 changes: 6 additions & 6 deletions dispatch/dispatch_test.go
@@ -425,22 +425,22 @@ route:

require.Equal(t, AlertGroups{
&AlertGroup{
Alerts: []*types.AlertSnapshot{types.NewAlertSnapshot(inputAlerts[0], now)},
Alerts: types.SnapshotAlerts([]*types.Alert{inputAlerts[0]}, now),
Labels: model.LabelSet{
"alertname": "OtherAlert",
},
Receiver: "prod",
},
&AlertGroup{
Alerts: []*types.AlertSnapshot{types.NewAlertSnapshot(inputAlerts[1], now)},
Alerts: types.SnapshotAlerts([]*types.Alert{inputAlerts[1]}, now),
Labels: model.LabelSet{
"alertname": "TestingAlert",
"service": "api",
},
Receiver: "testing",
},
&AlertGroup{
Alerts: []*types.AlertSnapshot{types.NewAlertSnapshot(inputAlerts[2], now), types.NewAlertSnapshot(inputAlerts[3], now)},
Alerts: types.SnapshotAlerts([]*types.Alert{inputAlerts[2], inputAlerts[3]}, now),
Labels: model.LabelSet{
"alertname": "HighErrorRate",
"service": "api",
@@ -449,7 +449,7 @@
Receiver: "prod",
},
&AlertGroup{
Alerts: []*types.AlertSnapshot{types.NewAlertSnapshot(inputAlerts[4], now)},
Alerts: types.SnapshotAlerts([]*types.Alert{inputAlerts[4]}, now),
Labels: model.LabelSet{
"alertname": "HighErrorRate",
"service": "api",
@@ -458,7 +458,7 @@
Receiver: "prod",
},
&AlertGroup{
Alerts: []*types.AlertSnapshot{types.NewAlertSnapshot(inputAlerts[5], now)},
Alerts: types.SnapshotAlerts([]*types.Alert{inputAlerts[5]}, now),
Labels: model.LabelSet{
"alertname": "HighLatency",
"service": "db",
@@ -467,7 +467,7 @@
Receiver: "kafka",
},
&AlertGroup{
Alerts: []*types.AlertSnapshot{types.NewAlertSnapshot(inputAlerts[5], now)},
Alerts: types.SnapshotAlerts([]*types.Alert{inputAlerts[5]}, now),
Labels: model.LabelSet{
"alertname": "HighLatency",
"service": "db",
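
The hunk above switches the expected groups from hand-built []*types.AlertSnapshot slices to a single types.SnapshotAlerts call. The helper's definition is not part of this commit; judging only from the call sites, it is presumably a convenience wrapper along the following lines, with the Alert and AlertSnapshot fields below serving as stand-ins:

package types

import "time"

// Stand-in types: the real Alert and AlertSnapshot live in Alertmanager's
// types package and carry much more state than shown here.
type Alert struct {
	StartsAt time.Time
	EndsAt   time.Time
}

type AlertSnapshot struct {
	Alert *Alert
	At    time.Time // time at which the snapshot was taken
}

// NewAlertSnapshot snapshots a single alert at time now (signature inferred
// from the removed test code).
func NewAlertSnapshot(a *Alert, now time.Time) *AlertSnapshot {
	return &AlertSnapshot{Alert: a, At: now}
}

// SnapshotAlerts is assumed to snapshot a whole slice in one call, which is
// exactly what the updated test expectations construct.
func SnapshotAlerts(alerts []*Alert, now time.Time) []*AlertSnapshot {
	snaps := make([]*AlertSnapshot, 0, len(alerts))
	for _, a := range alerts {
		snaps = append(snaps, NewAlertSnapshot(a, now))
	}
	return snaps
}
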
2 changes: 1 addition & 1 deletion notify/discord/discord.go
@@ -93,7 +93,7 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.AlertSnapshot) (bool

level.Debug(n.logger).Log("incident", key)

alerts := types.Snapshot(as...)
alerts := types.AlertsSnapshot(as)
data := notify.GetTemplateData(ctx, n.tmpl, as, n.logger)
tmpl := notify.TmplText(n.tmpl, data, &err)
if err != nil {
2 changes: 1 addition & 1 deletion notify/msteams/msteams.go
@@ -107,7 +107,7 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.AlertSnapshot) (bool
return false, err
}

alerts := types.Snapshot(as...)
alerts := types.AlertsSnapshot(as)
color := colorGrey
switch alerts.Status() {
case model.AlertFiring:
2 changes: 1 addition & 1 deletion notify/opsgenie/opsgenie.go
@@ -153,7 +153,7 @@ func (n *Notifier) createRequests(ctx context.Context, as ...*types.AlertSnapsho

var (
alias = key.Hash()
alerts = types.Snapshot(as...)
alerts = types.AlertsSnapshot(as)
)
switch alerts.Status() {
case model.AlertResolved:
2 changes: 1 addition & 1 deletion notify/pagerduty/pagerduty.go
@@ -311,7 +311,7 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.AlertSnapshot) (bool
}

var (
alerts = types.Snapshot(as...)
alerts = types.AlertsSnapshot(as)
data = notify.GetTemplateData(ctx, n.tmpl, as, n.logger)
eventType = pagerDutyEventTrigger
)
2 changes: 1 addition & 1 deletion notify/victorops/victorops.go
@@ -124,7 +124,7 @@ func (n *Notifier) createVictorOpsPayload(ctx context.Context, as ...*types.Aler
}

var (
alerts = types.Snapshot(as...)
alerts = types.AlertsSnapshot(as)
data = notify.GetTemplateData(ctx, n.tmpl, as, n.logger)
tmpl = notify.TmplText(n.tmpl, data, &err)

2 changes: 1 addition & 1 deletion template/template.go
@@ -360,7 +360,7 @@ func (as Alerts) Resolved() []Alert {
func (t *Template) Data(recv string, groupLabels model.LabelSet, alerts ...*types.AlertSnapshot) *Data {
data := &Data{
Receiver: regexp.QuoteMeta(recv),
Status: string(types.Snapshot(alerts...).Status()),
Status: string(types.AlertsSnapshot(alerts).Status()),
Alerts: make(Alerts, 0, len(alerts)),
GroupLabels: KV{},
CommonLabels: KV{},
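
Across the notifier and template changes above, types.Snapshot(as...) is replaced by types.AlertsSnapshot(as), and the result's Status() is consulted as before. The new call reads like a conversion to a named slice type rather than a variadic constructor; the sketch below works under that assumption, with firing-wins status semantics that are also assumed, since neither definition appears in this diff:

package types

import "github.com/prometheus/common/model"

// AlertSnapshot is a stand-in for the real type; here it only carries the
// status needed for this sketch.
type AlertSnapshot struct {
	Status model.AlertStatus
}

// AlertsSnapshot is assumed to be a named slice type, which would make the
// types.AlertsSnapshot(as) calls in the notifiers plain type conversions.
type AlertsSnapshot []*AlertSnapshot

// Status reports "firing" if any alert in the snapshot is still firing and
// "resolved" otherwise (assumed semantics).
func (as AlertsSnapshot) Status() model.AlertStatus {
	for _, a := range as {
		if a.Status == model.AlertFiring {
			return model.AlertFiring
		}
	}
	return model.AlertResolved
}

Either way, the notifier code keeps the same shape: take the snapshot, ask it for the aggregate status, and pick the firing or resolved presentation accordingly.
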
2 changes: 1 addition & 1 deletion test/cli/acceptance/cli_test.go
@@ -73,7 +73,7 @@ receivers:

alert1 := Alert("alertname", "test1").Active(1, 2)
am.AddAlertsAt(false, 0, alert1)
co.Want(Between(1, 2), Alert("alertname", "test1").Active(1))
co.Want(Between(1, 2), Alert("alertname", "test1").Active(1, 2))

at.Run()

18 changes: 13 additions & 5 deletions test/with_api_v2/acceptance/cluster_test.go
@@ -28,6 +28,8 @@ func TestClusterDeduplication(t *testing.T) {
t.Parallel()

conf := `
global:
resolve_timeout: %v
route:
receiver: "default"
group_by: []
@@ -47,11 +49,11 @@ receivers:
co := at.Collector("webhook")
wh := a.NewWebhook(t, co)

amc := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 3)
amc := at.AlertmanagerCluster(fmt.Sprintf(conf, resolveTimeout, wh.Address()), 3)

amc.Push(a.At(1), a.Alert("alertname", "test1"))

co.Want(a.Between(2, 3), a.Alert("alertname", "test1").Active(1))
co.Want(a.Between(2, 3), a.Alert("alertname", "test1").Active(1, 1+resolveTimeout.Seconds()))

at.Run()

@@ -64,6 +66,8 @@ func TestClusterVSInstance(t *testing.T) {
t.Parallel()

conf := `
global:
resolve_timeout: %v
route:
receiver: "default"
group_by: [ "alertname" ]
@@ -98,16 +102,16 @@ receivers:
collectors = append(collectors, tc.Collector("webhook"))
webhook := a.NewWebhook(t, collectors[i])

amClusters = append(amClusters, tc.AlertmanagerCluster(fmt.Sprintf(conf, webhook.Address()), clusterSizes[i]))
amClusters = append(amClusters, tc.AlertmanagerCluster(fmt.Sprintf(conf, resolveTimeout, webhook.Address()), clusterSizes[i]))

wg.Add(1)
}

for _, alertTime := range []float64{0, 2, 4, 6, 8} {
for _, alertTime := range []float64{1, 2, 4, 6, 8} {
for i, amc := range amClusters {
alert := a.Alert("alertname", fmt.Sprintf("test1-%v", alertTime))
amc.Push(a.At(alertTime), alert)
collectors[i].Want(a.Between(alertTime, alertTime+5), alert.Active(alertTime))
collectors[i].Want(a.Between(alertTime, alertTime+5), alert.Active(alertTime, alertTime+resolveTimeout.Seconds()))
}
}

@@ -124,4 +128,8 @@ receivers:
if err != nil {
t.Fatal(err)
}

for _, co := range collectors {
t.Log(co.Check())
}
}
28 changes: 16 additions & 12 deletions test/with_api_v2/acceptance/inhibit_test.go
@@ -29,6 +29,8 @@ func TestInhibiting(t *testing.T) {
// gets resolved.

conf := `
global:
resolve_timeout: %v
route:
receiver: "default"
group_by: []
@@ -58,7 +60,7 @@ inhibit_rules:
co := at.Collector("webhook")
wh := NewWebhook(t, co)

amc := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 1)
amc := at.AlertmanagerCluster(fmt.Sprintf(conf, resolveTimeout, wh.Address()), 1)

amc.Push(At(1), Alert("alertname", "test1", "job", "testjob", "zone", "aa"))
amc.Push(At(1), Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa"))
@@ -73,21 +75,21 @@
amc.Push(At(3.6), Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(2.2, 3.6))

co.Want(Between(2, 2.5),
Alert("alertname", "test1", "job", "testjob", "zone", "aa").Active(1),
Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa").Active(1),
Alert("alertname", "InstanceDown", "job", "testjob", "zone", "ab").Active(1),
Alert("alertname", "test1", "job", "testjob", "zone", "aa").Active(1, 1+resolveTimeout.Seconds()),
Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa").Active(1, 1+resolveTimeout.Seconds()),
Alert("alertname", "InstanceDown", "job", "testjob", "zone", "ab").Active(1, 1+resolveTimeout.Seconds()),
)

co.Want(Between(3, 3.5),
Alert("alertname", "test1", "job", "testjob", "zone", "aa").Active(1),
Alert("alertname", "InstanceDown", "job", "testjob", "zone", "ab").Active(1),
Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(2.2),
Alert("alertname", "test1", "job", "testjob", "zone", "aa").Active(1, 1+resolveTimeout.Seconds()),
Alert("alertname", "InstanceDown", "job", "testjob", "zone", "ab").Active(1, 1+resolveTimeout.Seconds()),
Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(2.2, 2.2+resolveTimeout.Seconds()),
)

co.Want(Between(4, 4.5),
Alert("alertname", "test1", "job", "testjob", "zone", "aa").Active(1),
Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa").Active(1),
Alert("alertname", "InstanceDown", "job", "testjob", "zone", "ab").Active(1),
Alert("alertname", "test1", "job", "testjob", "zone", "aa").Active(1, 1+resolveTimeout.Seconds()),
Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa").Active(1, 1+resolveTimeout.Seconds()),
Alert("alertname", "InstanceDown", "job", "testjob", "zone", "ab").Active(1, 1+resolveTimeout.Seconds()),
Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(2.2, 3.6),
)

@@ -104,6 +106,8 @@ func TestAlwaysInhibiting(t *testing.T) {
// alerts.

conf := `
global:
resolve_timeout: %v
route:
receiver: "default"
group_by: []
@@ -133,7 +137,7 @@ inhibit_rules:
co := at.Collector("webhook")
wh := NewWebhook(t, co)

amc := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 1)
amc := at.AlertmanagerCluster(fmt.Sprintf(conf, resolveTimeout, wh.Address()), 1)

amc.Push(At(1), Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa"))
amc.Push(At(1), Alert("alertname", "JobDown", "job", "testjob", "zone", "aa"))
@@ -142,7 +146,7 @@ inhibit_rules:
amc.Push(At(2.6), Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa").Active(1, 2.6))

co.Want(Between(2, 2.5),
Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(1),
Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(1, 1+resolveTimeout.Seconds()),
)

co.Want(Between(3, 3.5),