Skip to content

Commit

Permalink
Merge branch 'v10.4.x-logzio' into DEV-47164-add-alerting-observabili…
Browse files Browse the repository at this point in the history
…ty-in-logs
  • Loading branch information
yasmin-tr committed Jan 5, 2025
2 parents 0cd5e6a + 0e5c976 commit eaaf6d9
Show file tree
Hide file tree
Showing 14 changed files with 157 additions and 18 deletions.
5 changes: 5 additions & 0 deletions conf/defaults.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1233,6 +1233,11 @@ disable_jitter = false
# The url set to send alerts for external notifications on the LogzioAlertsRouter. If you put empty string it will not send, only log.
logzio_alerts_route_url =

# LOGZ.IO GRAFANA CHANGE :: DEV-47243 Handle state cache inconsistency on eval
# The base interval of the scheduler for evaluating alerts. The default value is 10s
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
scheduler_tick_interval =


[unified_alerting.screenshots]
# Enable screenshots in notifications. You must have either installed the Grafana image rendering
Expand Down
1 change: 1 addition & 0 deletions custom.ini
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,4 @@ custom_endpoint = log
prometheusPromQAIL = false
publicDashboards = false
autoMigratePiechartPanel = true
configurableSchedulerTick = true
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,6 @@ replace xorm.io/xorm => ./pkg/util/xorm
// This is required in order to get notification delivery errors from the receivers API.
replace github.com/prometheus/alertmanager => github.com/grafana/prometheus-alertmanager v0.25.1-0.20240208102907-e82436ce63e6

replace github.com/grafana/alerting => github.com/logzio/data-viz-alerting v0.0.0-20240926134858-3220ec2366dc
replace github.com/grafana/alerting => github.com/logzio/data-viz-alerting v0.0.0-20241201143551-1edf298a1813

exclude github.com/mattn/go-sqlite3 v2.0.3+incompatible
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2892,8 +2892,8 @@ github.com/linkedin/goavro/v2 v2.10.0 h1:eTBIRoInBM88gITGXYtUSqqxLTFXfOsJBiX8ZMW
github.com/linkedin/goavro/v2 v2.10.0/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA=
github.com/linode/linodego v1.25.0 h1:zYMz0lTasD503jBu3tSRhzEmXHQN1zptCw5o71ibyyU=
github.com/linode/linodego v1.25.0/go.mod h1:BMZI0pMM/YGjBis7pIXDPbcgYfCZLH0/UvzqtsGtG1c=
github.com/logzio/data-viz-alerting v0.0.0-20240926134858-3220ec2366dc h1:PwLcwpAa5kpbAOX8i6eeEJsLemW84IDopjjY3F6Enrc=
github.com/logzio/data-viz-alerting v0.0.0-20240926134858-3220ec2366dc/go.mod h1:brTFeACal/cSZAR8XO/4LPKs7rzNfS86okl6QjSP1eY=
github.com/logzio/data-viz-alerting v0.0.0-20241201143551-1edf298a1813 h1:V03fxqQImWJJ6s3Ep6ptn6qDFCyPSxRrRxZTsLzFXmU=
github.com/logzio/data-viz-alerting v0.0.0-20241201143551-1edf298a1813/go.mod h1:brTFeACal/cSZAR8XO/4LPKs7rzNfS86okl6QjSP1eY=
github.com/luna-duclos/instrumentedsql v0.0.0-20181127104832-b7d587d28109/go.mod h1:PWUIzhtavmOR965zfawVsHXbEuU1G29BPZ/CB3C7jXk=
github.com/luna-duclos/instrumentedsql v1.1.2/go.mod h1:4LGbEqDnopzNAiyxPPDXhLspyunZxgPTMJBKtC6U0BQ=
github.com/luna-duclos/instrumentedsql v1.1.3/go.mod h1:9J1njvFds+zN7y85EDhN9XNQLANWwZt2ULeIC8yMNYs=
Expand Down
4 changes: 4 additions & 0 deletions pkg/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,10 @@ func (hs *HTTPServer) registerRoutes() {
datasourceRoute.Get("/uid/:uid", authorize(ac.EvalPermission(datasources.ActionRead, uidScope)), routing.Wrap(hs.GetDataSourceByUID))
datasourceRoute.Get("/name/:name", authorize(ac.EvalPermission(datasources.ActionRead, nameScope)), routing.Wrap(hs.GetDataSourceByName))
datasourceRoute.Get("/id/:name", authorize(ac.EvalPermission(datasources.ActionIDRead, nameScope)), routing.Wrap(hs.GetDataSourceIdByName))
// LOGZ.IO GRAFANA CHANGE :: DEV-46879 - Create endpoints to return summary of datasources
datasourceRoute.Get("/summary", authorize(ac.EvalPermission(datasources.ActionRead)), routing.Wrap(hs.GetDataSourcesSummary))
datasourceRoute.Get("/name/:name/summary", authorize(ac.EvalPermission(datasources.ActionRead, nameScope)), routing.Wrap(hs.GetDataSourceSummaryByName))
// LOGZ.IO GRAFANA CHANGE :: End
})

pluginIDScope := pluginaccesscontrol.ScopeProvider.GetResourceScope(ac.Parameter(":pluginId"))
Expand Down
76 changes: 76 additions & 0 deletions pkg/api/datasources.go
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,82 @@ func (hs *HTTPServer) GetDataSourceIdByName(c *contextmodel.ReqContext) response
return response.JSON(http.StatusOK, &dtos)
}

// LOGZ.IO GRAFANA CHANGE :: DEV-46879 - Create endpoints to return summary of datasources

// swagger:route GET /api/datasources/summary datasources getDataSourcesSummary
//
// Get data sources summary.
//
// Responses:
// 200: getDataSourceSummaryResponse
// 401: unauthorisedError
// 403: forbiddenError
// 404: notFoundError
// 500: internalServerError
func (hs *HTTPServer) GetDataSourcesSummary(c *contextmodel.ReqContext) response.Response {
query := datasources.GetDataSourcesQuery{OrgID: c.SignedInUser.GetOrgID(), DataSourceLimit: hs.Cfg.DataSourceLimit}

dataSources, err := hs.DataSourcesService.GetDataSources(c.Req.Context(), &query)
if err != nil {
return response.Error(500, "Failed to query datasources", err)
}

filtered, err := hs.dsGuardian.New(c.SignedInUser.OrgID, c.SignedInUser).FilterDatasourcesByQueryPermissions(dataSources)
if err != nil {
return response.Error(500, "Failed to query datasources", err)
}

result := make(dtos.DataSourceSummaryList, 0)
for _, ds := range filtered {
dsItem := dtos.DataSourceSummaryListItemDTO{
Id: ds.ID,
UID: ds.UID,
Name: ds.Name,
Type: ds.Type,
Database: ds.Database,
}

result = append(result, dsItem)
}

sort.Sort(result)

return response.JSON(http.StatusOK, &result)
}

// swagger:route GET /api/datasources/name/:name/summary datasources getDataSourcesSummaryByName
//
// Get data sources summary.
//
// Responses:
// 200: getDataSourceSummaryByNameResponse
// 401: unauthorisedError
// 403: forbiddenError
// 404: notFoundError
// 500: internalServerError
func (hs *HTTPServer) GetDataSourceSummaryByName(c *contextmodel.ReqContext) response.Response {
query := datasources.GetDataSourceQuery{Name: web.Params(c.Req)[":name"], OrgID: c.SignedInUser.GetOrgID()}

dataSource, err := hs.DataSourcesService.GetDataSource(c.Req.Context(), &query)
if err != nil {
if errors.Is(err, datasources.ErrDataSourceNotFound) {
return response.Error(404, "Data source not found", nil)
}
return response.Error(500, "Failed to query datasources", err)
}

dto := dtos.DataSourceSummaryListItemDTO{
Id: dataSource.ID,
UID: dataSource.UID,
Name: dataSource.Name,
Type: dataSource.Type,
Database: dataSource.Database,
}
return response.JSON(http.StatusOK, &dto)
}

// LOGZ.IO GRAFANA CHANGE :: End

// swagger:route GET /datasources/{id}/resources/{datasource_proxy_route} datasources callDatasourceResourceByID
//
// Fetch data source resources by Id.
Expand Down
25 changes: 25 additions & 0 deletions pkg/api/dtos/datasource.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,28 @@ func (slice DataSourceList) Less(i, j int) bool {
func (slice DataSourceList) Swap(i, j int) {
slice[i], slice[j] = slice[j], slice[i]
}

// LOGZ.IO GRAFANA CHANGE :: DEV-46879 - Create endpoints to return summary of datasources
type DataSourceSummaryListItemDTO struct {
Id int64 `json:"id"`
UID string `json:"uid"`
Name string `json:"name"`
Type string `json:"type"`
Database string `json:"database"`
}

type DataSourceSummaryList []DataSourceSummaryListItemDTO

func (slice DataSourceSummaryList) Len() int {
return len(slice)
}

func (slice DataSourceSummaryList) Less(i, j int) bool {
return strings.ToLower(slice[i].Name) < strings.ToLower(slice[j].Name)
}

func (slice DataSourceSummaryList) Swap(i, j int) {
slice[i], slice[j] = slice[j], slice[i]
}

// LOGZ.IO GRAFANA CHANGE :: End
21 changes: 12 additions & 9 deletions pkg/services/ngalert/api/alerting_logzio.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/schedule"
"github.com/grafana/grafana/pkg/setting"
"net/http"
"time"
)

type LogzioAlertingService struct {
Expand Down Expand Up @@ -42,7 +43,8 @@ func NewLogzioAlertingService(

func (srv *LogzioAlertingService) RouteEvaluateAlert(c *contextmodel.ReqContext, evalRequests []apimodels.AlertEvaluationRequest) response.Response {
c.Logger.Info(fmt.Sprintf("Evaluate Alert API: got requests for %d evaluations", len(evalRequests)))
var evaluationsErrors []apimodels.AlertEvalRunResult

var results []apimodels.AlertEvalRunResult

for _, evalRequest := range evalRequests {
c.Logger.Info("Evaluate Alert API", "eval_time", evalRequest.EvalTime, "rule_title", evalRequest.AlertRule.Title, "rule_uid", evalRequest.AlertRule.UID, "org_id", evalRequest.AlertRule.OrgID)
Expand All @@ -53,17 +55,18 @@ func (srv *LogzioAlertingService) RouteEvaluateAlert(c *contextmodel.ReqContext,
FolderTitle: evalRequest.FolderTitle,
LogzHeaders: srv.addQuerySourceHeader(c),
}
err := srv.Schedule.RunRuleEvaluation(c.Req.Context(), evalReq)

if err != nil {
evaluationsErrors = append(evaluationsErrors, apimodels.AlertEvalRunResult{UID: evalRequest.AlertRule.UID, EvalTime: evalRequest.EvalTime, RunResult: err.Error()})
} else {
evaluationsErrors = append(evaluationsErrors, apimodels.AlertEvalRunResult{UID: evalRequest.AlertRule.UID, EvalTime: evalRequest.EvalTime, RunResult: "success"})
}
var step = evalRequest.AlertRule.ID % 30

time.AfterFunc(time.Duration(step * time.Second.Nanoseconds()), func() {
srv.Schedule.RunRuleEvaluation(c.Req.Context(), evalReq)
})

results = append(results, apimodels.AlertEvalRunResult{UID: evalRequest.AlertRule.UID, EvalTime: evalRequest.EvalTime, RunResult: "success"})
}

c.Logger.Info("Evaluate Alert API - Done", "evalErrors", evaluationsErrors)
return response.JSON(http.StatusOK, apimodels.EvalRunsResponse{RunResults: evaluationsErrors})
c.Logger.Info("Evaluate Alert API - Done", "results", results)
return response.JSON(http.StatusOK, apimodels.EvalRunsResponse{RunResults: results})
}

func (srv *LogzioAlertingService) addQuerySourceHeader(c *contextmodel.ReqContext) http.Header {
Expand Down
2 changes: 1 addition & 1 deletion pkg/services/ngalert/ngalert.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ func (ng *AlertNG) init() error {
statePersister = state.NewAsyncStatePersister(logger, ticker, cfg)
}
stateManager := state.NewManager(cfg, statePersister)
scheduler := schedule.NewScheduler(schedCfg, stateManager)
scheduler := schedule.NewScheduler(schedCfg, stateManager, ng.store) // LOGZ.IO GRAFANA CHANGE :: DEV-47243 Handle state cache inconsistency on eval - warm cache in scheduler as temporary solution

// if it is required to include folder title to the alerts, we need to subscribe to changes of alert title
if !ng.Cfg.UnifiedAlerting.ReservedLabels.IsReservedLabelDisabled(models.FolderTitleLabel) {
Expand Down
7 changes: 6 additions & 1 deletion pkg/services/ngalert/schedule/schedule.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"github.com/grafana/grafana/pkg/services/ngalert/store" // LOGZ.IO GRAFANA CHANGE :: DEV-47243 Handle state cache inconsistency on eval - warm cache in scheduler as temporary solution
"net/url"
"time"

Expand Down Expand Up @@ -99,6 +100,8 @@ type schedule struct {
tracer tracing.Tracer

scheduledEvalEnabled bool // LOGZ.IO GRAFANA CHANGE :: DEV-43744 Add scheduled evaluation enabled config

store *store.DBstore // LOGZ.IO GRAFANA CHANGE :: DEV-47243 Handle state cache inconsistency on eval - warm cache in scheduler as temporary solution
}

// SchedulerCfg is the scheduler configuration.
Expand All @@ -120,7 +123,7 @@ type SchedulerCfg struct {
}

// NewScheduler returns a new schedule.
func NewScheduler(cfg SchedulerCfg, stateManager *state.Manager) *schedule {
func NewScheduler(cfg SchedulerCfg, stateManager *state.Manager, store *store.DBstore) *schedule { // LOGZ.IO GRAFANA CHANGE :: DEV-47243 Handle state cache inconsistency on eval - warm cache in scheduler as temporary solution
const minMaxAttempts = int64(1)
if cfg.MaxAttempts < minMaxAttempts {
cfg.Log.Warn("Invalid scheduler maxAttempts, using a safe minimum", "configured", cfg.MaxAttempts, "actual", minMaxAttempts)
Expand All @@ -145,6 +148,7 @@ func NewScheduler(cfg SchedulerCfg, stateManager *state.Manager) *schedule {
alertsSender: cfg.AlertSender,
tracer: cfg.Tracer,
scheduledEvalEnabled: cfg.ScheduledEvalEnabled, // LOGZ.IO GRAFANA CHANGE :: DEV-43744 Add scheduled evaluation enabled config
store: store, // LOGZ.IO GRAFANA CHANGE :: DEV-47243 Handle state cache inconsistency on eval - warm cache in scheduler as temporary solution
}

return &sch
Expand Down Expand Up @@ -352,6 +356,7 @@ func (sch *schedule) processTick(ctx context.Context, dispatcherGroup *errgroup.
toDelete = append(toDelete, key)
}
sch.deleteAlertRule(toDelete...)
sch.stateManager.Warm(ctx, sch.store) // LOGZ.IO GRAFANA CHANGE :: DEV-47243 Handle state cache inconsistency on eval - warm cache in scheduler as temporary solution
return readyToRun, registeredDefinitions, updatedRules
}

Expand Down
5 changes: 3 additions & 2 deletions pkg/services/ngalert/schedule/schedule_unit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/grafana/grafana/pkg/services/ngalert/store" // LOGZ.IO GRAFANA CHANGE :: DEV-47243 Handle state cache inconsistency on eval - warm cache in scheduler as temporary solution
"math/rand"
"net/url"
"testing"
Expand Down Expand Up @@ -94,7 +95,7 @@ func TestProcessTicks(t *testing.T) {
}
st := state.NewManager(managerCfg, state.NewNoopPersister())

sched := NewScheduler(schedCfg, st)
sched := NewScheduler(schedCfg, st, &store.DBstore{}) // LOGZ.IO GRAFANA CHANGE :: DEV-47243 Handle state cache inconsistency on eval - warm cache in scheduler as temporary solution

evalAppliedCh := make(chan evalAppliedInfo, 1)
stopAppliedCh := make(chan models.AlertRuleKey, 1)
Expand Down Expand Up @@ -921,7 +922,7 @@ func setupScheduler(t *testing.T, rs *fakeRulesStore, is *state.FakeInstanceStor
syncStatePersister := state.NewSyncStatePersisiter(log.New("ngalert.state.manager.perist"), managerCfg)
st := state.NewManager(managerCfg, syncStatePersister)

return NewScheduler(schedCfg, st)
return NewScheduler(schedCfg, st, &store.DBstore{}) // LOGZ.IO GRAFANA CHANGE :: DEV-47243 Handle state cache inconsistency on eval - warm cache in scheduler as temporary solution
}

func withQueryForState(t *testing.T, evalResult eval.State) models.AlertRuleMutator {
Expand Down
16 changes: 16 additions & 0 deletions pkg/tests/api/alerting/api_notification_channel_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2505,6 +2505,14 @@ var expEmailNotifications = []*notifications.SendEmailCommandSync{
PanelURL: "",
Values: map[string]float64{"A": 1},
ValueString: "[ var='A' labels={} value=1 ]",
EvalValues: []alertingTemplates.EvalValue{
{
Var: "A",
Metric: "",
Labels: "{}",
Value: "1",
},
},
},
},
"GroupLabels": template.KV{"alertname": "EmailAlert"},
Expand Down Expand Up @@ -2665,6 +2673,14 @@ var expNonEmailNotifications = map[string][]string{
"startsAt": "%s",
"values": {"A": 1},
"valueString": "[ var='A' labels={} value=1 ]",
"evalValues": [
{
"Var": "A",
"Metric": "",
"Labels": "{}",
"Value": "1"
}
],
"endsAt": "0001-01-01T00:00:00Z",
"generatorURL": "http://localhost:3000/alerting/grafana/UID_WebhookAlert/view",
"fingerprint": "15c59b0a380bd9f1",
Expand Down
3 changes: 2 additions & 1 deletion pkg/tests/api/alerting/api_ruler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -782,7 +782,7 @@ func TestAlertRulePostExport(t *testing.T) {
pathsToIgnore := []string{
"Groups.Rules.UID",
"Groups.Folder",
"Data.Model", // Model is not amended with default values
"Data.Model", // Model is not amended with default values
"Groups.Rules.ExecErrState", // LOGZ.IO GRAFANA CHANGE :: DEV-46410 - Change default ExecErrState to OK and enforce OK value
}

Expand Down Expand Up @@ -1763,6 +1763,7 @@ func TestIntegrationRulePause(t *testing.T) {
}

func TestIntegrationHysteresisRule(t *testing.T) {
t.Skip("Skip this test until the issue is resolved or warm cache temp fix is removed") // LOGZ.IO GRAFANA CHANGE :: DEV-47243 Handle state cache inconsistency on eval
testinfra.SQLiteIntegrationTest(t)

// Setup Grafana and its Database. Scheduler is set to evaluate every 1 second
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,10 @@ const AnnotationValue = ({ annotationKey, value, valueLink }: Props) => {
const tokenizeValue = <Tokenize input={value} delimiter={['{{', '}}']} />;

if (valueLink) {
// LOGZ.IO GRAFANA CHANGE :: DEV-47446 - open external links in new tab with valid url
const href = valueLink.match(/grafana-app/) ? valueLink : `/grafana-app${valueLink}`;
return (
<TextLink href={valueLink} external>
<TextLink href={href} external>
{value}
</TextLink>
);
Expand Down

0 comments on commit eaaf6d9

Please sign in to comment.