Skip to content

Commit

Permalink
Add metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
swift1337 committed Dec 20, 2024
1 parent 60c166d commit 6cc8e43
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 9 deletions.
29 changes: 29 additions & 0 deletions pkg/scheduler/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package scheduler

import (
"time"

"github.com/zeta-chain/node/zetaclient/metrics"
)

// recordMetrics reports the outcome of a single task tick. It increments the
// invocation counter and observes the number of seconds elapsed since
// startedAt, both labelled with the status, the task's group, and its name.
//
// The status label is "skipped" when skipped is true (err is not consulted),
// "failed" when err is non-nil, and "ok" otherwise.
//
// Note that currently the hard-coded "global" metrics are used.
func recordMetrics(task *Task, startedAt time.Time, err error, skipped bool) {
	outcome := "ok"
	if skipped {
		outcome = "skipped"
	} else if err != nil {
		outcome = "failed"
	}

	// Label values must follow the order the metric vectors were declared
	// with: status, task_group, task_name.
	labels := []string{outcome, string(task.group), task.name}
	elapsed := time.Since(startedAt).Seconds()

	metrics.SchedulerTaskInvocationCounter.WithLabelValues(labels...).Inc()
	metrics.SchedulerTaskExecutionDuration.WithLabelValues(labels...).Observe(elapsed)
}
13 changes: 4 additions & 9 deletions pkg/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,22 +163,17 @@ func (t *Task) Stop() {

// execute runs a single tick of the Task with additional logging and metrics.
//
// If a skipper is configured and reports true, the tick is recorded as
// skipped and nil is returned without invoking the task. Otherwise the task's
// exec function is called with ctx, its outcome and duration are recorded,
// and its error (if any) is returned unchanged.
func (t *Task) execute(ctx context.Context) error {
	// Keep the monotonic clock reading here: recordMetrics derives the
	// duration with time.Since, and Time.UTC() strips the monotonic reading,
	// which would make the measurement sensitive to wall-clock adjustments
	// (a clock step could even yield a negative duration).
	startedAt := time.Now()

	// skip tick
	if t.skipper != nil && t.skipper() {
		recordMetrics(t, startedAt, nil, true)
		return nil
	}

	t.logger.Debug().Msg("Invoking task")

	err := t.exec(ctx)

	recordMetrics(t, startedAt, err, false)

	return err
}
Expand Down
21 changes: 21 additions & 0 deletions zetaclient/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,27 @@ var (
Name: "num_connected_peers",
Help: "The number of connected peers (authenticated keygen peers)",
})

// SchedulerTaskInvocationCounter counts scheduler task invocations.
// Each series is keyed by three labels, declared in this order: "status",
// "task_group", and "task_name". Callers using WithLabelValues must supply
// the values in that same order.
SchedulerTaskInvocationCounter = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: ZetaClientNamespace,
Name: "scheduler_task_invocations_total",
Help: "Total number of task invocations",
},
[]string{"status", "task_group", "task_name"},
)

// SchedulerTaskExecutionDuration is a histogram of scheduler task execution
// durations, expressed in seconds. It uses the same label set and order as
// the invocation counter: "status", "task_group", "task_name".
// The explicit bucket upper bounds range from 0.05 (50ms) to 15 (15s).
SchedulerTaskExecutionDuration = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: ZetaClientNamespace,
Name: "scheduler_task_duration_seconds",
Help: "Histogram of task execution duration in seconds",
Buckets: []float64{0.05, 0.1, 0.2, 0.3, 0.5, 1, 1.5, 2, 3, 5, 7.5, 10, 15}, // 50ms to 15s
},
[]string{"status", "task_group", "task_name"},
)
)

// NewMetrics creates a new Metrics instance
Expand Down

0 comments on commit 6cc8e43

Please sign in to comment.