Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
zoetrope committed Sep 7, 2023
1 parent c798e29 commit f6fcafa
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 56 deletions.
63 changes: 61 additions & 2 deletions metrics/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ type storage interface {

// NewCollector returns a new prometheus.Collector.
func NewCollector(client *v3.Client) prometheus.Collector {

storage := &cke.Storage{Client: client}
return &collector{
metrics: map[string]metricGroup{
"leader": {
Expand All @@ -55,15 +57,15 @@ func NewCollector(client *v3.Client) prometheus.Collector {
isAvailable: isOperationPhaseAvailable,
},
"reboot": {
collectors: []prometheus.Collector{rebootQueueEntries, rebootQueueItems, nodeRebootStatus},
collectors: []prometheus.Collector{nodeMetricsCollector{storage}},
isAvailable: isRebootAvailable,
},
"sabakan_integration": {
collectors: []prometheus.Collector{sabakanIntegrationSuccessful, sabakanIntegrationTimestampSeconds, sabakanWorkers, sabakanUnusedMachines},
isAvailable: isSabakanIntegrationAvailable,
},
},
storage: &cke.Storage{Client: client},
storage: storage,
}
}

Expand Down Expand Up @@ -120,3 +122,60 @@ func (c collector) Collect(ch chan<- prometheus.Metric) {
}
wg.Wait()
}

// nodeMetricsCollector is a prometheus.Collector that produces the reboot
// metrics (rebootQueueEntries, rebootQueueItems, nodeRebootStatus) from the
// data held in storage every time it is collected.
type nodeMetricsCollector struct {
	// storage is where Collect reads the reboot queue entries and the cluster from.
	storage *cke.Storage
}

// The collector is registered by value and its methods use value receivers,
// so assert the interface on the value type rather than on a pointer to it.
var _ prometheus.Collector = nodeMetricsCollector{}

// Describe sends the descriptors of the three reboot metrics to ch, in the
// order: queue entries, queue items, node reboot status.
func (c nodeMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{
		rebootQueueEntries,
		rebootQueueItems,
		nodeRebootStatus,
	}
	for _, d := range descs {
		ch <- d
	}
}

// Collect reads the reboot queue entries and the cluster from storage and
// sends the reboot metrics derived from them to ch.
//
// Both reads share a single 3-second timeout. If either read fails, Collect
// returns without sending any metric and the error is not reported.
func (c nodeMetricsCollector) Collect(ch chan<- prometheus.Metric) {
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	entries, err := c.storage.GetRebootsEntries(ctx)
	if err != nil {
		return
	}
	cluster, err := c.storage.GetCluster(ctx)
	if err != nil {
		return
	}

	// Derive everything up front so nothing is sent before all inputs are ready.
	perStatus := cke.CountRebootQueueEntries(entries)
	perNode := cke.BuildNodeRebootStatus(cluster.Nodes, entries)

	// emit sends one gauge sample for desc with the given label values.
	emit := func(desc *prometheus.Desc, v float64, labelValues ...string) {
		ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, v, labelValues...)
	}

	// Total number of entries in the reboot queue.
	emit(rebootQueueEntries, float64(len(entries)))

	// Number of entries per status.
	for status, n := range perStatus {
		emit(rebootQueueItems, float64(n), status)
	}

	// One 0/1 sample per (node, status) pair.
	for node, byStatus := range perNode {
		for status, matches := range byStatus {
			var v float64
			if matches {
				v = 1
			}
			emit(nodeRebootStatus, v, node, status)
		}
	}
}
32 changes: 14 additions & 18 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,25 @@ var operationPhaseTimestampSeconds = prometheus.NewGauge(
},
)

var rebootQueueEntries = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: namespace,
Name: "reboot_queue_entries",
Help: "The number of reboot queue entries remaining.",
},
// rebootQueueEntries describes the gauge reporting the number of reboot queue
// entries remaining. The fully-qualified name is built from the package
// namespace so the exported metric keeps its namespace prefix.
var rebootQueueEntries = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "reboot_queue_entries"),
	"The number of reboot queue entries remaining.",
	nil,
	nil,
)

var rebootQueueItems = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Name: "reboot_queue_items",
Help: "The number of reboot queue entries remaining per status.",
},
// rebootQueueItems describes the gauge reporting the number of reboot queue
// entries remaining per status; it carries a single "status" label. The
// fully-qualified name is built from the package namespace so the exported
// metric keeps its namespace prefix.
var rebootQueueItems = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "reboot_queue_items"),
	"The number of reboot queue entries remaining per status.",
	[]string{"status"},
	nil,
)

var nodeRebootStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Name: "node_reboot_status",
Help: "The reboot status of a node.",
}, []string{"node", "status"},
// nodeRebootStatus describes the gauge reporting the reboot status of a node;
// it carries "node" and "status" labels. The fully-qualified name is built
// from the package namespace so the exported metric keeps its namespace prefix.
var nodeRebootStatus = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "node_reboot_status"),
	"The reboot status of a node.",
	[]string{"node", "status"},
	nil,
)

var sabakanIntegrationSuccessful = prometheus.NewGauge(
Expand Down
30 changes: 0 additions & 30 deletions metrics/updater.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,36 +39,6 @@ func isOperationPhaseAvailable(_ context.Context, _ storage) (bool, error) {
return isLeader, nil
}

// UpdateRebootQueueEntries sets the "reboot_queue_entries" gauge to numEntries.
func UpdateRebootQueueEntries(numEntries int) {
	remaining := float64(numEntries)
	rebootQueueEntries.Set(remaining)
}

// UpdateRebootQueueItems sets the "reboot_queue_items" gauge for every status
// present in counts, using the status as the "status" label value.
func UpdateRebootQueueItems(counts map[string]int) {
	for status, n := range counts {
		labels := map[string]string{"status": status}
		gauge := rebootQueueItems.With(labels)
		gauge.Set(float64(n))
	}
}

// UpdateNodeRebootStatus sets the "node_reboot_status" gauge for every
// (node, status) pair in nodeStatus: 1 when the pair's flag is true, 0 otherwise.
func UpdateNodeRebootStatus(nodeStatus map[string]map[string]bool) {
	for node, statuses := range nodeStatus {
		for status, matches := range statuses {
			labels := map[string]string{
				"node":   node,
				"status": status,
			}
			gauge := nodeRebootStatus.With(labels)
			if matches {
				gauge.Set(float64(1))
			} else {
				gauge.Set(float64(0))
			}
		}
	}
}

// isRebootAvailable reports whether the "reboot" metric group is available.
// It returns the current value of isLeader and never returns an error; both
// arguments are ignored.
func isRebootAvailable(_ context.Context, _ storage) (bool, error) {
	available := isLeader
	return available, nil
}
Expand Down
6 changes: 0 additions & 6 deletions metrics/updater_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,6 @@ func testUpdateRebootQueueEntries(t *testing.T) {
collector, _ := newTestCollector()
handler := GetHandler(collector)

UpdateRebootQueueEntries(tt.input)

w := httptest.NewRecorder()
req := httptest.NewRequest("GET", "/metrics", nil)
handler.ServeHTTP(w, req)
Expand Down Expand Up @@ -343,8 +341,6 @@ func testUpdateRebootQueueItems(t *testing.T) {
collector, _ := newTestCollector()
handler := GetHandler(collector)

UpdateRebootQueueItems(tt.input)

w := httptest.NewRecorder()
req := httptest.NewRequest("GET", "/metrics", nil)
handler.ServeHTTP(w, req)
Expand Down Expand Up @@ -396,8 +392,6 @@ func testUpdateNodeRebootStatus(t *testing.T) {
collector, _ := newTestCollector()
handler := GetHandler(collector)

UpdateNodeRebootStatus(input)

w := httptest.NewRecorder()
req := httptest.NewRequest("GET", "/metrics", nil)
handler.ServeHTTP(w, req)
Expand Down
2 changes: 2 additions & 0 deletions sabakan/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ func MachineToNode(m *Machine, tmpl *cke.Node) *cke.Node {
n.Labels["cke.cybozu.com/rack"] = strconv.Itoa(m.Spec.Rack)
n.Labels["cke.cybozu.com/index-in-rack"] = strconv.Itoa(m.Spec.IndexInRack)
n.Labels["cke.cybozu.com/role"] = m.Spec.Role
n.Labels["cke.cybozu.com/retire-date"] = m.Spec.RetireDate.Format("2006-01")
n.Labels["cke.cybozu.com/register-date"] = m.Spec.RegisterDate.Format("2006-01")
n.Labels["node-role.kubernetes.io/"+m.Spec.Role] = "true"
if n.ControlPlane {
n.Labels["node-role.kubernetes.io/master"] = "true"
Expand Down

0 comments on commit f6fcafa

Please sign in to comment.