From 6f1592c4d24cfc1c2d02e01b0b657d4b43a1f3b9 Mon Sep 17 00:00:00 2001
From: jay-mckay
Date: Thu, 5 Sep 2024 13:36:01 -0600
Subject: [PATCH] add optional process metrics

Add a -collectProc flag. When it is set, the collector asks systemd for
the processes of every matched unit (GetUnitProcesses), reads each one
through prometheus/procfs, and exports three extra series labelled
unit/username/proc:

  systemd_unit_proc_cpu_seconds   sum of Stat().CPUTime()
  systemd_unit_proc_memory_bytes  sum of ProcSMapsRollup().Pss
  systemd_unit_proc_count         number of processes

Processes are grouped by the value of Comm(), so every process of a unit
that shares a comm is folded into one series. A process whose /proc
entries cannot be read is logged and skipped.

Also in this change:

* add a -pattern flag (default "user-*.slice") in place of the unit
  pattern that was hard-coded in the metrics handler; the mux is now
  built in main so both flags can be passed to MetricsHandler
* rename the cpu metrics: user_seconds is now usage_ns and
  quota_seconds_per_second is now quota_ns_per_s
* make prometheus/procfs a direct dependency
* replace github.com/coreos/go-systemd/v22 with
  github.com/jay-mckay/go-systemd/v22 v22.0.0

---
 go.mod     |   4 +-
 go.sum     |   4 +-
 main.go    |  23 ++++++-----
 metrics.go | 113 +++++++++++++++++++++++++++++++++++++++++++++++------
 4 files changed, 117 insertions(+), 27 deletions(-)

diff --git a/go.mod b/go.mod
index 0a71ccf..93d5e1b 100644
--- a/go.mod
+++ b/go.mod
@@ -8,6 +8,7 @@ require (
 	github.com/coreos/go-systemd/v22 v22.5.0
 	github.com/godbus/dbus/v5 v5.1.0
 	github.com/prometheus/client_golang v1.20.3
+	github.com/prometheus/procfs v0.15.1
 )
 
 require (
@@ -17,7 +18,8 @@ require (
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
 	github.com/prometheus/client_model v0.6.1 // indirect
 	github.com/prometheus/common v0.55.0 // indirect
-	github.com/prometheus/procfs v0.15.1 // indirect
 	golang.org/x/sys v0.22.0 // indirect
 	google.golang.org/protobuf v1.34.2 // indirect
 )
+
+replace github.com/coreos/go-systemd/v22 => github.com/jay-mckay/go-systemd/v22 v22.0.0
diff --git a/go.sum b/go.sum
index e9be645..12dde30 100644
--- a/go.sum
+++ b/go.sum
@@ -2,13 +2,13 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
 github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
-github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk=
 github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/jay-mckay/go-systemd/v22 v22.0.0 h1:nXxwYusmnPs+IvtAPhMibtBpaiK6LAz2YnQFPUytMms=
+github.com/jay-mckay/go-systemd/v22 v22.0.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
 github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
 github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
 github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
diff --git a/main.go b/main.go
index ad80422..12d43cf 100644
--- a/main.go
+++ b/main.go
@@ -17,31 +17,32 @@ func authorize(next http.Handler, secret string) http.Handler {
 	})
 }
 
-func newHandler() http.Handler {
-	mux := http.NewServeMux()
-	mux.Handle("/control", ControlHandler)
-	mux.Handle("/metrics", MetricsHandler)
-	mux.Handle("/", http.NotFoundHandler())
-	var handler http.Handler = mux
-	return handler
-}
-
 func main() {
 	var (
+		pattern     string
 		listenAddr  string
 		certFile    string
 		keyFile     string
 		bearerToken string
 		insecure    bool
+		collectProc bool
 	)
 
+	flag.StringVar(&pattern, "pattern", "user-*.slice", "unit pattern to match units on")
 	flag.StringVar(&listenAddr, "listenAddr", ":2112", "address to listen on for telemetry")
 	flag.StringVar(&certFile, "certFile", "", "file containing certificate to use for tls")
 	flag.StringVar(&keyFile, "keyFile", "", "file containing key to use for tls")
 	flag.StringVar(&bearerToken, "bearerToken", "", "bearer token to use for authentication")
 	flag.BoolVar(&insecure, "insecure", false, "disable tls and bearer token authentication")
+	flag.BoolVar(&collectProc, "collectProc", false, "enable the collection of process metrics")
 	flag.Parse()
 
+	mux := http.NewServeMux()
+	mux.Handle("/control", ControlHandler)
+	mux.Handle("/metrics", MetricsHandler(pattern, collectProc))
+	mux.Handle("/", http.NotFoundHandler())
+	var handler http.Handler = mux
+
 	if !insecure {
 		if certFile == "" {
 			log.Fatal("certificate required for use with tls")
@@ -53,11 +54,9 @@ func main() {
 			log.Fatal("token of length > 16 required for authentication")
 		}
 
-		handler := authorize(newHandler(), bearerToken)
-		log.Fatal(http.ListenAndServeTLS(listenAddr, certFile, keyFile, handler))
+		log.Fatal(http.ListenAndServeTLS(listenAddr, certFile, keyFile, authorize(handler, bearerToken)))
 
 	} else {
-		handler := newHandler()
 		log.Println("running in insecure mode")
 		log.Fatal(http.ListenAndServe(listenAddr, handler))
 	}
diff --git a/metrics.go b/metrics.go
index 17f37ba..00152fd 100644
--- a/metrics.go
+++ b/metrics.go
@@ -10,23 +10,26 @@ import (
 	systemd "github.com/coreos/go-systemd/v22/dbus"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
+	"github.com/prometheus/procfs"
 )
 
-var MetricsHandler = http.HandlerFunc(metricsHandler)
-
-func metricsHandler(w http.ResponseWriter, r *http.Request) {
-	registry := prometheus.NewRegistry()
-	collector := NewCollector("user-*.slice")
-	registry.MustRegister(collector)
-	h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
-	h.ServeHTTP(w, r)
+func MetricsHandler(pattern string, collectProc bool) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		registry := prometheus.NewRegistry()
+		collector := NewCollector(pattern, collectProc)
+		registry.MustRegister(collector)
+		h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
+		h.ServeHTTP(w, r)
+	}
 }
 
 var namespace = "systemd_unit"
 var labels = []string{"unit", "username"}
+var procLabels = []string{"unit", "username", "proc"}
 
 type Collector struct {
 	pattern          string
+	collectProc      bool
 	memoryAccounting *prometheus.Desc
 	memoryMax        *prometheus.Desc
 	memoryMin        *prometheus.Desc
@@ -36,6 +39,9 @@ type Collector struct {
 	cpuAccounting    *prometheus.Desc
 	cpuUsage         *prometheus.Desc
 	cpuQuota         *prometheus.Desc
+	procCPU          *prometheus.Desc
+	procMemory       *prometheus.Desc
+	procCount        *prometheus.Desc
 }
 
 type Metric struct {
@@ -50,11 +56,19 @@ type Metric struct {
 	cpuQuota         uint64
 	unit             string
 	username         string
+	processes        map[string]*Process
 }
 
-func NewCollector(pattern string) *Collector {
+type Process struct {
+	cpu    float64
+	memory uint64
+	count  uint64
+}
+
+func NewCollector(pattern string, collectProc bool) *Collector {
 	return &Collector{
-		pattern: pattern,
+		pattern:     pattern,
+		collectProc: collectProc,
 		memoryAccounting: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memory", "accounting"),
 			"Whether memory accounting is enabled", labels, nil),
 		memoryMax: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memory", "max_bytes"),
@@ -69,10 +83,16 @@ func NewCollector(pattern string) *Collector {
 			"Resident shared size memory usage", labels, nil),
 		cpuAccounting: prometheus.NewDesc(prometheus.BuildFQName(namespace, "cpu", "accounting"),
 			"Whether CPU accounting is enabled", labels, nil),
-		cpuUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "cpu", "user_seconds"),
+		cpuUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "cpu", "usage_ns"),
 			"Total CPU usage", labels, nil),
-		cpuQuota: prometheus.NewDesc(prometheus.BuildFQName(namespace, "cpu", "quota_seconds_per_second"),
+		cpuQuota: prometheus.NewDesc(prometheus.BuildFQName(namespace, "cpu", "quota_ns_per_s"),
 			"CPU Quota", labels, nil),
+		procCPU: prometheus.NewDesc(prometheus.BuildFQName(namespace, "proc", "cpu_seconds"),
+			"Aggregate CPU usage for this process", procLabels, nil),
+		procMemory: prometheus.NewDesc(prometheus.BuildFQName(namespace, "proc", "memory_bytes"),
+			"Aggregate memory usage for this process", procLabels, nil),
+		procCount: prometheus.NewDesc(prometheus.BuildFQName(namespace, "proc", "count"),
+			"Instance count of this process", procLabels, nil),
 	}
 }
 
@@ -86,6 +106,11 @@ func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
 	ch <- c.cpuAccounting
 	ch <- c.cpuUsage
 	ch <- c.cpuQuota
+	if c.collectProc {
+		ch <- c.procCPU
+		ch <- c.procMemory
+		ch <- c.procCount
+	}
 }
 
 func (c *Collector) Collect(ch chan<- prometheus.Metric) {
@@ -100,6 +125,13 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) {
 		ch <- prometheus.MustNewConstMetric(c.cpuAccounting, prometheus.GaugeValue, b2f(m.cpuAccounting), m.unit, m.username)
 		ch <- prometheus.MustNewConstMetric(c.cpuUsage, prometheus.CounterValue, float64(m.cpuUsage), m.unit, m.username)
 		ch <- prometheus.MustNewConstMetric(c.cpuQuota, prometheus.CounterValue, float64(m.cpuQuota), m.unit, m.username)
+		if c.collectProc {
+			for name, p := range m.processes {
+				ch <- prometheus.MustNewConstMetric(c.procCPU, prometheus.GaugeValue, p.cpu, m.unit, m.username, name)
+				ch <- prometheus.MustNewConstMetric(c.procMemory, prometheus.GaugeValue, float64(p.memory), m.unit, m.username, name)
+				ch <- prometheus.MustNewConstMetric(c.procCount, prometheus.GaugeValue, float64(p.count), m.unit, m.username, name)
+			}
+		}
 	}
 }
 
@@ -139,11 +171,68 @@ func (c *Collector) collectMetrics() []Metric {
 			unit:             unit.Name,
 			username:         lookupUsername(unit),
 		}
+		if c.collectProc {
+			procs, err := collectProcesses(ctx, conn, unit.Name)
+			if err != nil {
+				log.Printf("collecting processes for unit %s: %v", unit.Name, err)
+			} else {
+				metric.processes = procs
+			}
+		}
 		metrics = append(metrics, metric)
 	}
 	return metrics
 }
 
+func collectProcesses(ctx context.Context, conn *systemd.Conn, unit string) (map[string]*Process, error) {
+	processes := make(map[string]*Process)
+	procs, err := conn.GetUnitProcesses(ctx, unit)
+	if err != nil {
+		return nil, err
+	}
+
+	fs, err := procfs.NewDefaultFS()
+	if err != nil {
+		return nil, err
+	}
+
+	for _, p := range procs {
+		proc, err := fs.Proc(int(p.PID))
+		if err != nil {
+			log.Printf("unit %s: pid %v: %v", unit, p.PID, err)
+			continue
+		}
+
+		comm, err := proc.Comm()
+		if err != nil {
+			log.Printf("unit %s: pid %v: reading comm: %v", unit, p.PID, err)
+			continue
+		}
+
+		stat, err := proc.Stat()
+		if err != nil {
+			log.Printf("unit %s: pid %v: reading stat: %v", unit, p.PID, err)
+			continue
+		}
+
+		smaps, err := proc.ProcSMapsRollup()
+		if err != nil {
+			log.Printf("unit %s: pid %v: reading smaps_rollup: %v", unit, p.PID, err)
+			continue
+		}
+
+		val, ok := processes[comm] // one entry per comm; same-named processes are summed
+		if !ok {
+			processes[comm] = &Process{cpu: stat.CPUTime(), memory: smaps.Pss, count: 1}
+		} else {
+			val.cpu += stat.CPUTime()
+			val.memory += smaps.Pss
+			val.count++
+		}
+	}
+	return processes, nil
+}
+
 func lookupUsername(unit systemd.UnitStatus) string {
 	pattern := `^user-(\d+)\.slice$`
 	re := regexp.MustCompile(pattern)