Skip to content

Commit

Permalink
add optional process metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
jay-mckay committed Sep 5, 2024
1 parent ea6db21 commit 6f1592c
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 27 deletions.
4 changes: 3 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/coreos/go-systemd/v22 v22.5.0
github.com/godbus/dbus/v5 v5.1.0
github.com/prometheus/client_golang v1.20.3
github.com/prometheus/procfs v0.15.1
)

require (
Expand All @@ -17,7 +18,8 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
golang.org/x/sys v0.22.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
)

replace github.com/coreos/go-systemd/v22 => github.com/jay-mckay/go-systemd/v22 v22.0.0
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk=
github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/jay-mckay/go-systemd/v22 v22.0.0 h1:nXxwYusmnPs+IvtAPhMibtBpaiK6LAz2YnQFPUytMms=
github.com/jay-mckay/go-systemd/v22 v22.0.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
Expand Down
23 changes: 11 additions & 12 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,32 @@ func authorize(next http.Handler, secret string) http.Handler {
})
}

// newHandler builds the HTTP routing table for the exporter: the control
// endpoint, the metrics endpoint, and a catch-all that answers every other
// path with 404.
func newHandler() http.Handler {
	router := http.NewServeMux()

	// Named endpoints.
	router.Handle("/control", ControlHandler)
	router.Handle("/metrics", MetricsHandler)

	// Anything else is explicitly not found.
	router.Handle("/", http.NotFoundHandler())

	return router
}

func main() {
var (
pattern string
listenAddr string
certFile string
keyFile string
bearerToken string
insecure bool
collectProc bool
)

flag.StringVar(&pattern, "pattern", "user-*.slice", "unit pattern to match units on")
flag.StringVar(&listenAddr, "listenAddr", ":2112", "address to listen on for telemetry")
flag.StringVar(&certFile, "certFile", "", "file containing certificate to use for tls")
flag.StringVar(&keyFile, "keyFile", "", "file containing key to use for tls")
flag.StringVar(&bearerToken, "bearerToken", "", "bearer token to use for authentication")
flag.BoolVar(&insecure, "insecure", false, "disable tls and bearer token authentication")
flag.BoolVar(&collectProc, "collectProc", false, "enable the collection of process metrics")
flag.Parse()

mux := http.NewServeMux()
mux.Handle("/control", ControlHandler)
mux.Handle("/metrics", MetricsHandler(pattern, collectProc))
mux.Handle("/", http.NotFoundHandler())
var handler http.Handler = mux

if !insecure {
if certFile == "" {
log.Fatal("certificate required for use with tls")
Expand All @@ -53,11 +54,9 @@ func main() {
log.Fatal("token of length > 16 required for authentication")
}

handler := authorize(newHandler(), bearerToken)
log.Fatal(http.ListenAndServeTLS(listenAddr, certFile, keyFile, handler))
log.Fatal(http.ListenAndServeTLS(listenAddr, certFile, keyFile, authorize(handler, bearerToken)))

} else {
handler := newHandler()
log.Println("running in insecure mode")
log.Fatal(http.ListenAndServe(listenAddr, handler))
}
Expand Down
113 changes: 101 additions & 12 deletions metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,26 @@ import (
systemd "github.com/coreos/go-systemd/v22/dbus"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/procfs"
)

var MetricsHandler = http.HandlerFunc(metricsHandler)

func metricsHandler(w http.ResponseWriter, r *http.Request) {
registry := prometheus.NewRegistry()
collector := NewCollector("user-*.slice")
registry.MustRegister(collector)
h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
h.ServeHTTP(w, r)
// MetricsHandler returns an HTTP handler that serves Prometheus metrics
// gathered by a Collector created with the given unit pattern. collectProc is
// passed through to the collector and controls whether the optional
// per-process metrics are collected.
//
// The registry, collector and promhttp handler are built once, when
// MetricsHandler is called, and reused for every request rather than being
// rebuilt on each scrape. A registration failure therefore panics (via
// MustRegister) at setup time instead of inside a request.
func MetricsHandler(pattern string, collectProc bool) http.HandlerFunc {
	registry := prometheus.NewRegistry()
	registry.MustRegister(NewCollector(pattern, collectProc))
	h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
	// The method value has the func(ResponseWriter, *Request) shape that
	// http.HandlerFunc requires, so callers keep the same return type.
	return h.ServeHTTP
}

var namespace = "systemd_unit"
var labels = []string{"unit", "username"}
var procLabels = []string{"unit", "username", "proc"}

type Collector struct {
pattern string
collectProc bool
memoryAccounting *prometheus.Desc
memoryMax *prometheus.Desc
memoryMin *prometheus.Desc
Expand All @@ -36,6 +39,9 @@ type Collector struct {
cpuAccounting *prometheus.Desc
cpuUsage *prometheus.Desc
cpuQuota *prometheus.Desc
procCPU *prometheus.Desc
procMemory *prometheus.Desc
procCount *prometheus.Desc
}

type Metric struct {
Expand All @@ -50,11 +56,19 @@ type Metric struct {
cpuQuota uint64
unit string
username string
processes map[string]*Process
}

func NewCollector(pattern string) *Collector {
// Process holds statistics aggregated over all processes in a unit that share
// the same command name.
type Process struct {
	// cpu is the sum of stat.CPUTime() over the aggregated processes.
	cpu float64
	// memory is the sum of the smaps rollup Pss values; count is the number
	// of processes folded into this entry.
	memory, count uint64
}

func NewCollector(pattern string, collectProc bool) *Collector {
return &Collector{
pattern: pattern,
pattern: pattern,
collectProc: collectProc,
memoryAccounting: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memory", "accounting"),
"Whether memory accounting is enabled", labels, nil),
memoryMax: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memory", "max_bytes"),
Expand All @@ -69,10 +83,16 @@ func NewCollector(pattern string) *Collector {
"Resident shared size memory usage", labels, nil),
cpuAccounting: prometheus.NewDesc(prometheus.BuildFQName(namespace, "cpu", "accounting"),
"Whether CPU accounting is enabled", labels, nil),
cpuUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "cpu", "user_seconds"),
cpuUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "cpu", "usage_ns"),
"Total CPU usage", labels, nil),
cpuQuota: prometheus.NewDesc(prometheus.BuildFQName(namespace, "cpu", "quota_seconds_per_second"),
cpuQuota: prometheus.NewDesc(prometheus.BuildFQName(namespace, "cpu", "quota_ns_per_s"),
"CPU Quota", labels, nil),
procCPU: prometheus.NewDesc(prometheus.BuildFQName(namespace, "proc", "cpu_seconds"),
"Aggregate CPU usage for this process", procLabels, nil),
procMemory: prometheus.NewDesc(prometheus.BuildFQName(namespace, "proc", "memory_bytes"),
"Aggregate memory usage for this process", procLabels, nil),
procCount: prometheus.NewDesc(prometheus.BuildFQName(namespace, "proc", "count"),
"Instance count of this process", procLabels, nil),
}
}

Expand All @@ -86,6 +106,11 @@ func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
ch <- c.cpuAccounting
ch <- c.cpuUsage
ch <- c.cpuQuota
if c.collectProc {
ch <- c.procCPU
ch <- c.procMemory
ch <- c.procCount
}
}

func (c *Collector) Collect(ch chan<- prometheus.Metric) {
Expand All @@ -100,6 +125,13 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(c.cpuAccounting, prometheus.GaugeValue, b2f(m.cpuAccounting), m.unit, m.username)
ch <- prometheus.MustNewConstMetric(c.cpuUsage, prometheus.CounterValue, float64(m.cpuUsage), m.unit, m.username)
ch <- prometheus.MustNewConstMetric(c.cpuQuota, prometheus.CounterValue, float64(m.cpuQuota), m.unit, m.username)
if c.collectProc {
for name, p := range m.processes {
ch <- prometheus.MustNewConstMetric(c.procCPU, prometheus.GaugeValue, p.cpu, m.unit, m.username, name)
ch <- prometheus.MustNewConstMetric(c.procMemory, prometheus.GaugeValue, float64(p.memory), m.unit, m.username, name)
ch <- prometheus.MustNewConstMetric(c.procCount, prometheus.GaugeValue, float64(p.count), m.unit, m.username, name)
}
}
}
}

Expand Down Expand Up @@ -139,11 +171,68 @@ func (c *Collector) collectMetrics() []Metric {
unit: unit.Name,
username: lookupUsername(unit),
}
if c.collectProc {
procs, err := collectProcesses(conn, ctx, unit.Name)
if err != nil {
log.Println(err)
} else {
metric.processes = procs
}
}
metrics = append(metrics, metric)
}
return metrics
}

// collectProcesses aggregates per-process statistics for every process that
// conn.GetUnitProcesses reports for unit, grouped by command name (comm).
//
// For each reported PID it reads the comm, stat and smaps rollup data through
// procfs and folds them into one Process per comm: CPU time and Pss memory
// are summed, and count records how many processes shared that name.
//
// A process whose procfs data cannot be read (presumably because it exited
// after the unit's process list was fetched) is logged together with the unit
// and PID and then skipped; it does not fail the whole collection. An error
// is returned only when the unit's process list or the procfs handle cannot
// be obtained, in which case the returned map is nil.
func collectProcesses(conn *systemd.Conn, ctx context.Context, unit string) (map[string]*Process, error) {
	procs, err := conn.GetUnitProcesses(ctx, unit)
	if err != nil {
		return nil, err
	}

	fs, err := procfs.NewDefaultFS()
	if err != nil {
		return nil, err
	}

	processes := make(map[string]*Process)
	for _, p := range procs {
		pid := int(p.PID)

		proc, err := fs.Proc(pid)
		if err != nil {
			log.Printf("unit %s: pid %d: %v", unit, pid, err)
			continue
		}

		comm, err := proc.Comm()
		if err != nil {
			log.Printf("unit %s: pid %d: reading comm: %v", unit, pid, err)
			continue
		}

		stat, err := proc.Stat()
		if err != nil {
			log.Printf("unit %s: pid %d (%s): reading stat: %v", unit, pid, comm, err)
			continue
		}

		smaps, err := proc.ProcSMapsRollup()
		if err != nil {
			log.Printf("unit %s: pid %d (%s): reading smaps rollup: %v", unit, pid, comm, err)
			continue
		}

		// Get-or-create the aggregate for this command name, then
		// accumulate into it; a fresh entry starts from the zero value.
		agg, ok := processes[comm]
		if !ok {
			agg = &Process{}
			processes[comm] = agg
		}
		agg.cpu += stat.CPUTime()
		agg.memory += smaps.Pss
		agg.count++
	}
	return processes, nil
}

func lookupUsername(unit systemd.UnitStatus) string {
pattern := `^user-(\d+)\.slice$`
re := regexp.MustCompile(pattern)
Expand Down

0 comments on commit 6f1592c

Please sign in to comment.