Skip to content

Commit

Permalink
export unit name in place of cgroup name
Browse files Browse the repository at this point in the history
  • Loading branch information
jay-mckay committed Dec 2, 2024
1 parent 72398ee commit 969e837
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 32 deletions.
51 changes: 33 additions & 18 deletions metrics/collector.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package metrics

import (
"fmt"
"log"
"net/http"
"os/user"
Expand All @@ -21,8 +22,8 @@ const (

var (
namespace = "cgroup_warden"
labels = []string{"cgroup", "username"}
procLabels = []string{"cgroup", "username", "proc"}
labels = []string{"username", "unit"}
procLabels = []string{"username", "unit", "proc"}
lock = sync.RWMutex{}
)

Expand All @@ -49,7 +50,7 @@ type Collector struct {
}

type Metric struct {
cgroup string
unit string
username string
memoryUsage uint64
cpuUsage float64
Expand All @@ -67,12 +68,12 @@ func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
func (c *Collector) Collect(ch chan<- prometheus.Metric) {
stats := c.CollectMetrics()
for _, s := range stats {
ch <- prometheus.MustNewConstMetric(c.memoryUsage, prometheus.GaugeValue, float64(s.memoryUsage), s.cgroup, s.username)
ch <- prometheus.MustNewConstMetric(c.cpuUsage, prometheus.CounterValue, s.cpuUsage, s.cgroup, s.username)
ch <- prometheus.MustNewConstMetric(c.memoryUsage, prometheus.GaugeValue, float64(s.memoryUsage), s.username, s.unit)
ch <- prometheus.MustNewConstMetric(c.cpuUsage, prometheus.CounterValue, s.cpuUsage, s.username, s.unit)
for name, p := range s.processes {
ch <- prometheus.MustNewConstMetric(c.procCPU, prometheus.CounterValue, float64(p.cpu), s.cgroup, s.username, name)
ch <- prometheus.MustNewConstMetric(c.procMemory, prometheus.GaugeValue, float64(p.memory), s.cgroup, s.username, name)
ch <- prometheus.MustNewConstMetric(c.procCount, prometheus.GaugeValue, float64(p.count), s.cgroup, s.username, name)
ch <- prometheus.MustNewConstMetric(c.procCPU, prometheus.CounterValue, float64(p.cpu), s.username, s.unit, name)
ch <- prometheus.MustNewConstMetric(c.procMemory, prometheus.GaugeValue, float64(p.memory), s.username, s.unit, name)
ch <- prometheus.MustNewConstMetric(c.procCount, prometheus.GaugeValue, float64(p.count), s.username, s.unit, name)
}
}
}
Expand Down Expand Up @@ -109,7 +110,7 @@ type hierarchy interface {
GetGroupsWithPIDs() groupPIDMap

// creates a metric for the cgroup with the PID information
CreateMetric(cgroup string, pids pidSet) Metric
CreateMetric(cgroup string, pids pidSet) *Metric
}

func (c *Collector) CollectMetrics() []Metric {
Expand All @@ -131,9 +132,11 @@ func (c *Collector) CollectMetrics() []Metric {
go func(group string, procs map[uint64]bool) {
defer wg.Done()
metric := h.CreateMetric(group, pids)
lock.Lock()
metrics = append(metrics, metric)
lock.Unlock()
if metric != nil {
lock.Lock()
metrics = append(metrics, *metric)
lock.Unlock()
}
}(group, pids)
}

Expand Down Expand Up @@ -191,22 +194,34 @@ func ProcInfo(pids map[uint64]bool) map[string]Process {
return processes
}

var userSliceRe = regexp.MustCompile(`user-(\d+)\.slice`)
// unitRe matches a systemd user slice unit name of the form
// "user-<digits>.slice" anywhere inside a cgroup path.
var unitRe = regexp.MustCompile(`(user-\d+\.slice)`)

// unitName extracts the user slice unit name from a cgroup path.
//
// It returns the first "user-<digits>.slice" substring found in cgroup, for
// example "user-1234.slice" for "/user.slice/user-1234.slice". An error is
// returned when cgroup contains no such substring.
func unitName(cgroup string) (string, error) {
	// The pattern cannot match the empty string, so an empty result from
	// FindString always means "no match".
	if unit := unitRe.FindString(cgroup); unit != "" {
		return unit, nil
	}
	return "", fmt.Errorf("cannot determine slice from '%s'", cgroup)
}

var uidRe = regexp.MustCompile(`user-(\d+)\.slice`)

// lookupUsername looks up the username associated with a user slice cgroup.
// The slice may be given as a bare unit name such as 'user-1000.slice' or as a
// path such as '/user.slice/user-1234.slice'.
// The binary must be built with cgo enabled (CGO_ENABLED=1) if used over NFS.
func lookupUsername(slice string) string {
match := userSliceRe.FindStringSubmatch(slice)
func lookupUsername(slice string) (string, error) {
match := uidRe.FindStringSubmatch(slice)

if len(match) < 2 {
return "unknown user"
return "", fmt.Errorf("cannot determine uid from '%s'", slice)
}

user, err := user.LookupId(match[1])
if err != nil {
return "unknown user"
return "", fmt.Errorf("unable to lookup user with id '%s'", match[1])
}

return user.Username
return user.Username, nil
}
24 changes: 17 additions & 7 deletions metrics/legacy.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,19 @@ func (l *legacy) GetGroupsWithPIDs() groupPIDMap {
return pids
}

func (l *legacy) CreateMetric(group string, pids pidSet) Metric {
func (l *legacy) CreateMetric(group string, pids pidSet) *Metric {
var metric Metric

metric.cgroup = group

manager, err := cgroup1.Load(cgroup1.StaticPath(group), cgroup1.WithHierarchy(subsystem))
if err != nil {
log.Printf("could not load cgroup '%s': %s\n", group, err)
return metric
return nil
}

stat, err := manager.Stat(cgroup1.IgnoreNotExist)
if err != nil || stat == nil {
log.Printf("could not get stats from cgroup '%s': %s\n", group, err)
return metric
return nil
}

if stat.CPU != nil {
Expand All @@ -70,9 +68,21 @@ func (l *legacy) CreateMetric(group string, pids pidSet) Metric {

metric.processes = ProcInfo(pids)

metric.username = lookupUsername(group)
unit, err := unitName(group)
if err != nil {
log.Println(err)
return nil
}
metric.unit = unit

username, err := lookupUsername(group)
if err != nil {
log.Println(err)
return &metric
}
metric.username = username

return metric
return &metric
}

func subsystem() ([]cgroup1.Subsystem, error) {
Expand Down
24 changes: 17 additions & 7 deletions metrics/unified.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,21 +47,19 @@ func (u *unified) GetGroupsWithPIDs() groupPIDMap {
return pids
}

func (u *unified) CreateMetric(group string, pids pidSet) Metric {
func (u *unified) CreateMetric(group string, pids pidSet) *Metric {
var metric Metric

metric.cgroup = group

manager, err := cgroup2.Load(group)
if err != nil {
log.Printf("could not load cgroup '%s': %s\n", group, err)
return metric
return nil
}

stat, err := manager.Stat()
if err != nil || stat == nil {
log.Printf("could not get stats from cgroup '%s': %s\n", group, err)
return metric
return nil
}

if stat.CPU != nil {
Expand All @@ -74,7 +72,19 @@ func (u *unified) CreateMetric(group string, pids pidSet) Metric {

metric.processes = ProcInfo(pids)

metric.username = lookupUsername(group)
unit, err := unitName(group)
if err != nil {
log.Println(err)
return nil
}
metric.unit = unit

username, err := lookupUsername(group)
if err != nil {
log.Println(err)
return &metric
}
metric.username = username

return metric
return &metric
}

0 comments on commit 969e837

Please sign in to comment.