Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Harvest should track poller maxRSS in auto-support #2982

Merged
merged 1 commit on
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 29 additions & 3 deletions cmd/poller/collector/asup.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ type harvestInfo struct {
NumPollers uint64
NumExporters uint64
NumPortRange uint64
Pid int
RssBytes uint64
MaxRssBytes uint64
EpochMilli int64 // milliseconds since the epoch, in UTC
}

type Counters struct {
Expand Down Expand Up @@ -127,14 +131,14 @@ func (p *Payload) AddCollectorAsup(a AsupCollector) {
*p.Collectors = append(*p.Collectors, a)
}

func SendAutosupport(collectors []Collector, status *matrix.Matrix, pollerName string) error {
func SendAutosupport(collectors []Collector, status *matrix.Matrix, pollerName string, maxRss uint64) error {

var (
msg *Payload
err error
)

if msg, err = BuildAndWriteAutoSupport(collectors, status, pollerName); err != nil {
if msg, err = BuildAndWriteAutoSupport(collectors, status, pollerName, maxRss); err != nil {
return fmt.Errorf("failed to build ASUP message poller:%s %w", pollerName, err)
}

Expand Down Expand Up @@ -193,13 +197,14 @@ func sendAsupVia(msg *Payload, asupExecPath string) error {
return nil
}

func BuildAndWriteAutoSupport(collectors []Collector, status *matrix.Matrix, pollerName string) (*Payload, error) {
func BuildAndWriteAutoSupport(collectors []Collector, status *matrix.Matrix, pollerName string, maxRss uint64) (*Payload, error) {

var (
msg *Payload
arch string
cpus uint8
numPortRange uint64
rssBytes uint64
)

// add info about the platform (where Harvest is running)
Expand Down Expand Up @@ -227,7 +232,24 @@ func BuildAndWriteAutoSupport(collectors []Collector, status *matrix.Matrix, pol
numPortRange++
}
}

hostname, _ := os.Hostname()

// Get the PID and the resident set size (RSS), in bytes, of the current process.
// If either lookup fails, the error is logged and rssBytes keeps its zero value.
pid := os.Getpid()
newProcess, err := process.NewProcess(int32(pid))
if err != nil {
logging.Get().Err(err).Msg("failed to get process info")
} else {
memInfo, err := newProcess.MemoryInfo()
if err != nil {
logging.Get().Err(err).Int("pid", pid).Msg("failed to get memory info")
} else {
rssBytes = memInfo.RSS
}
}

// add harvest release info
msg.Harvest = &harvestInfo{
// harvest uuid creation from sha1 of cluster uuid
Expand All @@ -241,6 +263,10 @@ func BuildAndWriteAutoSupport(collectors []Collector, status *matrix.Matrix, pol
NumPollers: uint64(len(conf.Config.Pollers)),
NumExporters: uint64(len(conf.Config.Exporters)),
NumPortRange: numPortRange,
Pid: pid,
RssBytes: rssBytes,
MaxRssBytes: max(maxRss, rssBytes),
EpochMilli: time.Now().UnixMilli(),
}
payloadPath, err := writeAutoSupport(msg, pollerName)
if err != nil {
Expand Down
9 changes: 6 additions & 3 deletions cmd/poller/poller.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ type Poller struct {
client *http.Client
auth *auth.Credentials
hasPromExporter bool
maxRssBytes uint64
}

// Init starts Poller, reads parameters, opens zeroLog handler, initializes metadata,
Expand Down Expand Up @@ -382,7 +383,7 @@ func (p *Poller) firstAutoSupport() {
if p.collectors == nil {
return
}
if _, err := collector.BuildAndWriteAutoSupport(p.collectors, p.metadataTarget, p.name); err != nil {
if _, err := collector.BuildAndWriteAutoSupport(p.collectors, p.metadataTarget, p.name, p.maxRssBytes); err != nil {
logger.Error().Err(err).
Str("poller", p.name).
Msg("First autosupport failed.")
Expand All @@ -391,7 +392,7 @@ func (p *Poller) firstAutoSupport() {

func (p *Poller) startAsup() (map[string]*matrix.Matrix, error) {
if p.collectors != nil {
if err := collector.SendAutosupport(p.collectors, p.metadataTarget, p.name); err != nil {
if err := collector.SendAutosupport(p.collectors, p.metadataTarget, p.name, p.maxRssBytes); err != nil {
logger.Error().Err(err).
Str("poller", p.name).
Msg("Start autosupport failed.")
Expand Down Expand Up @@ -464,7 +465,6 @@ func (p *Poller) Run() {
p.addMemoryMetadata()

// add number of goroutines to metadata
// @TODO: cleanup, does not belong to "status"
_ = p.metadataTarget.LazySetValueInt64("goroutines", "host", int64(runtime.NumGoroutine()))

upc := 0 // up collectors
Expand Down Expand Up @@ -1269,6 +1269,9 @@ func (p *Poller) addMemoryMetadata() {

memPercentage := float64(memInfo.RSS) / float64(memory.Total) * 100
_ = p.status.LazySetValueFloat64("memory_percent", "host", memPercentage)

// Remember the largest RSS observed so far; this peak is passed to autosupport as maxRssBytes.
p.maxRssBytes = max(p.maxRssBytes, memInfo.RSS)
}

func startPoller(_ *cobra.Command, _ []string) {
Expand Down