From 669bb52584688c0071b3404897fc2a33727c9b8e Mon Sep 17 00:00:00 2001 From: Oleg Vasilev Date: Tue, 7 May 2024 10:02:39 +0300 Subject: [PATCH] agent: don't treat downscale denies as failed request (#927) Fixes #926, a follow-up to #770. The definition of VM stuckness was changed to include denied downscale requests in the following commit: commit fdf01331d233985128ac9313ad77681162871bf0 Author: Shayan Hosseini Date: Sat Apr 6 09:25:01 2024 -0400 agent: track more liveness in vm-stuck metrics (#855) This resulted in the consistent firing of the alert. We should actually treat the denied downscale as part of normal operation. This can happen due to mismatched policies on what is an acceptable level of memory usage in autoscaler-agent vs vm_monitor. Signed-off-by: Oleg Vasilev --- pkg/agent/execbridge.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/agent/execbridge.go b/pkg/agent/execbridge.go index bcd011264..7ea6d89dc 100644 --- a/pkg/agent/execbridge.go +++ b/pkg/agent/execbridge.go @@ -169,8 +169,10 @@ func (h *execMonitorHandle) Downscale( result, err := doMonitorDownscale(ctx, logger, h.monitor.dispatcher, target) - if err == nil && result.Ok { - h.runner.recordResourceChange(current, target, h.runner.global.metrics.monitorApprovedChange) + if err == nil { + if result.Ok { + h.runner.recordResourceChange(current, target, h.runner.global.metrics.monitorApprovedChange) + } } else { h.runner.status.update(h.runner.global, func(ps podStatus) podStatus { ps.failedMonitorRequestCounter.Inc()