From e428611b814bc052cfff19a525e140a546980568 Mon Sep 17 00:00:00 2001 From: Plamen Kokanov Date: Thu, 15 Aug 2024 09:38:00 +0300 Subject: [PATCH 1/3] Add logging and monitoring in case we have minimum memory recommendations --- .../pkg/recommender/logic/estimator.go | 32 +++++++++++++++++-- .../pkg/recommender/logic/recommender.go | 10 +++--- .../pkg/recommender/logic/recommender_test.go | 11 ++++--- .../pkg/recommender/routines/recommender.go | 2 +- .../pkg/utils/metrics/quality/quality.go | 13 ++++++++ 5 files changed, 55 insertions(+), 13 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go b/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go index 079de558f663..a3248a053d75 100644 --- a/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go +++ b/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go @@ -17,10 +17,15 @@ limitations under the License. package logic import ( + "encoding/json" "math" "time" + corev1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model" + metrics_quality "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/quality" ) // TODO: Split the estimator to have a separate estimator object for CPU and memory. @@ -53,6 +58,7 @@ type marginEstimator struct { type minResourcesEstimator struct { minResources model.Resources baseEstimator ResourceEstimator + vpaKey model.VpaID } type confidenceMultiplier struct { @@ -79,8 +85,8 @@ func WithMargin(marginFraction float64, baseEstimator ResourceEstimator) Resourc // WithMinResources returns a given ResourceEstimator with minResources applied. // The returned resources are equal to the max(original resources, minResources) -func WithMinResources(minResources model.Resources, baseEstimator ResourceEstimator) ResourceEstimator { - return &minResourcesEstimator{minResources, baseEstimator} +func WithMinResources(minResources model.Resources, baseEstimator ResourceEstimator, vpaKey model.VpaID) ResourceEstimator { + return &minResourcesEstimator{minResources, baseEstimator, vpaKey} } // WithConfidenceMultiplier returns a given ResourceEstimator with confidenceMultiplier applied. @@ -152,9 +158,31 @@ func (e *minResourcesEstimator) GetResourceEstimation(s *model.AggregateContaine newResources := make(model.Resources) for resource, resourceAmount := range originalResources { if resourceAmount < e.minResources[resource] { + if resource == "memory" { + klog.Warningf("Computed resources for %s were below minimum! Computed %v, minimum is %v.", resource, resourceAmount, e.minResources[resource]) + logHistogramInformation(s) + metrics_quality.ObserveLowerThanMinRecommendation(s.GetUpdateMode(), corev1.ResourceName(resource), e.vpaKey.Namespace+"/"+e.vpaKey.VpaName) + } resourceAmount = e.minResources[resource] } newResources[resource] = resourceAmount } return newResources } + +func logHistogramInformation(s *model.AggregateContainerState) { + if s.AggregateCPUUsage == nil { + klog.Warning("Aggregate CPU usage has no metric samples, cannot show internal histogram data!") + return + } + if s.AggregateMemoryPeaks == nil { + klog.Warning("Aggregate memory usage has no metric samples, cannot show internal histogram data!") + return + } + c, _ := s.SaveToCheckpoint() + prettyCheckpoint, err := json.MarshalIndent(c, "", " ") + if err != nil { + klog.Errorf("Error during marshalling checkpoint: %s", err) + } + klog.Warningf("Here's the checkpoint/state: %s", prettyCheckpoint) +} diff --git a/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go b/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go index 4afa93d69e7f..9b92d75a69d4 100644 --- a/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go +++ b/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go @@ -38,7 +38,7 @@ var ( // PodResourceRecommender computes resource recommendation for a Vpa object. type PodResourceRecommender interface { - GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap) RecommendedPodResources + GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap, vpaKey model.VpaID) RecommendedPodResources } // RecommendedPodResources is a Map from container name to recommended resources. @@ -61,7 +61,7 @@ type podResourceRecommender struct { upperBoundEstimator ResourceEstimator } -func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap) RecommendedPodResources { +func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap, vpaKey model.VpaID) RecommendedPodResources { var recommendation = make(RecommendedPodResources) if len(containerNameToAggregateStateMap) == 0 { return recommendation @@ -74,9 +74,9 @@ func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggre } recommender := &podResourceRecommender{ - WithMinResources(minResources, r.targetEstimator), - WithMinResources(minResources, r.lowerBoundEstimator), - WithMinResources(minResources, r.upperBoundEstimator), + WithMinResources(minResources, r.targetEstimator, vpaKey), + WithMinResources(minResources, r.lowerBoundEstimator, vpaKey), + WithMinResources(minResources, r.upperBoundEstimator, vpaKey), } for containerName, aggregatedContainerState := range containerNameToAggregateStateMap { diff --git a/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go b/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go index 6e7e788dc391..3b7de5575fe5 100644 --- a/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go +++ b/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go @@ -17,9 +17,10 @@ limitations under the License. package logic import ( + "testing" + "github.com/stretchr/testify/assert" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model" - "testing" ) func TestMinResourcesApplied(t *testing.T) { @@ -36,7 +37,7 @@ func TestMinResourcesApplied(t *testing.T) { "container-1": &model.AggregateContainerState{}, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, nil) assert.Equal(t, model.CPUAmountFromCores(*podMinCPUMillicores/1000), recommendedResources["container-1"].Target[model.ResourceCPU]) assert.Equal(t, model.MemoryAmountFromBytes(*podMinMemoryMb*1024*1024), recommendedResources["container-1"].Target[model.ResourceMemory]) } @@ -56,7 +57,7 @@ func TestMinResourcesSplitAcrossContainers(t *testing.T) { "container-2": &model.AggregateContainerState{}, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, nil) assert.Equal(t, model.CPUAmountFromCores((*podMinCPUMillicores/1000)/2), recommendedResources["container-1"].Target[model.ResourceCPU]) assert.Equal(t, model.CPUAmountFromCores((*podMinCPUMillicores/1000)/2), recommendedResources["container-2"].Target[model.ResourceCPU]) assert.Equal(t, model.MemoryAmountFromBytes((*podMinMemoryMb*1024*1024)/2), recommendedResources["container-1"].Target[model.ResourceMemory]) @@ -80,7 +81,7 @@ func TestControlledResourcesFiltered(t *testing.T) { }, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, nil) assert.Contains(t, recommendedResources[containerName].Target, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].LowerBound, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].UpperBound, model.ResourceMemory) @@ -106,7 +107,7 @@ func TestControlledResourcesFilteredDefault(t *testing.T) { }, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, nil) assert.Contains(t, recommendedResources[containerName].Target, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].LowerBound, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].UpperBound, model.ResourceMemory) diff --git a/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go b/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go index 5a89480e701e..e00648ff9750 100644 --- a/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go +++ b/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go @@ -91,7 +91,7 @@ func (r *recommender) UpdateVPAs() { if !found { continue } - resources := r.podResourceRecommender.GetRecommendedPodResources(GetContainerNameToAggregateStateMap(vpa)) + resources := r.podResourceRecommender.GetRecommendedPodResources(GetContainerNameToAggregateStateMap(vpa), key) had := vpa.HasRecommendation() listOfResourceRecommendation := logic.MapToListOfRecommendedContainerResources(resources) diff --git a/vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go b/vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go index 55695eab11ea..b2811f1171b6 100644 --- a/vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go +++ b/vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go @@ -116,6 +116,13 @@ var ( Buckets: relativeBuckets, }, []string{"update_mode", "resource", "vpa_size_log2"}, ) + lowerThanMinRecommendationCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: metricsNamespace, + Name: "lower_than_min_recommendation_count", + Help: "Count of usage samples when resource recommendation values are lower than the minimum", + }, []string{"update_mode", "resource", "vpa"}, + ) ) // Register initializes all VPA quality metrics @@ -129,6 +136,7 @@ func Register() { prometheus.MustRegister(cpuRecommendations) prometheus.MustRegister(memoryRecommendations) prometheus.MustRegister(relativeRecommendationChange) + prometheus.MustRegister(lowerThanMinRecommendationCounter) } // observeUsageRecommendationRelativeDiff records relative diff between usage and @@ -222,6 +230,11 @@ func ObserveRecommendationChange(previous, current corev1.ResourceList, updateMo } } +// ObserveLowerThanMinRecommendation counts usage samples with lower than min memory recommendations. +func ObserveLowerThanMinRecommendation(updateMode *vpa_types.UpdateMode, resource corev1.ResourceName, vpaKey string) { + lowerThanMinRecommendationCounter.WithLabelValues(updateModeToString(updateMode), string(resource), vpaKey).Inc() +} + // quantityAsFloat converts resource.Quantity to a float64 value, in some scale (constant per resource but unspecified) func quantityAsFloat(resource corev1.ResourceName, quantity resource.Quantity) float64 { switch resource { From 4a8dff00d6698a6482da00ae15c18fc0db415c23 Mon Sep 17 00:00:00 2001 From: Plamen Kokanov Date: Thu, 15 Aug 2024 12:25:33 +0300 Subject: [PATCH 2/3] Apply review comments --- .../pkg/recommender/logic/estimator.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go b/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go index a3248a053d75..c87fa9eb7224 100644 --- a/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go +++ b/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go @@ -159,8 +159,8 @@ func (e *minResourcesEstimator) GetResourceEstimation(s *model.AggregateContaine for resource, resourceAmount := range originalResources { if resourceAmount < e.minResources[resource] { if resource == "memory" { - klog.Warningf("Computed resources for %s were below minimum! Computed %v, minimum is %v.", resource, resourceAmount, e.minResources[resource]) - logHistogramInformation(s) + klog.Warningf("Computed %s resources for VPA %s were below minimum! Computed %v, minimum is %v.", resource, klog.KRef(e.vpaKey.Namespace, e.vpaKey.VpaName), resourceAmount, e.minResources[resource]) + logHistogramInformation(s, e.vpaKey) metrics_quality.ObserveLowerThanMinRecommendation(s.GetUpdateMode(), corev1.ResourceName(resource), e.vpaKey.Namespace+"/"+e.vpaKey.VpaName) } resourceAmount = e.minResources[resource] @@ -170,19 +170,20 @@ func (e *minResourcesEstimator) GetResourceEstimation(s *model.AggregateContaine return newResources } -func logHistogramInformation(s *model.AggregateContainerState) { +func logHistogramInformation(s *model.AggregateContainerState, vpaKey model.VpaID) { if s.AggregateCPUUsage == nil { - klog.Warning("Aggregate CPU usage has no metric samples, cannot show internal histogram data!") + klog.Warning("Aggregate CPU usage has no metric samples, cannot show internal histogram data for VPA %s!", klog.KRef(vpaKey.Namespace, vpaKey.VpaName)) return } if s.AggregateMemoryPeaks == nil { - klog.Warning("Aggregate memory usage has no metric samples, cannot show internal histogram data!") + klog.Warning("Aggregate memory usage has no metric samples, cannot show internal histogram data for VPA %s!", klog.KRef(vpaKey.Namespace, vpaKey.VpaName)) return } c, _ := s.SaveToCheckpoint() prettyCheckpoint, err := json.MarshalIndent(c, "", " ") if err != nil { - klog.Errorf("Error during marshalling checkpoint: %s", err) + klog.Errorf("Error during marshalling checkpoint for VPA %s: %s", klog.KRef(vpaKey.Namespace, vpaKey.VpaName), err) + return } - klog.Warningf("Here's the checkpoint/state: %s", prettyCheckpoint) + klog.Warningf("Here's the checkpoint/state for VPA %s: %s", klog.KRef(vpaKey.Namespace, vpaKey.VpaName), prettyCheckpoint) } From a844531dea40572ae665d8bc356dc19fe81c4147 Mon Sep 17 00:00:00 2001 From: Plamen Kokanov Date: Thu, 15 Aug 2024 12:30:15 +0300 Subject: [PATCH 3/3] Fix tests --- .../pkg/recommender/logic/recommender_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go b/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go index 3b7de5575fe5..d2769f97aeb0 100644 --- a/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go +++ b/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go @@ -37,7 +37,7 @@ func TestMinResourcesApplied(t *testing.T) { "container-1": &model.AggregateContainerState{}, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, nil) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, model.VpaID{}) assert.Equal(t, model.CPUAmountFromCores(*podMinCPUMillicores/1000), recommendedResources["container-1"].Target[model.ResourceCPU]) assert.Equal(t, model.MemoryAmountFromBytes(*podMinMemoryMb*1024*1024), recommendedResources["container-1"].Target[model.ResourceMemory]) } @@ -57,7 +57,7 @@ func TestMinResourcesSplitAcrossContainers(t *testing.T) { "container-2": &model.AggregateContainerState{}, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, nil) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, model.VpaID{}) assert.Equal(t, model.CPUAmountFromCores((*podMinCPUMillicores/1000)/2), recommendedResources["container-1"].Target[model.ResourceCPU]) assert.Equal(t, model.CPUAmountFromCores((*podMinCPUMillicores/1000)/2), recommendedResources["container-2"].Target[model.ResourceCPU]) assert.Equal(t, model.MemoryAmountFromBytes((*podMinMemoryMb*1024*1024)/2), recommendedResources["container-1"].Target[model.ResourceMemory]) @@ -81,7 +81,7 @@ func TestControlledResourcesFiltered(t *testing.T) { }, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, nil) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, model.VpaID{}) assert.Contains(t, recommendedResources[containerName].Target, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].LowerBound, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].UpperBound, model.ResourceMemory) @@ -107,7 +107,7 @@ func TestControlledResourcesFilteredDefault(t *testing.T) { }, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, nil) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, model.VpaID{}) assert.Contains(t, recommendedResources[containerName].Target, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].LowerBound, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].UpperBound, model.ResourceMemory)