From 2cf2b463a5fd3c2999c0e2a4be1f8d8d84156b22 Mon Sep 17 00:00:00 2001 From: Plamen Kokanov <35485709+plkokanov@users.noreply.github.com> Date: Thu, 15 Aug 2024 13:19:07 +0300 Subject: [PATCH] Add logging and monitoring in case we have minimum memory recommendations (#322) * Add logging and monitoring in case we have minimum memory recommendations * Apply review comments * Fix tests --- .../pkg/recommender/logic/estimator.go | 33 +++++++++++++++++-- .../pkg/recommender/logic/recommender.go | 10 +++--- .../pkg/recommender/logic/recommender_test.go | 11 ++++--- .../pkg/recommender/routines/recommender.go | 2 +- .../pkg/utils/metrics/quality/quality.go | 13 ++++++++ 5 files changed, 56 insertions(+), 13 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go b/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go index 079de558f663..c87fa9eb7224 100644 --- a/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go +++ b/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go @@ -17,10 +17,15 @@ limitations under the License. package logic import ( + "encoding/json" "math" "time" + corev1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model" + metrics_quality "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/quality" ) // TODO: Split the estimator to have a separate estimator object for CPU and memory. @@ -53,6 +58,7 @@ type marginEstimator struct { type minResourcesEstimator struct { minResources model.Resources baseEstimator ResourceEstimator + vpaKey model.VpaID } type confidenceMultiplier struct { @@ -79,8 +85,8 @@ func WithMargin(marginFraction float64, baseEstimator ResourceEstimator) Resourc // WithMinResources returns a given ResourceEstimator with minResources applied. // The returned resources are equal to the max(original resources, minResources) -func WithMinResources(minResources model.Resources, baseEstimator ResourceEstimator) ResourceEstimator { - return &minResourcesEstimator{minResources, baseEstimator} +func WithMinResources(minResources model.Resources, baseEstimator ResourceEstimator, vpaKey model.VpaID) ResourceEstimator { + return &minResourcesEstimator{minResources, baseEstimator, vpaKey} } // WithConfidenceMultiplier returns a given ResourceEstimator with confidenceMultiplier applied. @@ -152,9 +158,32 @@ func (e *minResourcesEstimator) GetResourceEstimation(s *model.AggregateContaine newResources := make(model.Resources) for resource, resourceAmount := range originalResources { if resourceAmount < e.minResources[resource] { + if resource == "memory" { + klog.Warningf("Computed %s resources for VPA %s were below minimum! Computed %v, minimum is %v.", resource, klog.KRef(e.vpaKey.Namespace, e.vpaKey.VpaName), resourceAmount, e.minResources[resource]) + logHistogramInformation(s, e.vpaKey) + metrics_quality.ObserveLowerThanMinRecommendation(s.GetUpdateMode(), corev1.ResourceName(resource), e.vpaKey.Namespace+"/"+e.vpaKey.VpaName) + } resourceAmount = e.minResources[resource] } newResources[resource] = resourceAmount } return newResources } + +func logHistogramInformation(s *model.AggregateContainerState, vpaKey model.VpaID) { + if s.AggregateCPUUsage == nil { + klog.Warning("Aggregate CPU usage has no metric samples, cannot show internal histogram data for VPA %s!", klog.KRef(vpaKey.Namespace, vpaKey.VpaName)) + return + } + if s.AggregateMemoryPeaks == nil { + klog.Warning("Aggregate memory usage has no metric samples, cannot show internal histogram data for VPA %s!", klog.KRef(vpaKey.Namespace, vpaKey.VpaName)) + return + } + c, _ := s.SaveToCheckpoint() + prettyCheckpoint, err := json.MarshalIndent(c, "", " ") + if err != nil { + klog.Errorf("Error during marshalling checkpoint for VPA %s: %s", klog.KRef(vpaKey.Namespace, vpaKey.VpaName), err) + return + } + klog.Warningf("Here's the checkpoint/state for VPA %s: %s", klog.KRef(vpaKey.Namespace, vpaKey.VpaName), prettyCheckpoint) +} diff --git a/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go b/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go index 4afa93d69e7f..9b92d75a69d4 100644 --- a/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go +++ b/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go @@ -38,7 +38,7 @@ var ( // PodResourceRecommender computes resource recommendation for a Vpa object. type PodResourceRecommender interface { - GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap) RecommendedPodResources + GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap, vpaKey model.VpaID) RecommendedPodResources } // RecommendedPodResources is a Map from container name to recommended resources. @@ -61,7 +61,7 @@ type podResourceRecommender struct { upperBoundEstimator ResourceEstimator } -func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap) RecommendedPodResources { +func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap, vpaKey model.VpaID) RecommendedPodResources { var recommendation = make(RecommendedPodResources) if len(containerNameToAggregateStateMap) == 0 { return recommendation @@ -74,9 +74,9 @@ func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggre } recommender := &podResourceRecommender{ - WithMinResources(minResources, r.targetEstimator), - WithMinResources(minResources, r.lowerBoundEstimator), - WithMinResources(minResources, r.upperBoundEstimator), + WithMinResources(minResources, r.targetEstimator, vpaKey), + WithMinResources(minResources, r.lowerBoundEstimator, vpaKey), + WithMinResources(minResources, r.upperBoundEstimator, vpaKey), } for containerName, aggregatedContainerState := range containerNameToAggregateStateMap { diff --git a/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go b/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go index 6e7e788dc391..d2769f97aeb0 100644 --- a/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go +++ b/vertical-pod-autoscaler/pkg/recommender/logic/recommender_test.go @@ -17,9 +17,10 @@ limitations under the License. package logic import ( + "testing" + "github.com/stretchr/testify/assert" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model" - "testing" ) func TestMinResourcesApplied(t *testing.T) { @@ -36,7 +37,7 @@ func TestMinResourcesApplied(t *testing.T) { "container-1": &model.AggregateContainerState{}, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, model.VpaID{}) assert.Equal(t, model.CPUAmountFromCores(*podMinCPUMillicores/1000), recommendedResources["container-1"].Target[model.ResourceCPU]) assert.Equal(t, model.MemoryAmountFromBytes(*podMinMemoryMb*1024*1024), recommendedResources["container-1"].Target[model.ResourceMemory]) } @@ -56,7 +57,7 @@ func TestMinResourcesSplitAcrossContainers(t *testing.T) { "container-2": &model.AggregateContainerState{}, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, model.VpaID{}) assert.Equal(t, model.CPUAmountFromCores((*podMinCPUMillicores/1000)/2), recommendedResources["container-1"].Target[model.ResourceCPU]) assert.Equal(t, model.CPUAmountFromCores((*podMinCPUMillicores/1000)/2), recommendedResources["container-2"].Target[model.ResourceCPU]) assert.Equal(t, model.MemoryAmountFromBytes((*podMinMemoryMb*1024*1024)/2), recommendedResources["container-1"].Target[model.ResourceMemory]) @@ -80,7 +81,7 @@ func TestControlledResourcesFiltered(t *testing.T) { }, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, model.VpaID{}) assert.Contains(t, recommendedResources[containerName].Target, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].LowerBound, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].UpperBound, model.ResourceMemory) @@ -106,7 +107,7 @@ func TestControlledResourcesFilteredDefault(t *testing.T) { }, } - recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap) + recommendedResources := recommender.GetRecommendedPodResources(containerNameToAggregateStateMap, model.VpaID{}) assert.Contains(t, recommendedResources[containerName].Target, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].LowerBound, model.ResourceMemory) assert.Contains(t, recommendedResources[containerName].UpperBound, model.ResourceMemory) diff --git a/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go b/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go index 5a89480e701e..e00648ff9750 100644 --- a/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go +++ b/vertical-pod-autoscaler/pkg/recommender/routines/recommender.go @@ -91,7 +91,7 @@ func (r *recommender) UpdateVPAs() { if !found { continue } - resources := r.podResourceRecommender.GetRecommendedPodResources(GetContainerNameToAggregateStateMap(vpa)) + resources := r.podResourceRecommender.GetRecommendedPodResources(GetContainerNameToAggregateStateMap(vpa), key) had := vpa.HasRecommendation() listOfResourceRecommendation := logic.MapToListOfRecommendedContainerResources(resources) diff --git a/vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go b/vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go index 55695eab11ea..b2811f1171b6 100644 --- a/vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go +++ b/vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go @@ -116,6 +116,13 @@ var ( Buckets: relativeBuckets, }, []string{"update_mode", "resource", "vpa_size_log2"}, ) + lowerThanMinRecommendationCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: metricsNamespace, + Name: "lower_than_min_recommendation_count", + Help: "Count of usage samples when resource recommendation values are lower than the minimum", + }, []string{"update_mode", "resource", "vpa"}, + ) ) // Register initializes all VPA quality metrics @@ -129,6 +136,7 @@ func Register() { prometheus.MustRegister(cpuRecommendations) prometheus.MustRegister(memoryRecommendations) prometheus.MustRegister(relativeRecommendationChange) + prometheus.MustRegister(lowerThanMinRecommendationCounter) } // observeUsageRecommendationRelativeDiff records relative diff between usage and @@ -222,6 +230,11 @@ func ObserveRecommendationChange(previous, current corev1.ResourceList, updateMo } } +// ObserveLowerThanMinRecommendation counts usage samples with lower than min memory recommendations. +func ObserveLowerThanMinRecommendation(updateMode *vpa_types.UpdateMode, resource corev1.ResourceName, vpaKey string) { + lowerThanMinRecommendationCounter.WithLabelValues(updateModeToString(updateMode), string(resource), vpaKey).Inc() +} + // quantityAsFloat converts resource.Quantity to a float64 value, in some scale (constant per resource but unspecified) func quantityAsFloat(resource corev1.ResourceName, quantity resource.Quantity) float64 { switch resource {