From da497b4e8bb5fd25b4faba95253123c57b0f8e6b Mon Sep 17 00:00:00 2001 From: Plamen Kokanov Date: Thu, 17 Oct 2024 16:29:10 +0300 Subject: [PATCH] Enhance logging in cases where we have 0 memory recommendations --- .../pkg/recommender/logic/estimator.go | 97 +++++++++++++++---- .../pkg/recommender/logic/estimator_test.go | 4 +- .../pkg/recommender/logic/recommender.go | 10 +- .../recommender/util/decaying_histogram.go | 2 +- .../pkg/recommender/util/histogram.go | 14 ++- 5 files changed, 100 insertions(+), 27 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go b/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go index c87fa9eb7224..50d68f0b83bf 100644 --- a/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go +++ b/vertical-pod-autoscaler/pkg/recommender/logic/estimator.go @@ -25,7 +25,13 @@ import ( "k8s.io/klog/v2" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model" - metrics_quality "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/quality" + metricsquality "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/quality" +) + +const ( + targetEstimatorName = "targetEstimator" + lowerBoundEstimatorName = "lowerBoundEstimator" + upperBoundEstimatorName = "upperBoundEstimator" ) // TODO: Split the estimator to have a separate estimator object for CPU and memory. @@ -35,12 +41,15 @@ import ( // containers. type ResourceEstimator interface { GetResourceEstimation(s *model.AggregateContainerState) model.Resources + SetVpaKeyAndEstimatorName(vpaKey model.VpaID, estimatorName string) } // Implementation of ResourceEstimator that returns constant amount of // resources. This can be used as by a fake recommender for test purposes. type constEstimator struct { - resources model.Resources + resources model.Resources + vpaKey model.VpaID + estimatorName string } // Simple implementation of the ResourceEstimator interface. It returns specific @@ -48,50 +57,57 @@ type constEstimator struct { type percentileEstimator struct { cpuPercentile float64 memoryPercentile float64 + vpaKey model.VpaID + estimatorName string } type marginEstimator struct { marginFraction float64 baseEstimator ResourceEstimator + vpaKey model.VpaID + estimatorName string } type minResourcesEstimator struct { minResources model.Resources baseEstimator ResourceEstimator vpaKey model.VpaID + estimatorName string } type confidenceMultiplier struct { multiplier float64 exponent float64 baseEstimator ResourceEstimator + vpaKey model.VpaID + estimatorName string } // NewConstEstimator returns a new constEstimator with given resources. func NewConstEstimator(resources model.Resources) ResourceEstimator { - return &constEstimator{resources} + return &constEstimator{resources: resources} } // NewPercentileEstimator returns a new percentileEstimator that uses provided percentiles. func NewPercentileEstimator(cpuPercentile float64, memoryPercentile float64) ResourceEstimator { - return &percentileEstimator{cpuPercentile, memoryPercentile} + return &percentileEstimator{cpuPercentile, memoryPercentile, model.VpaID{}, ""} } // WithMargin returns a given ResourceEstimator with margin applied. // The returned resources are equal to the original resources plus (originalResource * marginFraction) func WithMargin(marginFraction float64, baseEstimator ResourceEstimator) ResourceEstimator { - return &marginEstimator{marginFraction, baseEstimator} + return &marginEstimator{marginFraction, baseEstimator, model.VpaID{}, ""} } // WithMinResources returns a given ResourceEstimator with minResources applied. // The returned resources are equal to the max(original resources, minResources) -func WithMinResources(minResources model.Resources, baseEstimator ResourceEstimator, vpaKey model.VpaID) ResourceEstimator { - return &minResourcesEstimator{minResources, baseEstimator, vpaKey} +func WithMinResources(minResources model.Resources, baseEstimator ResourceEstimator) ResourceEstimator { + return &minResourcesEstimator{minResources, baseEstimator, model.VpaID{}, ""} } // WithConfidenceMultiplier returns a given ResourceEstimator with confidenceMultiplier applied. func WithConfidenceMultiplier(multiplier, exponent float64, baseEstimator ResourceEstimator) ResourceEstimator { - return &confidenceMultiplier{multiplier, exponent, baseEstimator} + return &confidenceMultiplier{multiplier, exponent, baseEstimator, model.VpaID{}, ""} } // Returns a constant amount of resources. @@ -99,14 +115,30 @@ func (e *constEstimator) GetResourceEstimation(s *model.AggregateContainerState) return e.resources } +func (e *constEstimator) SetVpaKeyAndEstimatorName(vpaKey model.VpaID, estimatorName string) { + e.vpaKey = vpaKey + e.estimatorName = estimatorName +} + // Returns specific percentiles of CPU and memory peaks distributions. func (e *percentileEstimator) GetResourceEstimation(s *model.AggregateContainerState) model.Resources { - return model.Resources{ + resources := model.Resources{ model.ResourceCPU: model.CPUAmountFromCores( s.AggregateCPUUsage.Percentile(e.cpuPercentile)), model.ResourceMemory: model.MemoryAmountFromBytes( s.AggregateMemoryPeaks.Percentile(e.memoryPercentile)), } + + if resources["memory"] == 0 && e.estimatorName == targetEstimatorName { + klog.Warningf("Computed %q resources for VPA %q were 0 in percentile estimator of %q!", "memory", klog.KRef(e.vpaKey.Namespace, e.vpaKey.VpaName), e.estimatorName) + } + + return resources +} + +func (e *percentileEstimator) SetVpaKeyAndEstimatorName(vpaKey model.VpaID, estimatorName string) { + e.vpaKey = vpaKey + e.estimatorName = estimatorName } // Returns a non-negative real number that heuristically measures how much @@ -139,29 +171,47 @@ func (e *confidenceMultiplier) GetResourceEstimation(s *model.AggregateContainer for resource, resourceAmount := range originalResources { scaledResources[resource] = model.ScaleResource( resourceAmount, math.Pow(1.+e.multiplier/confidence, e.exponent)) + if resource == "memory" && scaledResources[resource] == 0 && e.estimatorName == targetEstimatorName { + klog.Warningf("Computed %q resources for VPA %q were 0 after applying confidence: %f in confidence multiplier of %q; they were %v before that!", resource, klog.KRef(e.vpaKey.Namespace, e.vpaKey.VpaName), confidence, e.estimatorName, resourceAmount) + } } return scaledResources } +func (e *confidenceMultiplier) SetVpaKeyAndEstimatorName(vpaKey model.VpaID, estimatorName string) { + e.vpaKey = vpaKey + e.estimatorName = estimatorName + e.baseEstimator.SetVpaKeyAndEstimatorName(vpaKey, estimatorName) +} + func (e *marginEstimator) GetResourceEstimation(s *model.AggregateContainerState) model.Resources { originalResources := e.baseEstimator.GetResourceEstimation(s) newResources := make(model.Resources) for resource, resourceAmount := range originalResources { margin := model.ScaleResource(resourceAmount, e.marginFraction) newResources[resource] = originalResources[resource] + margin + if resource == "memory" && newResources[resource] == 0 && e.estimatorName == targetEstimatorName { + klog.Warningf("Computed %q resources for VPA %q were 0 after applying margin in margin estimator of %q, they were %v before that", resource, klog.KRef(e.vpaKey.Namespace, e.vpaKey.VpaName), e.estimatorName, originalResources[resource]) + } } return newResources } +func (e *marginEstimator) SetVpaKeyAndEstimatorName(vpaKey model.VpaID, estimatorName string) { + e.vpaKey = vpaKey + e.estimatorName = estimatorName + e.baseEstimator.SetVpaKeyAndEstimatorName(vpaKey, estimatorName) +} + func (e *minResourcesEstimator) GetResourceEstimation(s *model.AggregateContainerState) model.Resources { originalResources := e.baseEstimator.GetResourceEstimation(s) newResources := make(model.Resources) for resource, resourceAmount := range originalResources { if resourceAmount < e.minResources[resource] { - if resource == "memory" { - klog.Warningf("Computed %s resources for VPA %s were below minimum! Computed %v, minimum is %v.", resource, klog.KRef(e.vpaKey.Namespace, e.vpaKey.VpaName), resourceAmount, e.minResources[resource]) + if resource == "memory" && resourceAmount == 0 && e.estimatorName == targetEstimatorName { + klog.Warningf("Computed %q resources for VPA %q were below minimum in min resource estimator of %q! Computed %v, minimum is %v.", resource, klog.KRef(e.vpaKey.Namespace, e.vpaKey.VpaName), e.estimatorName, resourceAmount, e.minResources[resource]) logHistogramInformation(s, e.vpaKey) - metrics_quality.ObserveLowerThanMinRecommendation(s.GetUpdateMode(), corev1.ResourceName(resource), e.vpaKey.Namespace+"/"+e.vpaKey.VpaName) + metricsquality.ObserveLowerThanMinRecommendation(s.GetUpdateMode(), corev1.ResourceName("memory"), e.vpaKey.Namespace+"/"+e.vpaKey.VpaName) } resourceAmount = e.minResources[resource] } @@ -170,20 +220,33 @@ func (e *minResourcesEstimator) GetResourceEstimation(s *model.AggregateContaine return newResources } +func (e *minResourcesEstimator) SetVpaKeyAndEstimatorName(vpaKey model.VpaID, estimatorName string) { + e.vpaKey = vpaKey + e.estimatorName = estimatorName + e.baseEstimator.SetVpaKeyAndEstimatorName(vpaKey, estimatorName) +} + func logHistogramInformation(s *model.AggregateContainerState, vpaKey model.VpaID) { if s.AggregateCPUUsage == nil { - klog.Warning("Aggregate CPU usage has no metric samples, cannot show internal histogram data for VPA %s!", klog.KRef(vpaKey.Namespace, vpaKey.VpaName)) + klog.Warning("Aggregate CPU usage has no metric samples, cannot show internal histogram data for VPA %q!", klog.KRef(vpaKey.Namespace, vpaKey.VpaName)) return } if s.AggregateMemoryPeaks == nil { - klog.Warning("Aggregate memory usage has no metric samples, cannot show internal histogram data for VPA %s!", klog.KRef(vpaKey.Namespace, vpaKey.VpaName)) + klog.Warning("Aggregate memory usage has no metric samples, cannot show internal histogram data for VPA %q!", klog.KRef(vpaKey.Namespace, vpaKey.VpaName)) return } + + if s.AggregateMemoryPeaks.IsEmpty() { + klog.Warningf("The memory histogram for VPA %q is empty!", klog.KRef(vpaKey.Namespace, vpaKey.VpaName)) + } + + klog.Warningf("Here's the string representation of the memory histogram for VPA %q: %s", klog.KRef(vpaKey.Namespace, vpaKey.VpaName), s.AggregateMemoryPeaks.String()) + c, _ := s.SaveToCheckpoint() - prettyCheckpoint, err := json.MarshalIndent(c, "", " ") + prettyCheckpoint, err := json.Marshal(c) if err != nil { - klog.Errorf("Error during marshalling checkpoint for VPA %s: %s", klog.KRef(vpaKey.Namespace, vpaKey.VpaName), err) + klog.Errorf("Error during marshalling checkpoint for VPA %q: %s", klog.KRef(vpaKey.Namespace, vpaKey.VpaName), err) return } - klog.Warningf("Here's the checkpoint/state for VPA %s: %s", klog.KRef(vpaKey.Namespace, vpaKey.VpaName), prettyCheckpoint) + klog.Warningf("Here's the checkpoint/state for VPA %q: %s", klog.KRef(vpaKey.Namespace, vpaKey.VpaName), prettyCheckpoint) } diff --git a/vertical-pod-autoscaler/pkg/recommender/logic/estimator_test.go b/vertical-pod-autoscaler/pkg/recommender/logic/estimator_test.go index 4e83e448ad7a..3a7772c0bc6b 100644 --- a/vertical-pod-autoscaler/pkg/recommender/logic/estimator_test.go +++ b/vertical-pod-autoscaler/pkg/recommender/logic/estimator_test.go @@ -106,8 +106,8 @@ func TestConfidenceMultiplierNoHistory(t *testing.T) { model.ResourceCPU: model.CPUAmountFromCores(3.14), model.ResourceMemory: model.MemoryAmountFromBytes(3.14e9), }) - testedEstimator1 := &confidenceMultiplier{1.0, 1.0, baseEstimator} - testedEstimator2 := &confidenceMultiplier{1.0, -1.0, baseEstimator} + testedEstimator1 := &confidenceMultiplier{multiplier: 1.0, exponent: 1.0, baseEstimator: baseEstimator} + testedEstimator2 := &confidenceMultiplier{multiplier: 1.0, exponent: -1.0, baseEstimator: baseEstimator} s := model.NewAggregateContainerState() // Expect testedEstimator1 to return the maximum possible resource amount. assert.Equal(t, model.ResourceAmount(1e14), diff --git a/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go b/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go index 9b92d75a69d4..e876add97d7e 100644 --- a/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go +++ b/vertical-pod-autoscaler/pkg/recommender/logic/recommender.go @@ -74,11 +74,15 @@ func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggre } recommender := &podResourceRecommender{ - WithMinResources(minResources, r.targetEstimator, vpaKey), - WithMinResources(minResources, r.lowerBoundEstimator, vpaKey), - WithMinResources(minResources, r.upperBoundEstimator, vpaKey), + WithMinResources(minResources, r.targetEstimator), + WithMinResources(minResources, r.lowerBoundEstimator), + WithMinResources(minResources, r.upperBoundEstimator), } + recommender.targetEstimator.SetVpaKeyAndEstimatorName(vpaKey, targetEstimatorName) + recommender.upperBoundEstimator.SetVpaKeyAndEstimatorName(vpaKey, upperBoundEstimatorName) + recommender.lowerBoundEstimator.SetVpaKeyAndEstimatorName(vpaKey, lowerBoundEstimatorName) + for containerName, aggregatedContainerState := range containerNameToAggregateStateMap { recommendation[containerName] = recommender.estimateContainerResources(aggregatedContainerState) } diff --git a/vertical-pod-autoscaler/pkg/recommender/util/decaying_histogram.go b/vertical-pod-autoscaler/pkg/recommender/util/decaying_histogram.go index f2b9847088fd..ed777634c6da 100644 --- a/vertical-pod-autoscaler/pkg/recommender/util/decaying_histogram.go +++ b/vertical-pod-autoscaler/pkg/recommender/util/decaying_histogram.go @@ -94,7 +94,7 @@ func (h *decayingHistogram) IsEmpty() bool { } func (h *decayingHistogram) String() string { - return fmt.Sprintf("referenceTimestamp: %v, halfLife: %v\n%s", h.referenceTimestamp, h.halfLife, h.histogram.String()) + return fmt.Sprintf("referenceTimestamp: %v, halfLife: %v; %s", h.referenceTimestamp, h.halfLife, h.histogram.String()) } func (h *decayingHistogram) shiftReferenceTimestamp(newreferenceTimestamp time.Time) { diff --git a/vertical-pod-autoscaler/pkg/recommender/util/histogram.go b/vertical-pod-autoscaler/pkg/recommender/util/histogram.go index cd0d360d0f4b..8381effe5dd2 100644 --- a/vertical-pod-autoscaler/pkg/recommender/util/histogram.go +++ b/vertical-pod-autoscaler/pkg/recommender/util/histogram.go @@ -184,14 +184,20 @@ func (h *histogram) IsEmpty() bool { func (h *histogram) String() string { lines := []string{ - fmt.Sprintf("minBucket: %d, maxBucket: %d, totalWeight: %.3f", + fmt.Sprintf("minBucket: %d, maxBucket: %d, totalWeight: %.4f", h.minBucket, h.maxBucket, h.totalWeight), - "%-tile\tvalue", + "%-tile value: ", } for i := 0; i <= 100; i += 5 { - lines = append(lines, fmt.Sprintf("%d\t%.3f", i, h.Percentile(0.01*float64(i)))) + lines = append(lines, fmt.Sprintf("%d: %.4f", i, h.Percentile(0.01*float64(i)))) } - return strings.Join(lines, "\n") + + lines = append(lines, "buckets value") + for i := 0; i < h.options.NumBuckets(); i++ { + lines = append(lines, fmt.Sprintf("%d: %.4f", i, h.bucketWeight[i])) + } + + return strings.Join(lines, "; ") } func (h *histogram) Equals(other Histogram) bool {