From 6d5dd4a34ebe8dc2707836ad2ed884160223e85f Mon Sep 17 00:00:00 2001 From: Vish Ramachandran Date: Thu, 7 Nov 2024 10:01:53 -0800 Subject: [PATCH] optionally use min and max for hist_fraction --- .../format/vectors/Histogram.scala | 17 ++++++++++++----- .../format/vectors/HistogramTest.scala | 6 ++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/memory/src/main/scala/filodb.memory/format/vectors/Histogram.scala b/memory/src/main/scala/filodb.memory/format/vectors/Histogram.scala index b8020feddf..0b188e483f 100644 --- a/memory/src/main/scala/filodb.memory/format/vectors/Histogram.scala +++ b/memory/src/main/scala/filodb.memory/format/vectors/Histogram.scala @@ -91,11 +91,14 @@ trait Histogram extends Ordered[Histogram] { /** * Adapted from histogram_fraction in Prometheus codebase, but modified to handle - * the fact that bucket values are cumulative. + * the fact that bucket values are cumulative. Also, if min and max are provided, + * then interpolation accuracy is improved. */ //scalastyle:off cyclomatic.complexity //scalastyle:off method.length - def histogramFraction(lower: Double, upper: Double): Double = { + def histogramFraction(lower: Double, upper: Double, + min: Double = Double.NegativeInfinity, + max: Double = Double.PositiveInfinity): Double = { require(lower >= 0 && upper >= 0, s"lower & upper params should be >= 0: lower=$lower, upper=$upper") if (numBuckets == 0 || lower.isNaN || upper.isNaN || topBucketValue == 0) { return Double.NaN @@ -127,13 +130,17 @@ trait Histogram extends Ordered[Histogram] { // Define interpolation functions def interpolateLinearly(v: Double): Double = { - val fraction = (v - bucketLower) / (bucketUpper - bucketLower) + val low = Math.max(bucketLower, min) + val high = Math.min(bucketUpper, max) + val fraction = (v - low) / (high - low) prevBucketVal + (bucketVal - prevBucketVal) * fraction } def interpolateExponentially(v: Double) = { - val logLower = log2(Math.abs(bucketLower)) - val logUpper = log2(Math.abs(bucketUpper)) + val low = Math.max(bucketLower, min) + val high = Math.min(bucketUpper, max) + val logLower = log2(Math.abs(low)) + val logUpper = log2(Math.abs(high)) val logV = log2(Math.abs(v)) val fraction = if (v > 0) (logV - logLower) / (logUpper - logLower) else 1 - ((logV - logUpper) / (logLower - logUpper)) diff --git a/memory/src/test/scala/filodb.memory/format/vectors/HistogramTest.scala b/memory/src/test/scala/filodb.memory/format/vectors/HistogramTest.scala index cb06a45ae7..45f3fca5b4 100644 --- a/memory/src/test/scala/filodb.memory/format/vectors/HistogramTest.scala +++ b/memory/src/test/scala/filodb.memory/format/vectors/HistogramTest.scala @@ -89,6 +89,9 @@ class HistogramTest extends NativeVectorTest { // multiple buckets with interpolation hist.histogramFraction(0.8, 1.2) shouldEqual 0.3899750004807707 +- 0.00001 + // multiple buckets with interpolation using min and max leads to improved accuracy + hist.histogramFraction(0.8, 1.2, 0.76, 1.21) shouldEqual 0.42472117149283567 +- 0.00001 + // multiple buckets without interpolation hist.histogramFraction(bucketScheme.bucketTop(3), bucketScheme.bucketTop(7)) shouldEqual ((hist.bucketValue(7) - hist.bucketValue(3)) / hist.topBucketValue) +- 0.00001 @@ -113,6 +116,9 @@ class HistogramTest extends NativeVectorTest { // multiple buckets with linear interpolation hist.histogramFraction(4.5, 6.6) shouldEqual 0.175 +- 0.00001 + // multiple buckets with interpolation using min and max leads to improved accuracy + hist.histogramFraction(4.5, 6.6, 4.1, 6.8) shouldEqual 0.19212962962962962 +- 0.00001 + // multiple buckets without interpolation hist.histogramFraction(bucketScheme.bucketTop(3), bucketScheme.bucketTop(7)) shouldEqual ((hist.bucketValue(7) - hist.bucketValue(3)) / hist.topBucketValue) +- 0.00001