Skip to content

Commit

Permalink
histogram_quantile calculation correctness for exponential buckets
Browse files Browse the repository at this point in the history
  • Loading branch information
vishramachandran committed Nov 8, 2024
1 parent 6d5dd4a commit ff390d2
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 9 deletions.
19 changes: 13 additions & 6 deletions memory/src/main/scala/filodb.memory/format/vectors/Histogram.scala
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ trait Histogram extends Ordered[Histogram] {
* Calculates histogram quantile based on bucket values using Prometheus scheme (increasing/LE)
*/
def quantile(q: Double): Double = {
// TODO take care of calculating quantile in an exponential way for otel exp buckets
val result = if (q < 0) Double.NegativeInfinity
else if (q > 1) Double.PositiveInfinity
else if (numBuckets < 2) Double.NaN
Expand All @@ -71,9 +70,9 @@ trait Histogram extends Ordered[Histogram] {
// using rank, find the le bucket which would have the identified rank
val b = firstBucketGTE(rank)

// now calculate quantile. If bucket is last one then return second-to-last bucket top
// now calculate quantile. If bucket is last one and last bucket is +Inf then return second-to-last bucket top
// as we cannot interpolate to +Inf.
if (b == numBuckets-1) return bucketTop(numBuckets-2)
if (b == numBuckets-1 && bucketTop(numBuckets - 1).isPosInfinity) return bucketTop(numBuckets-2)
else if (b == 0 && bucketTop(0) <= 0) return bucketTop(0)
else {
// interpolate quantile within le bucket
Expand All @@ -83,12 +82,22 @@ trait Histogram extends Ordered[Histogram] {
count -= bucketValue(b-1)
rank -= bucketValue(b-1)
}
bucketStart + (bucketEnd-bucketStart)*(rank/count)
val fraction = rank/count
if (!hasExponentialBuckets || bucketStart == 0) {
bucketStart + (bucketEnd-bucketStart) * fraction
} else {
val logBucketEnd = log2(bucketEnd)
val logBucketStart = log2(bucketStart)
val logRank = logBucketStart + (logBucketEnd - logBucketStart) * fraction
Math.pow(2, logRank)
}
}
}
result
}

private def log2(v: Double) = Math.log(v) / Math.log(2)

/**
* Adapted from histogram_fraction in Prometheus codebase, but modified to handle
* the fact that bucket values are cumulative. Also, if min and max are provided,
Expand Down Expand Up @@ -126,8 +135,6 @@ trait Histogram extends Ordered[Histogram] {
val bucketVal = bucketValue(b)
val prevBucketVal = if (b == 0) 0.0 else bucketValue(b - 1)

def log2(v: Double) = Math.log(v) / Math.log(2)

// Define interpolation functions
def interpolateLinearly(v: Double): Double = {
val low = Math.max(bucketLower, min)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,29 @@ class HistogramTest extends NativeVectorTest {
}

describe("Histogram") {
it("should calculate quantile correctly") {
it("should calculate quantile correctly for custom and geometric bucket histograms") {
mutableHistograms.zip(quantile50Result).foreach { case (h, res) =>
val quantile = h.quantile(0.50)
info(s"For histogram ${h.values.toList} -> quantile = $quantile")
quantile shouldEqual res
}

// Cannot return anything more than 2nd-to-last bucket (ie 64)
mutableHistograms(0).quantile(0.95) shouldEqual 64
// Cannot return anything more than 2nd-to-last bucket (ie 64) when last bucket is infinity
customHistograms(0).quantile(0.95) shouldEqual 10
}


it("should calculate quantile correctly for exponential bucket histograms") {
val bucketScheme = Base2ExpHistogramBuckets(3, -5, 11) // 0.707 to 1.68
val hist = MutableHistogram(bucketScheme, Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))

// multiple buckets with interpolation
hist.quantile(0.5) shouldEqual 1.0 +- 0.00001
hist.quantile(0.75) shouldEqual 1.2968395546510099 +- 0.00001
hist.quantile(0.25) shouldEqual 0.7711054127039704 +- 0.00001
hist.quantile(0.99) shouldEqual 1.6643974694230492 +- 0.00001
hist.quantile(0.01) shouldEqual 0.0 // zero bucket
hist.quantile(0.085) shouldEqual 0.014142135623730961 +- 0.00001
}

it("should calculate histogram_fraction correctly for exponential histograms using exponential interpolation") {
Expand Down

0 comments on commit ff390d2

Please sign in to comment.