Skip to content

Commit

Permalink
histogram_quantile calculation correctness for exponential buckets
Browse files Browse the repository at this point in the history
  • Loading branch information
vishramachandran committed Nov 13, 2024
1 parent 8ee7d0a commit 82f8381
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 23 deletions.
19 changes: 13 additions & 6 deletions memory/src/main/scala/filodb.memory/format/vectors/Histogram.scala
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ trait Histogram extends Ordered[Histogram] {
* Calculates histogram quantile based on bucket values using Prometheus scheme (increasing/LE)
*/
def quantile(q: Double): Double = {
// TODO take care of calculating quantile in an exponential way for otel exp buckets
val result = if (q < 0) Double.NegativeInfinity
else if (q > 1) Double.PositiveInfinity
else if (numBuckets < 2) Double.NaN
Expand All @@ -71,9 +70,9 @@ trait Histogram extends Ordered[Histogram] {
// using rank, find the le bucket which would have the identified rank
val b = firstBucketGTE(rank)

// now calculate quantile. If bucket is last one then return second-to-last bucket top
// now calculate quantile. If bucket is last one and last bucket is +Inf then return second-to-last bucket top
// as we cannot interpolate to +Inf.
if (b == numBuckets-1) return bucketTop(numBuckets-2)
if (b == numBuckets-1 && bucketTop(numBuckets - 1).isPosInfinity) return bucketTop(numBuckets-2)
else if (b == 0 && bucketTop(0) <= 0) return bucketTop(0)
else {
// interpolate quantile within le bucket
Expand All @@ -83,12 +82,22 @@ trait Histogram extends Ordered[Histogram] {
count -= bucketValue(b-1)
rank -= bucketValue(b-1)
}
bucketStart + (bucketEnd-bucketStart)*(rank/count)
val fraction = rank/count
if (!hasExponentialBuckets || bucketStart == 0) {
bucketStart + (bucketEnd-bucketStart) * fraction
} else {
val logBucketEnd = log2(bucketEnd)
val logBucketStart = log2(bucketStart)
val logRank = logBucketStart + (logBucketEnd - logBucketStart) * fraction
Math.pow(2, logRank)
}
}
}
result
}

private def log2(v: Double) = Math.log(v) / Math.log(2)

/**
* Adapted from histogram_fraction in Prometheus codebase, but modified to handle
* the fact that bucket values are cumulative. Also, if min and max are provided,
Expand Down Expand Up @@ -126,8 +135,6 @@ trait Histogram extends Ordered[Histogram] {
val bucketVal = bucketValue(b)
val prevBucketVal = if (b == 0) 0.0 else bucketValue(b - 1)

def log2(v: Double) = Math.log(v) / Math.log(2)

// Define interpolation functions
def interpolateLinearly(v: Double): Double = {
val low = Math.max(bucketLower, min)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,34 +71,50 @@ class HistogramTest extends NativeVectorTest {
}

describe("Histogram") {
it("should add two histograms with the same bucket scheme correctly") {
val hist1 = LongHistogram(bucketScheme, Array(1, 2, 3, 4, 5, 6, 7, 8))
val hist2 = LongHistogram(bucketScheme, Array(2, 4, 6, 8, 10, 12, 14, 18))
hist1.add(hist2)

hist1.values shouldEqual Array(3, 6, 9, 12, 15, 18, 21, 26)
}
it("should add two histograms with the same bucket scheme correctly") {
val hist1 = LongHistogram(bucketScheme, Array(1, 2, 3, 4, 5, 6, 7, 8))
val hist2 = LongHistogram(bucketScheme, Array(2, 4, 6, 8, 10, 12, 14, 18))
hist1.add(hist2)

it("should not add histograms with different bucket schemes") {
val hist1 = LongHistogram(bucketScheme, Array(1, 2, 3, 4, 5, 6, 7, 8))
val histWithDifferentBuckets = LongHistogram(customScheme, Array(1, 2, 3, 4, 5, 6, 7))
hist1.values shouldEqual Array(3, 6, 9, 12, 15, 18, 21, 26)
}

val thrown = intercept[IllegalArgumentException] {
hist1.add(histWithDifferentBuckets)
}
it("should not add histograms with different bucket schemes") {
val hist1 = LongHistogram(bucketScheme, Array(1, 2, 3, 4, 5, 6, 7, 8))
val histWithDifferentBuckets = LongHistogram(customScheme, Array(1, 2, 3, 4, 5, 6, 7))

thrown.getMessage shouldEqual s"Mismatch in bucket sizes. Cannot add histograms with different bucket configurations. " +
s"Expected: ${hist1.buckets}, Found: ${histWithDifferentBuckets.buckets}"
val thrown = intercept[IllegalArgumentException] {
hist1.add(histWithDifferentBuckets)
}
it("should calculate quantile correctly") {

thrown.getMessage shouldEqual s"Mismatch in bucket sizes. Cannot add histograms with different bucket configurations. " +
s"Expected: ${hist1.buckets}, Found: ${histWithDifferentBuckets.buckets}"
}

it("should calculate quantile correctly for custom and geometric bucket histograms") {
mutableHistograms.zip(quantile50Result).foreach { case (h, res) =>
val quantile = h.quantile(0.50)
info(s"For histogram ${h.values.toList} -> quantile = $quantile")
quantile shouldEqual res
}

// Cannot return anything more than 2nd-to-last bucket (ie 64)
mutableHistograms(0).quantile(0.95) shouldEqual 64
// Cannot return anything more than 2nd-to-last bucket (ie 64) when last bucket is infinity
customHistograms(0).quantile(0.95) shouldEqual 10
}


it("should calculate quantile correctly for exponential bucket histograms") {
val bucketScheme = Base2ExpHistogramBuckets(3, -5, 11) // 0.707 to 1.68
val hist = MutableHistogram(bucketScheme, Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))

// multiple buckets with interpolation
hist.quantile(0.5) shouldEqual 1.0 +- 0.00001
hist.quantile(0.75) shouldEqual 1.2968395546510099 +- 0.00001
hist.quantile(0.25) shouldEqual 0.7711054127039704 +- 0.00001
hist.quantile(0.99) shouldEqual 1.6643974694230492 +- 0.00001
hist.quantile(0.01) shouldEqual 0.0 // zero bucket
hist.quantile(0.085) shouldEqual 0.014142135623730961 +- 0.00001
}

it("should calculate histogram_fraction correctly for exponential histograms using exponential interpolation") {
Expand Down

0 comments on commit 82f8381

Please sign in to comment.