Skip to content

Commit

Permalink
Merge pull request #18 from zeebo/cdf-fix
Browse files Browse the repository at this point in the history
fix cdf for values near the last centroid

Fixes #17
  • Loading branch information
caio authored Jan 19, 2018
2 parents 0ff90c8 + 3a757a1 commit 00da6c1
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
2 changes: 1 addition & 1 deletion tdigest.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ func (t *TDigest) CDF(value float64) float64 {
aMean := t.summary.Mean(aIdx)
if value < aMean+right {
aCount := float64(t.summary.Count(aIdx))
return (tot + aCount*interpolate(value, aMean-left, aMean+right)) / 2
return (tot + aCount*interpolate(value, aMean-left, aMean+right)) / float64(t.Count())
}
return 1
}
Expand Down
18 changes: 18 additions & 0 deletions tdigest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,24 @@ func TestForEachCentroid(t *testing.T) {
}
}

// TestCDFInsideLastCentroid is a regression test for CDF queries that land
// near the last centroid of a digest: the returned value must be a valid
// cumulative probability, i.e. lie within [0, 1].
func TestCDFInsideLastCentroid(t *testing.T) {
	// Centroid means and counts pulled from a live digest that exhibited
	// the problem. The literals are long, but they are kept verbatim so the
	// failing state is reproduced exactly.
	td := &TDigest{
		summary: &summary{
			means:  []float64{2120.75048828125, 2260.3844299316406, 3900.490264892578, 3937.495807647705, 5390.479816436768, 10450.335285186768, 14152.897296905518, 16442.676349639893, 24303.143146514893, 56961.87361526489, 63891.24959182739, 73982.55232620239, 86477.50447463989, 110746.62556838989, 175479.7388496399, 300492.3404121399, 440452.5279121399, 515611.7700996399, 535827.0025215149, 546241.6822090149, 556965.3648262024, 569791.2124824524, 587320.6870918274, 603969.4175605774, 613751.6177558899, 624708.7593574524, 635060.0718574524, 641924.2007637024, 650656.4302558899, 660653.1714668274, 671380.9009590149, 687094.3667793274, 716595.8824043274, 740870.9800605774, 760276.2437324524, 768857.5786933899, 775021.0025215149, 787686.0337715149, 801473.4624824524, 815225.1255683899, 832358.6997871399, 852438.4751777649, 866134.2935371399, 1.10661549666214e+06, 1.1212118980293274e+06, 1.2230108433418274e+06, 1.5446490620918274e+06, 4.306712312091827e+06, 5.487582562091827e+06, 6.306383562091827e+06, 7.089308312091827e+06, 7.520797593341827e+06},
			counts: []uint32{0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1, 0x4, 0x5, 0x6, 0x3, 0x3, 0x4, 0x11, 0x23, 0x2f, 0x1e, 0x1b, 0x36, 0x31, 0x33, 0x4e, 0x5f, 0x61, 0x48, 0x2e, 0x26, 0x28, 0x2a, 0x31, 0x39, 0x51, 0x32, 0x2b, 0x12, 0x8, 0xb, 0xa, 0x11, 0xa, 0x11, 0x9, 0x7, 0x1, 0x1, 0x1, 0x3, 0x2, 0x1, 0x1, 0x1, 0x1},
		},
		compression: 5,
		count:       1250,
		rng:         &globalRNG{},
	}
	// The summary was assembled by hand rather than through the normal
	// insertion path, so its Fenwick tree is rebuilt before querying
	// (presumably CDF relies on it for prefix counts -- confirm in summary).
	td.summary.rebuildFenwickTree()

	// The query value lies between the means of the final two centroids.
	const query = 7.144560976650238e+06

	// The condition is written as a negated range check on purpose: every
	// comparison involving NaN is false, so this form rejects NaN as well
	// as values below 0 or above 1.
	if cdf := td.CDF(query); !(cdf >= 0 && cdf <= 1) {
		t.Fatalf("invalid: %v", cdf)
	}
}

func benchmarkAdd(compression uint32, b *testing.B) {
t := uncheckedNew(Compression(compression))

Expand Down

0 comments on commit 00da6c1

Please sign in to comment.