Skip to content
This repository has been archived by the owner on Jan 20, 2023. It is now read-only.

Commit

Permalink
Merge branch 'binary_encoding'
Browse files Browse the repository at this point in the history
  • Loading branch information
spenczar committed Oct 6, 2017
2 parents 97a3050 + 5ba121a commit 14306f1
Show file tree
Hide file tree
Showing 7 changed files with 676 additions and 170 deletions.
12 changes: 6 additions & 6 deletions benchmarks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,33 @@ type valueSource interface {
func benchmarkAdd(b *testing.B, n int, src valueSource) {
valsToAdd := make([]float64, n)

cset := newCentroidSet(100)
d := NewWithCompression(100)
for i := 0; i < n; i++ {
v := src.Next()
valsToAdd[i] = v
cset.Add(v, 1)
d.Add(v, 1)
}

b.ResetTimer()
for i := 0; i < b.N; i++ {
cset.Add(valsToAdd[i%n], 1)
d.Add(valsToAdd[i%n], 1)
}
b.StopTimer()
}

func benchmarkQuantile(b *testing.B, n int, src valueSource) {
quantilesToCheck := make([]float64, n)

cset := newCentroidSet(100)
d := NewWithCompression(100)
for i := 0; i < n; i++ {
v := src.Next()
quantilesToCheck[i] = v
cset.Add(v, 1)
d.Add(v, 1)
}

b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = cset.Quantile(quantilesToCheck[i%n])
_ = d.Quantile(quantilesToCheck[i%n])
}
b.StopTimer()
}
Expand Down
42 changes: 42 additions & 0 deletions fuzz.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// +build gofuzz

package tdigest

import (
"bytes"
"fmt"
"log"

"github.com/davecgh/go-spew/spew"
)

func Fuzz(data []byte) int {
v := new(TDigest)
err := v.UnmarshalBinary(data)
if err != nil {
return 0
}

remarshaled, err := v.MarshalBinary()
if err != nil {
panic(err)
}

if !bytes.HasPrefix(data, remarshaled) {
panic(fmt.Sprintf("not equal: \n%v\nvs\n%v", data, remarshaled))
}

for q := float64(0.1); q <= 1.0; q += 0.05 {
prev, this := v.Quantile(q-0.1), v.Quantile(q)
if prev-this > 1e-100 { // Floating point math makes this slightly imprecise.
log.Printf("v: %s", spew.Sprint(v))
log.Printf("q: %v", q)
log.Printf("prev: %v", prev)
log.Printf("this: %v", this)
panic("quantiles should only increase")
}
}

v.Add(1, 1)
return 1
}
84 changes: 84 additions & 0 deletions fuzz_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package tdigest

import (
"bytes"
"testing"

"github.com/davecgh/go-spew/spew"
)

func TestFuzzPanicRegressions(t *testing.T) {
// This test contains a list of byte sequences discovered by
// github.com/dvyukov/go-fuzz which, at one time, caused tdigest to panic. The
// test just makes sure that they no longer cause a panic.
testcase := func(crasher []byte) func(*testing.T) {
return func(t *testing.T) {
v := new(TDigest)
err := v.UnmarshalBinary(crasher)
if err != nil {
return
}
remarshaled, err := v.MarshalBinary()
if err != nil {
t.Fatalf("marshal error: %v", err)
}

if !bytes.HasPrefix(crasher, remarshaled) {
t.Fatalf("not equal: \n%v\nvs\n%v", crasher, remarshaled)
}

for q := float64(0.1); q <= 1.0; q += 0.05 {
prev, this := v.Quantile(q-0.1), v.Quantile(q)
if prev-this > 1e-100 { // Floating point math makes this slightly imprecise.
t.Logf("v: %s", spew.Sprint(v))
t.Logf("q: %v", q)
t.Logf("prev: %v", prev)
t.Logf("this: %v", this)
t.Fatal("quantiles should only increase")
}
}

v.Add(1, 1)
}
}
t.Run("fuzz1", testcase([]byte{
0x01, 0x00, 0x00, 0x00, 0x30, 0x30, 0x30, 0x30,
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0xfc,
}))
t.Run("fuzz2", testcase([]byte{
0x01, 0x00, 0x00, 0x00, 0xdb, 0x46, 0x5f, 0xbd,
0xdb, 0x46, 0x00, 0xbd, 0xe0, 0xdf, 0xca, 0xab,
0x37, 0x31, 0x37, 0x32, 0x37, 0x33, 0x37, 0x34,
0x37, 0x35, 0x37, 0x36, 0x37, 0x37, 0x37, 0x38,
0x37, 0x39, 0x28,
}))
t.Run("fuzz3", testcase([]byte{
0x80, 0x0c, 0x01, 0x00, 0x00, 0x00, 0x30, 0x30,
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x02, 0x00,
0x00, 0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x30, 0xbf,
}))
t.Run("fuzz4", testcase([]byte{
0x80, 0x0c, 0x01, 0x00, 0x00, 0x00, 0x30, 0x30,
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x02, 0x00,
0x00, 0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x30, 0x63, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x30, 0x4e,
}))
t.Run("fuzz5", testcase([]byte{
0x80, 0x0c, 0x01, 0x00, 0x00, 0x00, 0x30, 0x30,
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x02, 0x00,
0x00, 0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x30, 0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x30, 0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
0x92, 0x00,
}))
}
123 changes: 123 additions & 0 deletions serde.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package tdigest

import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math"
)

const (
magic = int16(0xc80)
encodingVersion = int32(1)
)

func marshalBinary(d *TDigest) ([]byte, error) {
buf := bytes.NewBuffer(nil)
w := &binaryBufferWriter{buf: buf}
w.writeValue(magic)
w.writeValue(encodingVersion)
w.writeValue(d.compression)
w.writeValue(int32(len(d.centroids)))
for _, c := range d.centroids {
w.writeValue(c.count)
w.writeValue(c.mean)
}

if w.err != nil {
return nil, w.err
}
return buf.Bytes(), nil
}

func unmarshalBinary(d *TDigest, p []byte) error {
var (
mv int16
ev int32
n int32
)
r := &binaryReader{r: bytes.NewReader(p)}
r.readValue(&mv)
if r.err != nil {
return r.err
}
if mv != magic {
return fmt.Errorf("data corruption detected: invalid header magic value 0x%04x", mv)
}
r.readValue(&ev)
if r.err != nil {
return r.err
}
if ev != encodingVersion {
return fmt.Errorf("data corruption detected: invalid encoding version %d", ev)
}
r.readValue(&d.compression)
r.readValue(&n)
if r.err != nil {
return r.err
}
if n < 0 {
return fmt.Errorf("data corruption detected: number of centroids cannot be negative, have %v", n)

}
if n > 1<<20 {
return fmt.Errorf("invalid n, cannot be greater than 2^20: %v", n)
}
d.centroids = make([]*centroid, int(n))
for i := 0; i < int(n); i++ {
c := new(centroid)
r.readValue(&c.count)
r.readValue(&c.mean)
if r.err != nil {
return r.err
}
if c.count < 0 {
return fmt.Errorf("data corruption detected: negative count: %d", c.count)
}
if i > 0 {
prev := d.centroids[i-1]
if c.mean < prev.mean {
return fmt.Errorf("data corruption detected: centroid %d has lower mean (%v) than preceding centroid %d (%v)", i, c.mean, i-1, prev.mean)
}
}
d.centroids[i] = c
if c.count > math.MaxInt64-d.countTotal {
return fmt.Errorf("data corruption detected: centroid total size overflow")
}
d.countTotal += c.count
}

if n := r.r.Len(); n > 0 {
return fmt.Errorf("found %d unexpected bytes trailing the tdigest", n)
}

return nil
}

type binaryBufferWriter struct {
buf *bytes.Buffer
err error
}

func (w *binaryBufferWriter) writeValue(v interface{}) {
if w.err != nil {
return
}
w.err = binary.Write(w.buf, binary.LittleEndian, v)
}

type binaryReader struct {
r *bytes.Reader
err error
}

func (r *binaryReader) readValue(v interface{}) {
if r.err != nil {
return
}
r.err = binary.Read(r.r, binary.LittleEndian, v)
if r.err == io.EOF {
r.err = io.ErrUnexpectedEOF
}
}
Loading

0 comments on commit 14306f1

Please sign in to comment.