Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize repeatedly serializing array bitmaps #364

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions arraycontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,10 @@ func (ac *arrayContainer) clone() container {
return &ptr
}

// clear truncates the container's content to length zero by re-slicing it,
// so the existing slice is kept rather than replaced.
func (ac *arrayContainer) clear() {
	ac.content = ac.content[0:0]
}

// contains reports whether binarySearch locates x in the container's
// content, i.e. whether the search returns a non-negative result.
func (ac *arrayContainer) contains(x uint16) bool {
	idx := binarySearch(ac.content, x)
	return idx >= 0
}
Expand Down
24 changes: 20 additions & 4 deletions benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@ package roaring
import (
"bytes"
"fmt"
"github.com/stretchr/testify/require"
"math/rand"
"testing"

"github.com/stretchr/testify/require"

"github.com/bits-and-blooms/bitset"
)

// BENCHMARKS, to run them type "go test -bench Benchmark -run -"


// go test -bench BenchmarkIteratorAlloc -benchmem -run -
func BenchmarkIteratorAlloc(b *testing.B) {
bm := NewBitmap()
Expand Down Expand Up @@ -84,7 +84,6 @@ func BenchmarkIteratorAlloc(b *testing.B) {
b.Fatalf("Cardinalities don't match: %d, %d", counter, expected_cardinality)
}


b.Run("many iteration with alloc", func(b *testing.B) {
for n := 0; n < b.N; n++ {
counter = 0
Expand Down Expand Up @@ -117,7 +116,6 @@ func BenchmarkIteratorAlloc(b *testing.B) {
}
}


// go test -bench BenchmarkOrs -benchmem -run -
func BenchmarkOrs(b *testing.B) {

Expand Down Expand Up @@ -1134,3 +1132,21 @@ func BenchmarkAndAny(b *testing.B) {
runSet("small-filters", genOne(r, largeSize, domain), genMulti(r, filtersNum, smallSize, domain))
runSet("equal", genOne(r, defaultSize, domain), genMulti(r, filtersNum, defaultSize, domain))
}

// BenchmarkRepeatedSparseSerialization measures repeatedly clearing one
// bitmap, refilling it with the sixteen values 0..15 and serializing it into
// a reused buffer.
func BenchmarkRepeatedSparseSerialization(b *testing.B) {
	var (
		l   = NewBitmap()
		buf = bytes.NewBuffer(nil)
	)
	for i := 0; i < b.N; i++ {
		l.ClearRetainStructures()
		for j := 0; j < 16; j++ {
			l.Add(uint32(j))
		}
		buf.Reset()
		// Report failures through the benchmark harness instead of
		// panicking, so the run is marked failed with a normal message.
		if _, err := l.WriteTo(buf); err != nil {
			b.Fatal(err)
		}
	}
}
7 changes: 7 additions & 0 deletions bitmapcontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,13 @@ func (bc *bitmapContainer) clone() container {
return &ptr
}

// clear resets the container in place: the cardinality is set to zero and
// every word of the bitmap is zeroed.
func (bc *bitmapContainer) clear() {
	bc.cardinality = 0
	words := bc.bitmap
	for i := range words {
		words[i] = 0
	}
}

// add all values in range [firstOfRange,lastOfRange)
func (bc *bitmapContainer) iaddRange(firstOfRange, lastOfRange int) container {
bc.cardinality += setBitmapRangeAndCardinalityChange(bc.bitmap, firstOfRange, lastOfRange)
Expand Down
3 changes: 3 additions & 0 deletions parallel.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,9 @@ func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer
make([]container, 0, expectedKeys),
make([]bool, 0, expectedKeys),
false,
nil,
},
nil,
}
for i := range keys {
if containers[i] != nil { // in case a resulting container was empty, see ParAnd function
Expand Down Expand Up @@ -440,6 +442,7 @@ func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
keys: make([]uint16, containerCount),
needCopyOnWrite: make([]bool, containerCount),
},
nil,
}

resultOffset := 0
Expand Down
216 changes: 216 additions & 0 deletions prop_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
package roaring

import (
"fmt"
"math/rand"
"testing"

"github.com/stretchr/testify/require"
)

// TestPropertyRepeatedSerializationWithClearRetainStructures repeatedly
// clears one shared bitmap, refills it from freshly generated inputs, and
// checks that its serialized round trip matches the reference set.
func TestPropertyRepeatedSerializationWithClearRetainStructures(t *testing.T) {
	// A fixed seed keeps the generated inputs reproducible.
	rng := rand.New(rand.NewSource(0))
	reusable := NewBitmap()

	// refill empties the shared bitmap and copies src into it value by value.
	refill := func(src *Bitmap) {
		reusable.ClearRetainStructures()
		src.Iterate(func(v uint32) bool {
			reusable.Add(v)
			return true
		})
	}

	for i := 0; i < 1000; i++ {
		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
			roaring1, roaring2, reference1, reference2 := genPropTestInputs(rng)

			refill(roaring1)
			assertRoaringEqualsReference(t, roundTripRoaring(t, reusable), reference1)

			refill(roaring2)
			assertRoaringEqualsReference(t, roundTripRoaring(t, reusable), reference2)
		})
	}
}

// TestPropertyOr checks that Or produces the same set as the reference
// implementation, both on a cleared-and-reused bitmap and on a fresh one.
func TestPropertyOr(t *testing.T) {
	// A fixed seed keeps the generated inputs reproducible.
	rng := rand.New(rand.NewSource(0))
	reusable := NewBitmap()

	for i := 0; i < 1000; i++ {
		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
			roaring1, roaring2, reference1, reference2 := genPropTestInputs(rng)

			// Build the union in the recycled bitmap first, while
			// roaring1 is still unmodified.
			reusable.ClearRetainStructures()
			reusable.Or(roaring1)
			reusable.Or(roaring2)

			// Then build the same union in place on the fresh bitmap
			// and on the reference.
			roaring1.Or(roaring2)
			reference1.Or(reference2)

			assertRoaringEqualsReference(t, reusable, reference1)
			assertRoaringEqualsReference(t, roaring1, reference1)
		})
	}
}

// TestPropertyAnd checks that And produces the same set as the reference
// implementation, both on a cleared-and-reused bitmap and on a fresh one.
func TestPropertyAnd(t *testing.T) {
	var (
		// Make test deterministic.
		rand     = rand.New(rand.NewSource(0))
		reusable = NewBitmap()
	)
	testFn := func(t *testing.T) {
		roaring1, roaring2, reference1, reference2 := genPropTestInputs(rand)

		// Seed the recycled bitmap with the first operand before
		// intersecting. And-ing into a freshly cleared bitmap always
		// yields the empty set, which would make the comparison below
		// pass only when the inputs happen not to intersect.
		reusable.ClearRetainStructures()
		reusable.Or(roaring1)
		reusable.And(roaring2)
		roaring1.And(roaring2)
		reference1.And(reference2)

		assertRoaringEqualsReference(t, reusable, reference1)
		assertRoaringEqualsReference(t, roaring1, reference1)
	}

	for i := 0; i < 100; i++ {
		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
			testFn(t)
		})
	}
}

// genPropTestInputs draws two random value sets from rand and returns two
// roaring bitmaps together with the two reference sets built from the same
// operations, in the order (bitmap A, bitmap B, reference A, reference B).
func genPropTestInputs(rand *rand.Rand) (*Bitmap, *Bitmap, *reference, *reference) {
	// Both sizes are drawn first, then all of A's values, then all of B's,
	// so the sequence of draws from rand stays fixed.
	sizeA := rand.Intn(1000)
	sizeB := rand.Intn(1000)

	// draw returns n freshly generated random values.
	draw := func(n int) []uint32 {
		vals := make([]uint32, 0, n)
		for len(vals) < n {
			vals = append(vals, rand.Uint32())
		}
		return vals
	}
	valuesA := draw(sizeA)
	valuesB := draw(sizeB)

	// populate applies the same operations to a bitmap and a reference set.
	// For each value, a 1-in-20 draw replaces it with a range insertion
	// starting at a fresh random position.
	populate := func(values []uint32) (*Bitmap, *reference) {
		bm, ref := New(), newReference()
		for _, v := range values {
			if rand.Intn(20) != 0 {
				bm.Add(v)
				ref.Add(v)
				continue
			}

			start := rand.Uint32()
			// The upper bound is computed in uint32 arithmetic before
			// widening, so both implementations receive identical bounds.
			lo, hi := uint64(start), uint64(start+100)
			bm.AddRange(lo, hi)
			ref.AddRange(lo, hi)
		}
		return bm, ref
	}

	roaring1, reference1 := populate(valuesA)
	roaring2, reference2 := populate(valuesB)
	return roaring1, roaring2, reference1, reference2
}

// reference is a deliberately simple set of uint32 values backed by a map.
// Property tests use it as the expected result when checking the actual
// roaring implementation.
type reference struct {
	m map[uint32]struct{}
}

// newReference returns an empty reference set.
func newReference() *reference {
	return &reference{m: map[uint32]struct{}{}}
}

// Add inserts x into the set.
func (r *reference) Add(x uint32) {
	r.m[x] = struct{}{}
}

// AddRange inserts every value in [start, end), converting each one to
// uint32 before insertion.
func (r *reference) AddRange(start, end uint64) {
	for v := start; v < end; v++ {
		r.Add(uint32(v))
	}
}

// Contains reports whether x is in the set.
func (r *reference) Contains(x uint32) bool {
	if _, found := r.m[x]; found {
		return true
	}
	return false
}

// Cardinality returns the number of distinct values in the set.
func (r *reference) Cardinality() uint64 {
	n := len(r.m)
	return uint64(n)
}

// Or adds every value of other to r.
func (r *reference) Or(other *reference) {
	for v := range other.m {
		r.Add(v)
	}
}

// And replaces r's contents with the values present in both r and other.
func (r *reference) And(other *reference) {
	shared := make(map[uint32]struct{})
	for v := range r.m {
		if other.Contains(v) {
			shared[v] = struct{}{}
		}
	}
	r.m = shared
}

// assertRoaringEqualsReference fails the test unless the roaring bitmap holds
// exactly the values in reference, both as passed in and after a
// serialize/deserialize round trip.
func assertRoaringEqualsReference(
	t *testing.T,
	roaring *Bitmap,
	reference *reference,
) {
	t.Helper()

	// Round-trip the roaring bitmap to ensure the property still holds
	// after a round of ser/der.
	roundTripped := roundTripRoaring(t, roaring)

	// Equal cardinality plus "every stored value is in the reference"
	// together imply set equality. Apply both checks to the original and
	// to the deserialized copy so a lossy round trip cannot go unnoticed.
	for _, bm := range []*Bitmap{roaring, roundTripped} {
		require.Equal(t, reference.Cardinality(), bm.Stats().Cardinality)
		bm.Iterate(func(x uint32) bool {
			require.True(t, reference.Contains(x))
			return true
		})
	}
}

// roundTripRoaring run-optimizes b, serializes it to bytes, and decodes those
// bytes into a new bitmap, which it returns. The test fails if either step
// errors or if decoding does not consume exactly the serialized length.
func roundTripRoaring(t *testing.T, b *Bitmap) *Bitmap {
	b.RunOptimize()

	encoded, err := b.ToBytes()
	require.NoError(t, err)

	decoded := New()
	consumed, err := decoded.FromBuffer(encoded)
	require.NoError(t, err)

	wantConsumed := int64(len(encoded))
	require.Equal(t, wantConsumed, consumed)

	return decoded
}
Loading