Skip to content

Commit

Permalink
Merge pull request #19 from osspkg/dev
Browse files Browse the repository at this point in the history
add bloom filter
  • Loading branch information
markus621 authored Jun 19, 2024
2 parents 32707e5 + 0affc07 commit 4936418
Show file tree
Hide file tree
Showing 11 changed files with 261 additions and 7 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ go get -u go.osspkg.com/algorithms
- [Insertion sort](sorts/insertion.go)
- [Merge sort](sorts/merge.go)
- [Selection sort](sorts/selection.go)
- [Heapsort](sorts/heapsort.go)
- Filtering algorithms
- [Bloom filter](filters/bloom/bloom.go)

## License

Expand Down
99 changes: 99 additions & 0 deletions filters/bloom/bloom.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* Copyright (c) 2019-2024 Mikhail Knyazhev <[email protected]>. All rights reserved.
* Use of this source code is governed by a BSD 3-Clause license that can be found in the LICENSE file.
*/

// see: https://en.wikipedia.org/wiki/Bloom_filter

package bloom

import (
"crypto/hmac"
"crypto/rand"
"crypto/sha1"
"encoding/binary"
"fmt"
"math"
"sync"
)

// blockSize is the number of bits stored in each element of Bloom.bits.
const blockSize = 64

// Bloom is a Bloom filter: a probabilistic set in which Contain may report
// false positives but never false negatives for keys passed to Add.
//
// Add and Contain are guarded by an internal RWMutex. A Bloom must not be
// copied after creation because it contains that mutex.
type Bloom struct {
	bits  []uint64   // bit array packed into blockSize-bit words
	size  uint64     // number of addressable bits (m)
	salts [][32]byte // one random HMAC key per hash function (k entries)

	mux sync.RWMutex // guards bits
}

// New creates a Bloom filter sized for n expected elements and a target
// false positive rate p, where 0 < p < 1.
//
// It returns an error if n is 0, if p is NaN or outside the open interval
// (0, 1), or if the random salts cannot be generated.
func New(n uint64, p float64) (*Bloom, error) {
	if n == 0 {
		return nil, fmt.Errorf("bitset size cannot be 0")
	}
	// NaN fails every ordered comparison, so it must be rejected explicitly;
	// otherwise it would flow into the size computation.
	if math.IsNaN(p) || p <= 0 || p >= 1.0 {
		return nil, fmt.Errorf("false positive rate must be between 0 and 1")
	}

	b := &Bloom{}
	m, k := b.calcOptimalParams(n, p)
	b.size = m
	b.bits = make([]uint64, m/blockSize+1)
	b.salts = make([][32]byte, k)
	for i := range b.salts {
		if _, err := rand.Read(b.salts[i][:]); err != nil {
			return nil, fmt.Errorf("generate hash salt: %w", err)
		}
	}
	return b, nil
}

// Add inserts v into the filter by setting one bit per hash function.
func (b *Bloom) Add(v []byte) {
	b.mux.Lock()
	defer b.mux.Unlock()

	// Every salt must take part: skipping the last one lowers the effective
	// number of hash functions and, when there is only one salt, makes Add a
	// no-op.
	for i := range b.salts {
		indx, num := b.getPosition(b.createHash(i, v))
		b.bits[indx] |= num
	}
}

// Contain reports whether v may have been added. A false result is
// definitive; a true result may be a false positive.
func (b *Bloom) Contain(v []byte) bool {
	b.mux.RLock()
	defer b.mux.RUnlock()

	for i := range b.salts {
		indx, num := b.getPosition(b.createHash(i, v))
		if b.bits[indx]&num == 0 {
			return false
		}
	}
	return true
}

// createHash returns the bit position in [0, b.size) selected for key by the
// i-th hash function, which is HMAC-SHA1 keyed with the i-th salt.
func (b *Bloom) createHash(i int, key []byte) uint64 {
	mac := hmac.New(sha1.New, b.salts[i][:])
	mac.Write(key) // hash.Hash.Write never returns an error
	return binary.BigEndian.Uint64(mac.Sum(nil)) % b.size
}

// getPosition splits bit position p into the index of the word that holds it
// and a mask with only that bit set. Integer arithmetic keeps the result
// exact for every uint64 value of p.
func (*Bloom) getPosition(p uint64) (uint64, uint64) {
	return p / blockSize, uint64(1) << (p % blockSize)
}

// calcOptimalParams returns the number of bits m and the number of hash
// functions k for n expected elements and false positive rate p:
//
//	m = ceil(-n * ln(p) / ln(2)^2)
//	k = ceil(m * ln(2) / n)
//
// Both results are clamped to at least 1.
func (*Bloom) calcOptimalParams(n uint64, p float64) (m, k uint64) {
	m = uint64(math.Ceil(-float64(n) * math.Log(p) / math.Pow(math.Log(2.0), 2.0)))
	if m == 0 {
		m = 1
	}
	k = uint64(math.Ceil(float64(m) * math.Log(2.0) / float64(n)))
	if k == 0 {
		k = 1
	}
	return m, k
}
51 changes: 51 additions & 0 deletions filters/bloom/bloom_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) 2019-2024 Mikhail Knyazhev <[email protected]>. All rights reserved.
* Use of this source code is governed by a BSD 3-Clause license that can be found in the LICENSE file.
*/

package bloom

import (
"testing"

"github.com/stretchr/testify/require"
)

// TestUnit_Bloom adds a few keys to a filter and asserts that an added key
// is reported as present while a key that was never added is not.
func TestUnit_Bloom(t *testing.T) {
	filter, err := New(1000, 0.00001)
	require.NoError(t, err)

	for _, word := range []string{"hello", "user", "home"} {
		filter.Add([]byte(word))
	}

	require.False(t, filter.Contain([]byte("users")))
	require.True(t, filter.Contain([]byte("user")))
}

// TestUnit_Bloom2 verifies argument validation in New: a zero element count
// and a false positive rate of 1 are rejected, while valid arguments succeed.
func TestUnit_Bloom2(t *testing.T) {
	cases := []struct {
		n       uint64
		p       float64
		wantErr bool
	}{
		{n: 0, p: 0.00001, wantErr: true},
		{n: 1, p: 1, wantErr: true},
		{n: 1, p: 0.0001, wantErr: false},
	}

	for _, c := range cases {
		_, err := New(c.n, c.p)
		if c.wantErr {
			require.Error(t, err)
			continue
		}
		require.NoError(t, err)
	}
}

// Benchmark_Bloom measures Contain for a key that was not added to a filter
// holding three keys. Filter construction is excluded from the timing.
func Benchmark_Bloom(b *testing.B) {
	filter, err := New(1000, 0.00001)
	if err != nil {
		b.FailNow()
	}
	for _, word := range []string{"hello", "user", "home"} {
		filter.Add([]byte(word))
	}

	b.ResetTimer()
	b.ReportAllocs()
	for left := b.N; left > 0; left-- {
		filter.Contain([]byte("users"))
	}
}
2 changes: 1 addition & 1 deletion sorts/bubble_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func TestUnit_SortBubble(t *testing.T) {
func Benchmark_SortBubble(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
arr := []int{30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}
arr := []int{45, 61, 87, 20, 65, 36, 25, 86, 64, 4, 36, 53, 17, 38, 48, 52, 53, 59, 80, 79, 95, 72, 85, 52, 9, 12, 9, 36, 47, 34}
sorts.Bubble(arr, func(i, j int) bool {
return arr[i] < arr[j]
})
Expand Down
2 changes: 1 addition & 1 deletion sorts/cocktail_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func TestUnit_SortCocktail(t *testing.T) {
func Benchmark_SortCocktail(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
arr := []int{30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}
arr := []int{45, 61, 87, 20, 65, 36, 25, 86, 64, 4, 36, 53, 17, 38, 48, 52, 53, 59, 80, 79, 95, 72, 85, 52, 9, 12, 9, 36, 47, 34}
sorts.Cocktail(arr, func(i, j int) bool {
return arr[i] < arr[j]
})
Expand Down
4 changes: 2 additions & 2 deletions sorts/common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Use of this source code is governed by a BSD 3-Clause license that can be found in the LICENSE file.
*/

package sorts
package sorts_test

import (
"sort"
Expand All @@ -13,7 +13,7 @@ import (
func Benchmark_Default_SortSlice(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
arr := []int{30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}
arr := []int{45, 61, 87, 20, 65, 36, 25, 86, 64, 4, 36, 53, 17, 38, 48, 52, 53, 59, 80, 79, 95, 72, 85, 52, 9, 12, 9, 36, 47, 34}
sort.Slice(arr, func(i, j int) bool {
return arr[i] < arr[j]
})
Expand Down
36 changes: 36 additions & 0 deletions sorts/heapsort.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright (c) 2019-2024 Mikhail Knyazhev <[email protected]>. All rights reserved.
* Use of this source code is governed by a BSD 3-Clause license that can be found in the LICENSE file.
*/

// see: https://en.wikipedia.org/wiki/Heapsort

package sorts

// Heapsort sorts list in place with the heapsort algorithm.
//
// less receives two indexes into list and reports whether the element at i
// orders before the element at j. It is called while list is being
// rearranged, so it must read the same slice that is being sorted.
func Heapsort[T any](list []T, less func(i, j int) bool) {
	size := len(list)

	// Build the heap by sifting down every node that has at least one child,
	// starting from the last such node.
	for node := size/2 - 1; node >= 0; node-- {
		heapsortSort(list, node, size, less)
	}

	// Move the heap root to the end of the shrinking heap, then restore the
	// heap property on the remaining prefix.
	for end := size - 1; end >= 0; end-- {
		list[0], list[end] = list[end], list[0]
		heapsortSort(list, 0, end, less)
	}
}

// heapsortSort sifts the element at index parent down through the heap
// stored in list[:max]. At each step the node is swapped with the child
// that less does not place before the current candidate, and the walk
// continues from that child until the node stays where it is.
func heapsortSort[T any](list []T, parent, max int, less func(i, j int) bool) {
	for node := parent; ; {
		pick := node
		if l := 2*node + 1; l < max && !less(l, pick) {
			pick = l
		}
		if r := 2*node + 2; r < max && !less(r, pick) {
			pick = r
		}
		if pick == node {
			return
		}
		list[node], list[pick] = list[pick], list[node]
		node = pick
	}
}
65 changes: 65 additions & 0 deletions sorts/heapsort_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright (c) 2019-2024 Mikhail Knyazhev <[email protected]>. All rights reserved.
* Use of this source code is governed by a BSD 3-Clause license that can be found in the LICENSE file.
*/

package sorts_test

import (
"testing"

"github.com/stretchr/testify/require"
"go.osspkg.com/algorithms/sorts"
)

// TestUnit_SortHeapsort runs sorts.Heapsort over nil, single-element,
// duplicate-heavy and small unsorted inputs and compares each result with
// the expected ascending order.
func TestUnit_SortHeapsort(t *testing.T) {
	type testCase struct {
		name string
		args []int
		want []int
	}

	cases := []testCase{
		{name: "IntCase1"},
		{
			name: "IntCase2",
			args: []int{1, 67, 23, 1, 5, 9, 5, 32, 1, 34, 68, 9, 5, 23, 0, 0, 0, 0, 0, 5, 5, 3, 2, 1},
			want: []int{0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 5, 5, 5, 5, 5, 9, 9, 23, 23, 32, 34, 67, 68},
		},
		{name: "IntCase3", args: []int{1}, want: []int{1}},
		{name: "IntCase4", args: []int{4, 0, 1, 2, 3}, want: []int{0, 1, 2, 3, 4}},
		{name: "IntCase5", args: []int{0, 3, 1, 2}, want: []int{0, 1, 2, 3}},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			data := tc.args
			sorts.Heapsort(data, func(a, b int) bool {
				return data[a] < data[b]
			})
			require.Equal(t, tc.want, data)
		})
	}
}

// Benchmark_SortHeapsort measures sorts.Heapsort on a fixed 30-element
// unsorted slice. The slice is rebuilt on every iteration because the sort
// works in place.
func Benchmark_SortHeapsort(b *testing.B) {
	b.ReportAllocs()
	for n := 0; n < b.N; n++ {
		data := []int{45, 61, 87, 20, 65, 36, 25, 86, 64, 4, 36, 53, 17, 38, 48, 52, 53, 59, 80, 79, 95, 72, 85, 52, 9, 12, 9, 36, 47, 34}
		ascending := func(x, y int) bool {
			return data[x] < data[y]
		}
		sorts.Heapsort(data, ascending)
	}
}
2 changes: 1 addition & 1 deletion sorts/insertion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func TestUnit_SortInsertion(t *testing.T) {
func Benchmark_SortInsertion(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
arr := []int{30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}
arr := []int{45, 61, 87, 20, 65, 36, 25, 86, 64, 4, 36, 53, 17, 38, 48, 52, 53, 59, 80, 79, 95, 72, 85, 52, 9, 12, 9, 36, 47, 34}
sorts.Insertion(arr, func(i, j int) bool {
return arr[i] < arr[j]
})
Expand Down
2 changes: 1 addition & 1 deletion sorts/merge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func TestUnit_SortMerge(t *testing.T) {
func Benchmark_SortMerge(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
arr := []int{30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}
arr := []int{45, 61, 87, 20, 65, 36, 25, 86, 64, 4, 36, 53, 17, 38, 48, 52, 53, 59, 80, 79, 95, 72, 85, 52, 9, 12, 9, 36, 47, 34}
sorts.Merge(arr, func(i, j int) bool {
return arr[i] < arr[j]
})
Expand Down
2 changes: 1 addition & 1 deletion sorts/selection_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func TestUnit_SortSelection(t *testing.T) {
func Benchmark_SortSelection(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
arr := []int{30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}
arr := []int{45, 61, 87, 20, 65, 36, 25, 86, 64, 4, 36, 53, 17, 38, 48, 52, 53, 59, 80, 79, 95, 72, 85, 52, 9, 12, 9, 36, 47, 34}
sorts.Selection(arr, func(i, j int) bool {
return arr[i] < arr[j]
})
Expand Down

0 comments on commit 4936418

Please sign in to comment.