-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #19 from osspkg/dev
add bloom filter
- Loading branch information
Showing
11 changed files
with
261 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
/* | ||
* Copyright (c) 2019-2024 Mikhail Knyazhev <[email protected]>. All rights reserved. | ||
* Use of this source code is governed by a BSD 3-Clause license that can be found in the LICENSE file. | ||
*/ | ||
|
||
// see: https://en.wikipedia.org/wiki/Bloom_filter | ||
|
||
package bloom | ||
|
||
import ( | ||
"crypto/hmac" | ||
"crypto/rand" | ||
"crypto/sha1" | ||
"encoding/binary" | ||
"fmt" | ||
"math" | ||
"sync" | ||
) | ||
|
||
// blockSize is the number of bits held by each element of Bloom.bits.
const blockSize = 64

// Bloom is a Bloom filter: a compact probabilistic set of byte strings.
//
// Contain may report true for a value that was never added (a false
// positive), but it always reports true for a value that was added.
// Add and Contain take the internal lock, so a *Bloom may be shared
// between goroutines.
type Bloom struct {
	// bits is the bit array, packed blockSize bits per element.
	bits []uint64
	// size is the number of addressable bits; every hash is reduced modulo size.
	size uint64
	// salts holds one random HMAC key per hash function.
	salts [][32]byte

	mux sync.RWMutex
}

// New builds an empty filter sized by the optimal-parameter formulas for n
// elements and a target false positive rate p.
//
// It returns an error when n is 0, when p is NaN or outside the open
// interval (0, 1), or when the random salts cannot be generated.
func New(n uint64, p float64) (*Bloom, error) {
	if n == 0 {
		return nil, fmt.Errorf("bitset size cannot be 0")
	}
	// NaN compares false against everything, so it needs an explicit check.
	if math.IsNaN(p) || p <= 0 || p >= 1.0 {
		return nil, fmt.Errorf("false positive rate must be between 0 and 1")
	}

	b := &Bloom{}
	m, k := b.calcOptimalParams(n, p)
	b.size = m
	b.bits = make([]uint64, m/blockSize+1)
	b.salts = make([][32]byte, k)
	for i := range b.salts {
		if _, err := rand.Read(b.salts[i][:]); err != nil {
			return nil, fmt.Errorf("generate hash salt: %w", err)
		}
	}
	return b, nil
}

// Add inserts v into the filter by setting one bit per salt.
func (b *Bloom) Add(v []byte) {
	b.mux.Lock()
	defer b.mux.Unlock()

	// Every salt must take part: skipping one weakens the filter and, when
	// there is a single salt, leaves the filter permanently empty.
	for i := range b.salts {
		indx, num := b.getPosition(b.createHash(i, v))
		b.bits[indx] |= num
	}
}

// Contain reports whether v may have been added. A false result is
// definitive; a true result can be a false positive.
func (b *Bloom) Contain(v []byte) bool {
	b.mux.RLock()
	defer b.mux.RUnlock()

	// Must iterate over exactly the same salts as Add.
	for i := range b.salts {
		indx, num := b.getPosition(b.createHash(i, v))
		if b.bits[indx]&num == 0 {
			return false
		}
	}
	return true
}

// createHash returns the bit position of key for the i-th hash function:
// the first 8 bytes of HMAC-SHA1(salts[i], key), big-endian, modulo size.
func (b *Bloom) createHash(i int, key []byte) uint64 {
	mac := hmac.New(sha1.New, b.salts[i][:])
	mac.Write(key) // hash.Hash documents that Write never returns an error
	return binary.BigEndian.Uint64(mac.Sum(nil)) % b.size
}

// getPosition converts a bit position into the index of the block that holds
// it and a mask with only that bit set.
func (*Bloom) getPosition(p uint64) (uint64, uint64) {
	// Integer arithmetic stays exact for every uint64 position.
	return p / blockSize, uint64(1) << (p % blockSize)
}

// calcOptimalParams returns the bit count m = ceil(-n*ln(p) / ln(2)^2) and
// the hash function count k = ceil(m*ln(2) / n), each clamped to at least 1.
func (*Bloom) calcOptimalParams(n uint64, p float64) (m, k uint64) {
	m = uint64(math.Ceil(-float64(n) * math.Log(p) / math.Pow(math.Log(2.0), 2.0)))
	if m == 0 {
		m = 1
	}
	k = uint64(math.Ceil(float64(m) * math.Log(2.0) / float64(n)))
	if k == 0 {
		k = 1
	}
	return m, k
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
/* | ||
* Copyright (c) 2019-2024 Mikhail Knyazhev <[email protected]>. All rights reserved. | ||
* Use of this source code is governed by a BSD 3-Clause license that can be found in the LICENSE file. | ||
*/ | ||
|
||
package bloom | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestUnit_Bloom(t *testing.T) { | ||
bf, err := New(1000, 0.00001) | ||
require.NoError(t, err) | ||
|
||
bf.Add([]byte("hello")) | ||
bf.Add([]byte("user")) | ||
bf.Add([]byte("home")) | ||
|
||
require.False(t, bf.Contain([]byte("users"))) | ||
require.True(t, bf.Contain([]byte("user"))) | ||
} | ||
|
||
func TestUnit_Bloom2(t *testing.T) { | ||
_, err := New(0, 0.00001) | ||
require.Error(t, err) | ||
|
||
_, err = New(1, 1) | ||
require.Error(t, err) | ||
|
||
_, err = New(1, 0.0001) | ||
require.NoError(t, err) | ||
} | ||
|
||
func Benchmark_Bloom(b *testing.B) { | ||
bf, err := New(1000, 0.00001) | ||
if err != nil { | ||
b.FailNow() | ||
} | ||
bf.Add([]byte("hello")) | ||
bf.Add([]byte("user")) | ||
bf.Add([]byte("home")) | ||
|
||
b.ResetTimer() | ||
b.ReportAllocs() | ||
for i := 0; i < b.N; i++ { | ||
bf.Contain([]byte("users")) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
/* | ||
* Copyright (c) 2019-2024 Mikhail Knyazhev <[email protected]>. All rights reserved. | ||
* Use of this source code is governed by a BSD 3-Clause license that can be found in the LICENSE file. | ||
*/ | ||
|
||
// see: https://en.wikipedia.org/wiki/Heapsort | ||
|
||
package sorts | ||
|
||
func Heapsort[T any](list []T, less func(i, j int) bool) { | ||
for i := len(list)/2 - 1; i >= 0; i-- { | ||
heapsortSort[T](list, i, len(list), less) | ||
} | ||
for i := len(list) - 1; i >= 0; i-- { | ||
list[0], list[i] = list[i], list[0] | ||
heapsortSort[T](list, 0, i, less) | ||
} | ||
} | ||
|
||
// heapsortSort sifts the element at index parent down through the heap stored
// in list[:max] until neither of its children ranks at or above it according
// to less. Indices at or beyond max are never touched.
func heapsortSort[T any](list []T, parent, max int, less func(i, j int) bool) {
	for {
		top := parent
		// Left child first, then right; each is compared with the best so far.
		for _, kid := range [2]int{2*parent + 1, 2*parent + 2} {
			if kid >= max || less(kid, top) {
				continue
			}
			top = kid
		}
		if top == parent {
			return
		}
		list[parent], list[top] = list[top], list[parent]
		parent = top
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
/* | ||
* Copyright (c) 2019-2024 Mikhail Knyazhev <[email protected]>. All rights reserved. | ||
* Use of this source code is governed by a BSD 3-Clause license that can be found in the LICENSE file. | ||
*/ | ||
|
||
package sorts_test | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
"go.osspkg.com/algorithms/sorts" | ||
) | ||
|
||
func TestUnit_SortHeapsort(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
args []int | ||
want []int | ||
}{ | ||
{ | ||
name: "IntCase1", | ||
args: nil, | ||
want: nil, | ||
}, | ||
{ | ||
name: "IntCase2", | ||
args: []int{1, 67, 23, 1, 5, 9, 5, 32, 1, 34, 68, 9, 5, 23, 0, 0, 0, 0, 0, 5, 5, 3, 2, 1}, | ||
want: []int{0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 5, 5, 5, 5, 5, 9, 9, 23, 23, 32, 34, 67, 68}, | ||
}, | ||
{ | ||
name: "IntCase3", | ||
args: []int{1}, | ||
want: []int{1}, | ||
}, | ||
{ | ||
name: "IntCase4", | ||
args: []int{4, 0, 1, 2, 3}, | ||
want: []int{0, 1, 2, 3, 4}, | ||
}, | ||
{ | ||
name: "IntCase5", | ||
args: []int{0, 3, 1, 2}, | ||
want: []int{0, 1, 2, 3}, | ||
}, | ||
} | ||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
sorts.Heapsort(tt.args, func(i, j int) bool { | ||
return tt.args[i] < tt.args[j] | ||
}) | ||
require.Equal(t, tt.want, tt.args) | ||
}) | ||
} | ||
} | ||
|
||
func Benchmark_SortHeapsort(b *testing.B) { | ||
b.ReportAllocs() | ||
for i := 0; i < b.N; i++ { | ||
arr := []int{45, 61, 87, 20, 65, 36, 25, 86, 64, 4, 36, 53, 17, 38, 48, 52, 53, 59, 80, 79, 95, 72, 85, 52, 9, 12, 9, 36, 47, 34} | ||
sorts.Heapsort(arr, func(i, j int) bool { | ||
return arr[i] < arr[j] | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters