From 4d7001d70423e0c8470f40a08a103f0c2c6dd1cb Mon Sep 17 00:00:00 2001
From: Ariel Shtul
Date: Sat, 24 Aug 2024 18:40:38 +0300
Subject: [PATCH 1/3] add benchmarks

---
 benches/bench.rs | 80 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 benches/bench.rs

diff --git a/benches/bench.rs b/benches/bench.rs
new file mode 100644
--- /dev/null
+++ b/benches/bench.rs
@@ -0,0 +1,80 @@
+//! Micro-benchmarks for `Bloom::set` and `Bloom::check`.
+//!
+//! Relies on the unstable `test` crate, so it only builds on a nightly toolchain.
+#![feature(test)]
+
+extern crate test;
+
+use bloomfilter::Bloom;
+
+/* Set benchmarks */
+
+/// Times `set` on a filter of `bitmap_size` bits created for `items_count` items.
+///
+/// Every iteration inserts a new key: a counter that starts just above `items_count`.
+fn inner_insert_bench(b: &mut test::Bencher, bitmap_size: usize, items_count: usize) {
+    // `bitmap_size` is in bits; `Bloom::new` is handed the size in bytes.
+    let mut bf: Bloom<usize> = Bloom::new(bitmap_size / 8, items_count);
+    let mut index = items_count;
+    b.iter(|| {
+        index += 1;
+        test::black_box(bf.set(&index));
+    });
+}
+
+#[bench]
+fn bench_insert_100(b: &mut test::Bencher) {
+    inner_insert_bench(b, 1000, 100);
+}
+
+#[bench]
+fn bench_insert_1000(b: &mut test::Bencher) {
+    inner_insert_bench(b, 10000, 1000);
+}
+
+#[bench]
+fn bench_insert_m_1(b: &mut test::Bencher) {
+    inner_insert_bench(b, 10_000_000, 1_000_000);
+}
+
+#[bench]
+fn bench_insert_m_10(b: &mut test::Bencher) {
+    inner_insert_bench(b, 100_000_000, 10_000_000);
+}
+
+/* Get benchmarks */
+
+/// Times `check` on a filter that already holds the keys `0..items_count`.
+///
+/// Probed keys start just above `items_count`, so none of them was ever inserted.
+fn inner_get_bench(b: &mut test::Bencher, bitmap_size: usize, items_count: usize) {
+    let mut bf: Bloom<usize> = Bloom::new(bitmap_size / 8, items_count);
+    for index in 0..items_count {
+        bf.set(&index);
+    }
+    let mut index = items_count;
+    b.iter(|| {
+        index += 1;
+        test::black_box(bf.check(&index));
+    });
+}
+
+#[bench]
+fn bench_get_100(b: &mut test::Bencher) {
+    inner_get_bench(b, 1000, 100);
+}
+
+#[bench]
+fn bench_get_1000(b: &mut test::Bencher) {
+    inner_get_bench(b, 10000, 1000);
+}
+
+#[bench]
+fn bench_get_m_1(b: &mut test::Bencher) {
+    inner_get_bench(b, 10_000_000, 1_000_000);
+}
+
+#[bench]
+fn bench_get_m_10(b: &mut test::Bencher) {
+    inner_get_bench(b, 100_000_000, 10_000_000);
+}

From 6befceb73b0d0552c15178e747439dee92f13b97 Mon Sep 17 00:00:00 2001
From: Ariel Shtul
Date: Sat, 24 Aug 2024 19:14:02 +0300
Subject: [PATCH 2/3] add false positive test

---
 tests/bloom.rs | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tests/bloom.rs b/tests/bloom.rs
--- a/tests/bloom.rs
+++ b/tests/bloom.rs
@@ -50,3 +50,31 @@ fn bloom_test_load() {
     );
     assert!(cloned.check(&k));
 }
+
+/// Checks that flooring the hash-function count keeps the false positive rate low.
+///
+/// Each filter gets 10 bits per item, for which the expected rate is just under 1%.
+/// The bound is deliberately loose (3%) so normal run-to-run variation cannot trip it.
+#[test]
+fn test_false_positive_rate() {
+    const MAX_FALSE_POSITIVE_RATE: f64 = 0.03;
+    let capacities = [100, 1000, 10000, 100000, 1000000];
+    for &capacity in capacities.iter() {
+        let mut bf: Bloom<usize> = Bloom::new(capacity * 10 / 8, capacity);
+        for index in 0..capacity {
+            bf.set(&index);
+        }
+        // Probe 10 * capacity keys that were never inserted; every hit is a false positive.
+        let false_positives = (capacity..11 * capacity)
+            .filter(|index| bf.check(index))
+            .count();
+        let rate = false_positives as f64 / (10.0 * capacity as f64);
+        println!("False positive rate for capacity {}: {}", capacity, rate);
+        assert!(
+            rate < MAX_FALSE_POSITIVE_RATE,
+            "false positive rate {} too high for capacity {}",
+            rate,
+            capacity
+        );
+    }
+}

From 3230729041227dfc7f9a795d46e47004601079d2 Mon Sep 17 00:00:00 2001
From: Ariel Shtul
Date: Sat, 24 Aug 2024 19:24:56 +0300
Subject: [PATCH 3/3] always floor your bloom

---
 src/lib.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -220,6 +220,7 @@ impl Bloom {
     fn optimal_k_num(bitmap_bits: u64, items_count: usize) -> u32 {
         let m = bitmap_bits as f64;
         let n = items_count as f64;
-        let k_num = (m / n * f64::ln(2.0f64)).ceil() as u32;
+        // Round down; the clamp below still guarantees at least one hash function.
+        let k_num = (m / n * f64::ln(2.0f64)).floor() as u32;
         cmp::max(k_num, 1)
     }