-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #115 from saul-jb/fix/bloom-filter
fix: replace broken bloom filter
- Loading branch information
Showing
5 changed files
with
214 additions
and
50 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/** | ||
* This is a slimmed-down Bloom Filter based on: | ||
* https://github.com/Callidon/bloom-filters | ||
* https://github.com/fission-codes/bloom-filters | ||
*/ | ||
import XXH from 'xxhashjs' | ||
|
||
/**
 * Expands a byte into eight bit flags, most significant bit first.
 *
 * @param uint8 - the value whose low eight bits are inspected
 * @returns an array of eight entries, each either 0 or 1
 */
const uint8ToBits = (uint8: number): number[] => {
  const masks = [128, 64, 32, 16, 8, 4, 2, 1]
  const flags: number[] = []

  for (const mask of masks) {
    flags.push((mask & uint8) > 0 ? 1 : 0)
  }

  return flags
}
|
||
/**
 * Packs a list of bit flags into a single number, treating position 0 as the
 * most significant bit of a byte (weight 2^7).
 *
 * Any entry other than 0 counts as a set bit. A list shorter than eight
 * entries leaves the remaining low-order bits unset.
 *
 * @param bits - bit flags, most significant first
 * @returns the sum of the weights of all set positions
 */
const bitsToUint8 = (bits: number[]): number => {
  let total = 0

  bits.forEach((bit, position) => {
    if (bit !== 0) {
      total += 2 ** (7 - position)
    }
  })

  return total
}
|
||
/**
 * Copies exactly the bytes covered by a Uint8Array view into a new buffer.
 *
 * The view's byteOffset and byteLength are honoured, so a subarray of a
 * larger buffer yields only its own bytes.
 *
 * @param a - the view to copy
 * @returns a buffer holding the view's bytes
 */
const uint8ArrayToBuffer = (a: Uint8Array): ArrayBuffer => {
  const start = a.byteOffset
  const end = start + a.byteLength

  return a.buffer.slice(start, end)
}
|
||
/**
 * Produces two hash values for the same input by running XXH.h64 under two
 * different seeds (`seed + 1` and `seed + 2`).
 *
 * @param value - the bytes to hash
 * @param seed - base seed; the two hashes use the next two values after it
 * @returns a pair holding the numeric form of each hash
 */
const hashTwice = (value: Uint8Array, seed: number): [number, number] => {
  const first = XXH.h64(uint8ArrayToBuffer(value), seed + 1)
  const second = XXH.h64(uint8ArrayToBuffer(value), seed + 2)

  return [first.toNumber(), second.toNumber()]
}
|
||
/**
 * Derives `number` distinct indices for an element, each one a hash value
 * reduced modulo `size`.
 *
 * Two hashes of the element are combined step by step to generate candidate
 * indices. Once more than `size` steps have been taken, the seed is bumped
 * and the element is re-hashed on every further step.
 *
 * @param element - the bytes to derive indices for
 * @param size - the number of available slots
 * @param number - how many distinct indices are required
 * @param seed - base seed for hashing
 * @returns the distinct indices in the order they were found
 * @throws RangeError when `size` is NaN or fewer than `number` slots exist
 */
const getDistinctIndices = (element: Uint8Array, size: number, number: number, seed: number): number[] => {
  // Without enough slots the loop below could never gather `number` distinct
  // values and would spin forever, so reject the request up front.
  if (Number.isNaN(size) || number > size) {
    throw new RangeError(`Cannot pick ${number} distinct indices from a filter of size ${size}.`)
  }

  const indexes = new Set<number>()
  let n = 0
  let hashes = hashTwice(element, seed)

  while (indexes.size < number) {
    // Set.add is a no-op for values that are already present.
    indexes.add(hashes[0] % size)

    hashes[0] = (hashes[0] + hashes[1]) % size
    hashes[1] = (hashes[1] + n) % size
    n++

    // After `size` steps, start drawing fresh hashes from a new seed.
    if (n > size) {
      seed++
      hashes = hashTwice(element, seed)
    }
  }

  return [...indexes.values()]
}
|
||
/**
 * A fixed-size Bloom filter over byte arrays.
 *
 * The filter is an array of 0/1 flags. Each element maps to `nbHashes`
 * distinct positions obtained from getDistinctIndices using the filter's
 * size and seed.
 */
export default class BloomFilter {
  /** Seed passed to the index derivation on every add/has call. */
  public seed: number
  private readonly _size: number
  private readonly _nbHashes: number
  private _filter: number[]

  /**
   * @param size - number of bits in the filter
   * @param nbHashes - number of positions set or checked per element
   * @param seed - hashing seed
   * @throws Error when `nbHashes` is less than 1
   */
  constructor (size: number, nbHashes: number, seed: number = 0x1111111111) {
    if (nbHashes < 1) {
      throw new Error('A Bloom Filter must have at least 1 hash function.')
    }

    this.seed = seed
    this._size = size
    this._nbHashes = nbHashes
    this._filter = new Array<number>(this._size).fill(0)
  }

  /**
   * Rebuilds a filter from the output of toBytes.
   *
   * toBytes pads the last byte with zero bits, so the byte length alone does
   * not reveal the original bit count. Positions are derived modulo the
   * filter size, so a filter decoded with a different size looks elements up
   * at different positions. Pass `size` (and `seed`, if a custom one was
   * used) to obtain a filter that answers like the original.
   *
   * @param bytes - the encoded filter
   * @param nbHashes - number of positions per element
   * @param size - bit count of the filter; defaults to `bytes.length * 8`
   * @param seed - hashing seed; defaults to the constructor default
   * @returns the decoded filter
   * @throws RangeError when `size` is not an integer in `[0, bytes.length * 8]`
   */
  static fromBytes (bytes: Uint8Array, nbHashes: number, size?: number, seed?: number): BloomFilter {
    const capacity = bytes.length * 8
    const bitCount = size ?? capacity

    if (!Number.isInteger(bitCount) || bitCount < 0 || bitCount > capacity) {
      throw new RangeError(`size must be an integer between 0 and ${capacity}, got ${bitCount}.`)
    }

    const bits: number[] = []
    for (const byte of bytes) {
      bits.push(...uint8ToBits(byte))
    }

    // Passing undefined for seed falls back to the constructor default.
    const filter = new BloomFilter(bitCount, nbHashes, seed)

    // Drop any padding bits beyond the requested size.
    filter._filter = bits.slice(0, bitCount)

    return filter
  }

  /**
   * Marks every position derived for the element as set.
   *
   * @param element - the bytes to insert
   */
  add (element: Uint8Array): void {
    const indexes = getDistinctIndices(element, this._size, this._nbHashes, this.seed)

    for (const index of indexes) {
      this._filter[index] = 1
    }
  }

  /**
   * Tests whether every position derived for the element is set.
   *
   * @param element - the bytes to look up
   * @returns false if any derived position is unset or missing, true otherwise
   */
  has (element: Uint8Array): boolean {
    const indexes = getDistinctIndices(element, this._size, this._nbHashes, this.seed)

    return indexes.every(index => this._filter[index] != null && this._filter[index] !== 0)
  }

  /**
   * Packs the filter bits into bytes, eight bits per byte with the earliest
   * bit as the most significant. If the size is not a multiple of eight, the
   * low-order bits of the final byte are left as zero.
   *
   * @returns the encoded filter, `ceil(size / 8)` bytes long
   */
  toBytes (): Uint8Array {
    const arr = new Uint8Array(Math.ceil(this._size / 8))

    for (let i = 0; i < arr.length; i++) {
      const bits = this._filter.slice(i * 8, i * 8 + 8)
      arr[i] = bitsToUint8(bits)
    }

    return arr
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import { assert } from 'aegir/chai' | ||
import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' | ||
import BloomFilter from '../src/utils/bloom-filter.js' | ||
|
||
// Sample inputs shared by every test case: short strings, a longer string
// and the empty string, each converted to bytes.
const testData = [
  'test-1',
  'test-2',
  'test-3',
  'abc123',
  'A very long uint8array..........',
  '',
  '1',
  'a',
  'b',
  'c'
].map(text => uint8ArrayFromString(text))
|
||
// Exercises construction, membership queries and byte encoding/decoding of
// BloomFilter against the shared testData samples.
describe('bloom filter', () => {
  it('creates a filter with the specified seed', () => {
    const customSeed = 0x123456789
    const bloom = new BloomFilter(2, 2, customSeed)

    assert.equal(bloom.seed, customSeed)
  })

  it('the has method returns false on an empty filter', () => {
    const empty = new BloomFilter(2, 2)

    testData.forEach(item => {
      assert.isFalse(empty.has(item))
    })
  })

  it('the has method returns true if it has that element', () => {
    const bloom = new BloomFilter(20, 4)

    testData.forEach(item => {
      bloom.add(item)
    })

    testData.forEach(item => {
      assert.isTrue(bloom.has(item))
    })
  })

  it('the has method returns true only on elements that are contained in a partial filter', () => {
    const bloom = new BloomFilter(20, 4)
    const half = testData.length / 2

    // Only the first half of the samples is inserted.
    testData.forEach((item, position) => {
      if (position < half) {
        bloom.add(item)
      }
    })

    testData.forEach((item, position) => {
      const present = bloom.has(item)

      if (position < half) {
        assert.isTrue(present)
      } else {
        assert.isFalse(present)
      }
    })
  })

  it('encodes the filter', () => {
    const bloom = new BloomFilter(20, 4)

    testData.forEach(item => {
      bloom.add(item)
    })

    const encoded = bloom.toBytes()

    assert.isOk(encoded)
  })

  it('decodes the filter', () => {
    const nbHashes = 4
    const original = new BloomFilter(20, nbHashes)

    testData.forEach(item => {
      original.add(item)
    })

    const encoded = original.toBytes()
    const decoded = BloomFilter.fromBytes(encoded, nbHashes)

    // Re-encoding the decoded filter must give back the same bytes.
    assert.deepEqual(decoded.toBytes(), original.toBytes())
  })
})