Skip to content

Commit

Permalink
Merge pull request #115 from saul-jb/fix/bloom-filter
Browse files Browse the repository at this point in the history
fix: replace broken bloom filter
  • Loading branch information
tabcat authored Feb 13, 2024
2 parents 0a0ecb7 + e995dda commit 47c08dd
Show file tree
Hide file tree
Showing 5 changed files with 214 additions and 50 deletions.
53 changes: 15 additions & 38 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
"@libp2p/websockets": "^8.0.10",
"@multiformats/multiaddr": "^12.1.12",
"@types/mocha": "^10.0.6",
"@types/xxhashjs": "^0.2.4",
"aegir": "^42.1.0",
"blockstore-level": "^1.1.7",
"copy-deps": "^1.1.2",
Expand Down Expand Up @@ -123,7 +124,6 @@
"@open-draft/deferred-promise": "^2.2.0",
"@tabcat/zzzync": "^5.0.0",
"datastore-core": "^9.2.7",
"fission-bloom-filters": "^1.7.1",
"helia": "^3.0.0",
"interface-blockstore": "^5.2.9",
"it-all": "^3.0.4",
Expand All @@ -138,7 +138,8 @@
"streaming-iterables": "^8.0.1",
"uint8arrays": "^5.0.1",
"w3name": "^1.0.8",
"web3.storage": "^4.5.5"
"web3.storage": "^4.5.5",
"xxhashjs": "^0.2.2"
},
"overrides": {
"@alanshaw/pail": {
Expand Down
104 changes: 104 additions & 0 deletions src/utils/bloom-filter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
* This is a slimmed-down Bloom filter based on:
* https://github.com/Callidon/bloom-filters
* https://github.com/fission-codes/bloom-filters
*/
import XXH from 'xxhashjs'

/**
 * Expands a byte into its eight individual bits, most significant bit first.
 *
 * @param uint8 - The byte value to expand.
 * @returns Eight entries, each 0 or 1, ordered from the 128s bit down to the 1s bit.
 */
const uint8ToBits = (uint8: number): number[] => {
  const bits: number[] = []

  // Walk from the highest bit position (7) down to the lowest (0).
  for (let shift = 7; shift >= 0; shift--) {
    bits.push((uint8 >> shift) & 1)
  }

  return bits
}

/**
 * Packs a list of bits into a single number, treating the first entry as the
 * most significant bit of a byte (weight 2^7) and each later entry as the next
 * lower weight.
 *
 * Any entry that is not exactly 0 counts as a set bit. Lists shorter than eight
 * entries simply contribute nothing for the missing low positions.
 *
 * @param bits - The bits to pack, most significant first.
 * @returns The sum of the weights of all set bits.
 */
const bitsToUint8 = (bits: number[]): number => {
  let packed = 0

  bits.forEach((bit, position) => {
    if (bit !== 0) {
      packed += 2 ** (7 - position)
    }
  })

  return packed
}

/**
 * Copies exactly the bytes covered by a Uint8Array view into a new buffer.
 *
 * The view's offset and length are honoured, so a view over part of a larger
 * buffer yields only that part.
 *
 * @param a - The view to copy from.
 * @returns A buffer holding the view's bytes.
 */
const uint8ArrayToBuffer = (a: Uint8Array): ArrayBuffer => {
  const start = a.byteOffset
  const end = start + a.byteLength

  return a.buffer.slice(start, end)
}

/**
 * Hashes a value twice with xxHash64, using two seeds derived from `seed`.
 *
 * NOTE(review): `toNumber()` converts the 64-bit hash to a JS number; it may
 * not preserve all 64 bits — confirm against the xxhashjs/cuint documentation.
 *
 * @param value - The bytes to hash.
 * @param seed - Base seed; the first hash uses `seed + 1`, the second `seed + 2`.
 * @returns The two hash values as a pair.
 */
const hashTwice = (value: Uint8Array, seed: number): [number, number] => {
  const first = XXH.h64(uint8ArrayToBuffer(value), seed + 1).toNumber()
  const second = XXH.h64(uint8ArrayToBuffer(value), seed + 2).toNumber()

  return [first, second]
}

/**
 * Derives `number` distinct slot indices for an element in a filter of `size`
 * slots.
 *
 * Starting from the pair returned by `hashTwice`, each round takes the first
 * hash modulo `size` as a candidate index, then advances the first hash by the
 * second and the second by the round counter. Once the round counter exceeds
 * `size`, the pair is recomputed with an incremented seed on every round.
 *
 * @param element - The bytes to derive indices for.
 * @param size - Number of slots in the filter.
 * @param number - How many distinct indices to produce.
 * @param seed - Base seed passed to `hashTwice`.
 * @returns The distinct indices, in the order they were first produced.
 * @throws RangeError if more distinct indices are requested than can exist,
 * which would otherwise make the loop spin forever.
 */
const getDistinctIndices = (element: Uint8Array, size: number, number: number, seed: number): number[] => {
  // Only `size` distinct slots exist. A zero-sized filter collapses every
  // candidate to the single value NaN, hence the floor of 1.
  if (number > Math.max(size, 1)) {
    throw new RangeError(`Cannot derive ${number} distinct indices for a filter of size ${size}.`)
  }

  const indexes = new Set<number>()
  let round = 0
  let currentSeed = seed
  let hashes = hashTwice(element, currentSeed)

  while (indexes.size < number) {
    // Set.add already ignores duplicates, so no membership check is needed.
    indexes.add(hashes[0] % size)

    hashes[0] = (hashes[0] + hashes[1]) % size
    hashes[1] = (hashes[1] + round) % size
    round++

    // After `size` rounds, draw fresh hashes from a new seed.
    if (round > size) {
      currentSeed++
      hashes = hashTwice(element, currentSeed)
    }
  }

  return [...indexes]
}

/**
 * A fixed-size Bloom filter over byte arrays.
 *
 * The filter is stored as an array of 0/1 entries, one per slot. Elements are
 * mapped to slots by `getDistinctIndices` using the filter's size, hash count
 * and seed, so `has` only agrees with `add` when all three match.
 */
export default class BloomFilter {
  /** Seed used when deriving slot indices; may be reassigned after construction. */
  public seed: number
  private readonly _size: number
  private readonly _nbHashes: number
  private _filter: number[]

  /**
   * Creates an empty filter.
   *
   * @param size - Number of slots in the filter.
   * @param nbHashes - Number of distinct slots set per element.
   * @param seed - Seed used when deriving slot indices.
   * @throws Error if `nbHashes` is less than 1.
   */
  constructor (size: number, nbHashes: number, seed: number = 0x1111111111) {
    if (nbHashes < 1) {
      throw new Error('A Bloom Filter must have at least 1 hash function.')
    }

    this.seed = seed
    this._size = size
    this._nbHashes = nbHashes
    this._filter = new Array<number>(this._size).fill(0)
  }

  /**
   * Rebuilds a filter from the bytes produced by `toBytes`.
   *
   * `toBytes` pads the last byte, so the byte length alone cannot recover a
   * size that is not a multiple of 8. Pass the original `size` to get a filter
   * that derives the same slot indices as the one that was encoded; when it is
   * omitted, every bit of `bytes` is treated as a slot.
   *
   * The returned filter uses the default seed; assign `seed` afterwards if the
   * encoded filter used a different one.
   *
   * @param bytes - The encoded filter.
   * @param nbHashes - Number of distinct slots set per element.
   * @param size - Number of slots; defaults to `bytes.length * 8`.
   * @returns The decoded filter.
   * @throws RangeError if `size` is not an integer between 0 and `bytes.length * 8`.
   */
  static fromBytes (bytes: Uint8Array, nbHashes: number, size: number = bytes.length * 8): BloomFilter {
    const capacity = bytes.length * 8

    if (!Number.isInteger(size) || size < 0 || size > capacity) {
      throw new RangeError(`Filter size must be an integer between 0 and ${capacity}, got ${size}.`)
    }

    const bits: number[] = []

    for (const byte of bytes) {
      bits.push(...uint8ToBits(byte))
    }

    const filter = new BloomFilter(size, nbHashes)

    // Drop any padding bits beyond the requested size.
    filter._filter = bits.slice(0, size)

    return filter
  }

  /**
   * Marks every slot derived for `element` as set.
   *
   * @param element - The bytes to add.
   */
  add (element: Uint8Array): void {
    const indexes = getDistinctIndices(element, this._size, this._nbHashes, this.seed)

    for (const index of indexes) {
      this._filter[index] = 1
    }
  }

  /**
   * Reports whether every slot derived for `element` is set.
   *
   * @param element - The bytes to look up.
   * @returns `false` if any derived slot is unset or missing, otherwise `true`.
   */
  has (element: Uint8Array): boolean {
    const indexes = getDistinctIndices(element, this._size, this._nbHashes, this.seed)

    for (const index of indexes) {
      const slot = this._filter[index]

      if (slot == null || slot === 0) {
        return false
      }
    }

    return true
  }

  /**
   * Packs the slots into bytes, eight slots per byte, first slot in the most
   * significant bit. A final partial group leaves the low bits of the last
   * byte at zero.
   *
   * @returns The encoded filter, `ceil(size / 8)` bytes long.
   */
  toBytes (): Uint8Array {
    const arr = new Uint8Array(Math.ceil(this._size / 8))

    for (let i = 0; i < arr.length; i++) {
      const bits = this._filter.slice(i * 8, i * 8 + 8)
      arr[i] = bitsToUint8(bits)
    }

    return arr
  }
}
14 changes: 4 additions & 10 deletions src/utils/heads-exchange.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
import { DeferredPromise } from '@open-draft/deferred-promise'
import { BloomFilter } from 'fission-bloom-filters'
import * as lp from 'it-length-prefixed'
import { pipe } from 'it-pipe'
import { type Pushable, pushable } from 'it-pushable'
import { CID } from 'multiformats/cid'
import { consume } from 'streaming-iterables'
import BloomFilter from './bloom-filter.js'
import type { Stream } from '@libp2p/interface/connection'
import type { PeerId } from '@libp2p/interface/peer-id'
import type { Uint8ArrayList } from 'uint8arraylist'
import { Message } from '@/message/heads.js'
import { hashHeads } from '@/utils/replicator.js'

const uint8ArrayToBuffer = (a: Uint8Array): ArrayBuffer => a.buffer.slice(a.byteOffset, a.byteLength + a.byteOffset)

const calculateFilterParams = (length: number, rate: number): { size: number, hashes: number } => {
const safeLength = length <= 0 ? 1 : length
const size = Math.ceil(-((safeLength * Math.log(rate)) / Math.pow(Math.log(2), 2)))
Expand All @@ -23,14 +21,10 @@ const calculateFilterParams = (length: number, rate: number): { size: number, ha

const createFilter = (heads: CID[], options: Partial<{ collisionRate: number, seed: number }> = {}): { filter: BloomFilter, hashes: number } => {
const { size, hashes } = calculateFilterParams(heads.length, options.collisionRate ?? 0.1)
const filter = new BloomFilter(size, hashes)

if (options.seed != null) {
filter.seed = options.seed
}
const filter = new BloomFilter(size, hashes, options.seed)

for (const head of heads) {
filter.add(uint8ArrayToBuffer(head.bytes))
filter.add(head.bytes)
}

return { filter, hashes }
Expand Down Expand Up @@ -223,7 +217,7 @@ export class HeadsExchange {

filter.seed = message.filter.seed ?? this.remoteSeed

const missing = this.heads.map(h => h.bytes).filter(b => !filter.has(uint8ArrayToBuffer(b)))
const missing = this.heads.map(h => h.bytes).filter(b => !filter.has(b))

return { heads: missing }
}
Expand Down
88 changes: 88 additions & 0 deletions test/test-bloom-filter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import { assert } from 'aegir/chai'
import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
import BloomFilter from '../src/utils/bloom-filter.js'

// Fixture inputs shared by every test: short strings, a long string, and the
// empty string, each encoded to bytes.
const testData = [
  'test-1',
  'test-2',
  'test-3',
  'abc123',
  'A very long uint8array..........',
  '',
  '1',
  'a',
  'b',
  'c'
].map(text => uint8ArrayFromString(text))

describe('bloom filter', () => {
  // Builds a filter of the given shape and inserts every entry into it.
  const buildFilter = (size: number, nbHashes: number, entries: Uint8Array[] = []): BloomFilter => {
    const filter = new BloomFilter(size, nbHashes)

    entries.forEach(entry => { filter.add(entry) })

    return filter
  }

  it('creates a filter with the specified seed', () => {
    const expectedSeed = 0x123456789

    assert.equal(new BloomFilter(2, 2, expectedSeed).seed, expectedSeed)
  })

  it('the has method returns false on an empty filter', () => {
    const empty = buildFilter(2, 2)

    testData.forEach(entry => { assert.isFalse(empty.has(entry)) })
  })

  it('the has method returns true if it has that element', () => {
    const full = buildFilter(20, 4, testData)

    testData.forEach(entry => { assert.isTrue(full.has(entry)) })
  })

  it('the has method returns true only on elements that are contained in a partial filter', () => {
    // Only the first half of the fixtures is inserted.
    const isInserted = (index: number): boolean => index < testData.length / 2
    const partial = buildFilter(20, 4, testData.filter((_, index) => isInserted(index)))

    testData.forEach((entry, index) => {
      if (isInserted(index)) {
        assert.isTrue(partial.has(entry))
        return
      }

      assert.isFalse(partial.has(entry))
    })
  })

  it('encodes the filter', () => {
    const encoded = buildFilter(20, 4, testData).toBytes()

    assert.isOk(encoded)
  })

  it('decodes the filter', () => {
    const nbHashes = 4
    const original = buildFilter(20, nbHashes, testData)
    const decoded = BloomFilter.fromBytes(original.toBytes(), nbHashes)

    // Re-encoding the decoded filter must reproduce the original bytes.
    assert.deepEqual(decoded.toBytes(), original.toBytes())
  })
})

0 comments on commit 47c08dd

Please sign in to comment.