-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#3: switched to FixedBitSet, using bincode to perform binary encodin…
…g and decoding
- Loading branch information
1 parent
8367356
commit 1a6b78d
Showing
8 changed files
with
108 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,72 +1,34 @@ | ||
use bit_vec::BitVec; | ||
use fixedbitset::FixedBitSet; | ||
|
||
pub fn tanimoto_vec(a: &BitVec, b: &BitVec) -> f32 { | ||
pub fn tanimoto_bitset(a: &FixedBitSet, b: &FixedBitSet) -> f32 { | ||
let mut and_ = a.clone(); | ||
let mut or_ = a.clone(); | ||
and_.and(b); | ||
or_.or(b); | ||
|
||
let mut dividend: u32 = 0; | ||
for b in and_.blocks() { | ||
dividend += b.count_ones(); | ||
} | ||
let mut divisor: u32 = 0; | ||
for b in or_.blocks() { | ||
divisor += b.count_ones(); | ||
} | ||
|
||
return dividend as f32 / divisor as f32; | ||
} | ||
|
||
pub unsafe fn tanimoto_array(a: &[u64; 4], b: &[u64; 4]) -> f32 { | ||
let mut dividend: u32 = 0; | ||
let mut divisor: u32 = 0; | ||
|
||
for i in 0..4 { | ||
dividend += ((a[i] & b[i]) as i64).count_ones(); | ||
divisor += ((a[i] | b[i]) as i64).count_ones(); | ||
} | ||
return dividend as f32 / divisor as f32; | ||
and_.intersect_with(b); | ||
return and_.count_ones(..) as f32 / (a.count_ones(..) + b.count_ones(..) - and_.count_ones(..)) as f32; | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use bit_vec::BitVec; | ||
use crate::ringo::math::similarity::tanimoto::{tanimoto_array, tanimoto_vec}; | ||
use fixedbitset::FixedBitSet; | ||
use crate::ringo::math::similarity::tanimoto::{tanimoto_bitset}; | ||
|
||
#[test] | ||
fn test_tanimoto_vec_033() { | ||
let a: BitVec = BitVec::from_bytes(&[0b00000101]); | ||
let b = BitVec::from_bytes(&[0b00000011]); | ||
|
||
assert_eq!(tanimoto_vec(&a, &b), 0.33333334); | ||
fn test_tanimoto_bitset_033() { | ||
let mut a = FixedBitSet::with_capacity(8); | ||
a.insert(0); | ||
a.insert(2); | ||
let mut b = FixedBitSet::with_capacity(8); | ||
b.insert(0); | ||
b.insert(1); | ||
assert_eq!(tanimoto_bitset(&a, &b), 0.33333334); | ||
} | ||
|
||
#[test] | ||
fn test_tanimoto_vec_05() { | ||
let a: BitVec = BitVec::from_bytes(&[0b0000001]); | ||
let b = BitVec::from_bytes(&[0b00000011]); | ||
|
||
assert_eq!(tanimoto_vec(&a, &b), 0.5); | ||
fn test_tanimoto_bitset_05() { | ||
let mut a = FixedBitSet::with_capacity(8); | ||
a.insert(0); | ||
let mut b = FixedBitSet::with_capacity(8); | ||
b.insert(0); | ||
b.insert(1); | ||
assert_eq!(tanimoto_bitset(&a, &b), 0.5); | ||
} | ||
|
||
#[test] | ||
fn test_tanimoto_array_033() { | ||
let a: [u64; 4] = [0b00000101, 0, 0, 0]; | ||
let b = [0b00000011, 0, 0, 0]; | ||
|
||
unsafe { | ||
assert_eq!(tanimoto_array(&a, &b), 0.33333334); | ||
} | ||
} | ||
|
||
#[test] | ||
fn test_tanimoto_array_05() { | ||
let a: [u64; 4] = [0b00000001, 0, 0, 0]; | ||
let b = [0b00000011, 0, 0, 0]; | ||
|
||
unsafe { | ||
assert_eq!(tanimoto_array(&a, &b), 0.5); | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
mod index; | ||
mod search; | ||
mod index_item; | ||
pub(crate) mod fingerprint; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
use bincode::de::BorrowDecoder; | ||
use bincode::error::{DecodeError, EncodeError}; | ||
use fixedbitset::{Block, FixedBitSet}; | ||
|
||
/// Number of bits in a fingerprint; the decode impls in this file rebuild the bit set with this capacity. | ||
pub const FINGERPRINT_SIZE: usize = 512; | ||
|
||
/// Newtype wrapper around a `FixedBitSet`, given hand-written bincode `Encode`, `Decode` and `BorrowDecode` impls in this file. | ||
#[derive(Debug)] | ||
pub struct Fingerprint(pub FixedBitSet); | ||
|
||
/// Encodes a fingerprint as the block slice of its underlying bit set. | ||
/// | ||
/// Only the blocks are written; the bit capacity is not encoded (the decode impls use `FINGERPRINT_SIZE`). | ||
impl bincode::Encode for Fingerprint { | ||
fn encode<E: bincode::enc::Encoder>(&self, encoder: &mut E) -> Result<(), EncodeError> { | ||
// Delegate to the slice's own `Encode` impl; any encoder error is propagated via `?`. | ||
self.0.as_slice().encode(encoder)?; | ||
Ok(()) | ||
} | ||
} | ||
|
||
/// Decodes a fingerprint from a sequence of blocks, the counterpart of the `Encode` impl. | ||
impl bincode::Decode for Fingerprint { | ||
fn decode<D: bincode::de::Decoder>(decoder: &mut D) -> Result<Self, DecodeError> { | ||
// Read the blocks back as an owned vector; decode errors are propagated via `?`. | ||
let slice = Vec::<Block>::decode(decoder)?; | ||
// The capacity is always FINGERPRINT_SIZE, whatever number of blocks was decoded. | ||
// NOTE(review): the block count is not validated here; a mismatch is handled however | ||
// `with_capacity_and_blocks` handles it — confirm against the fixedbitset docs. | ||
let fp = FixedBitSet::with_capacity_and_blocks(FINGERPRINT_SIZE, slice); | ||
Ok(Fingerprint(fp)) | ||
} | ||
} | ||
|
||
/// Borrow-decoding variant; follows the same steps as the `Decode` impl but goes through `borrow_decode`. | ||
impl<'de> bincode::BorrowDecode<'de> for Fingerprint { | ||
fn borrow_decode<D: BorrowDecoder<'de>>(decoder: &mut D) -> Result<Self, DecodeError> { | ||
// The blocks are decoded into an owned `Vec<Block>`; decode errors are propagated via `?`. | ||
let slice = Vec::<Block>::borrow_decode(decoder)?; | ||
// The capacity is always FINGERPRINT_SIZE, whatever number of blocks was decoded. | ||
let fp = FixedBitSet::with_capacity_and_blocks(FINGERPRINT_SIZE, slice); | ||
Ok(Fingerprint(fp)) | ||
} | ||
} | ||
|
||
// Unit tests for the bincode encoding of `Fingerprint`. | ||
#[cfg(test)] | ||
mod tests { | ||
use fixedbitset::{FixedBitSet}; | ||
use crate::ringo::ringo::fingerprint::{Fingerprint, FINGERPRINT_SIZE}; | ||
|
||
// Round trip: set bits 1 and 17, encode with the standard bincode config, decode, | ||
// and check that exactly those bits are set in the decoded fingerprint. | ||
#[test] | ||
fn test_fingerprint_encode_decode() { | ||
let mut fp = Fingerprint(FixedBitSet::with_capacity(FINGERPRINT_SIZE)); | ||
fp.0.set(1, true); | ||
fp.0.set(17, true); | ||
|
||
let encoded = bincode::encode_to_vec(&fp, bincode::config::standard()).unwrap(); | ||
// `decode_from_slice(..)` is unwrapped and `.0` takes the decoded value from the returned tuple. | ||
let decoded: Fingerprint = bincode::decode_from_slice(&encoded, bincode::config::standard()).unwrap().0; | ||
assert_eq!(decoded.0.ones().collect::<Vec<usize>>(), vec![1, 17]); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,30 @@ | ||
use bit_vec::BitVec; | ||
use bincode::{Decode, Encode}; | ||
use crate::ringo::ringo::fingerprint::Fingerprint; | ||
|
||
#[derive(Debug, Encode, Decode)] | ||
pub struct IndexItem { | ||
pub position: usize, | ||
pub fingerprint: Vec<u8> | ||
pub fingerprint: Fingerprint | ||
} | ||
|
||
impl IndexItem { | ||
pub fn new(position: usize, fingerprint: BitVec) -> IndexItem { | ||
IndexItem { | ||
position, | ||
fingerprint: fingerprint.to_bytes() | ||
} | ||
#[cfg(test)] | ||
mod tests { | ||
use bincode::config::standard; | ||
use bincode::{decode_from_slice, encode_to_vec}; | ||
use fixedbitset::FixedBitSet; | ||
use crate::ringo::ringo::index_item::IndexItem; | ||
use crate::ringo::ringo::fingerprint::Fingerprint; | ||
|
||
#[test] | ||
fn test_index_item_encode_decode() { | ||
let fp = Fingerprint(FixedBitSet::with_capacity(512)); | ||
let mut ii = IndexItem {position: 0, fingerprint: fp}; | ||
ii.position = 0; | ||
ii.fingerprint.0.set(1, true); | ||
ii.fingerprint.0.set(17, true); | ||
|
||
let encoded = encode_to_vec(&ii, standard()).unwrap(); | ||
let decoded: IndexItem = decode_from_slice(&encoded, standard()).unwrap().0; | ||
assert_eq!(decoded.fingerprint.0.ones().collect::<Vec<usize>>(), vec![1, 17]); | ||
} | ||
} | ||
} |