Skip to content

Commit

Permalink
#6: code reorganization, added binaries for index and search
Browse files Browse the repository at this point in the history
  • Loading branch information
mkviatkovskii committed Jul 6, 2024
1 parent bc6a100 commit 5a474ef
Show file tree
Hide file tree
Showing 37 changed files with 85 additions and 70 deletions.
9 changes: 7 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,19 @@ version = "0.0.1"

[dependencies]
bincode = "2.0.0-rc.3"
clickrs = "0.1.5"
fixedbitset = "0.5.7"
nom = "7.1.3"
petgraph = "0.6.5"

[lib]
name = "ringo"
path = "src/lib.rs"

[[bin]]
name = "ringo-index"
path = "src/ringo/ringo/index/main.rs"
path = "src/bin/ringo_index.rs"

[[bin]]
name = "ringo-search"
path = "src/ringo/ringo/search/main.rs"
path = "src/bin/ringo_search.rs"
1 change: 0 additions & 1 deletion src/bin.rs

This file was deleted.

8 changes: 8 additions & 0 deletions src/bin/ringo_index.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
extern crate ringo;

use ringo::db::index::index_file;

/// Entry point of the `ringo-index` binary.
///
/// Takes the path of the SMILES file to index as the first command-line
/// argument and passes it to `index_file`.
///
/// If the argument is missing, a usage message is written to stderr and the
/// process exits with status 2 instead of panicking on an out-of-bounds index.
fn main() {
    let mut args = std::env::args();
    // The first item is conventionally the program name; fall back to the
    // binary's name if the platform does not provide it.
    let program = args.next().unwrap_or_else(|| String::from("ringo-index"));

    match args.next() {
        Some(smiles_file) => {
            index_file(&smiles_file);
        }
        None => {
            eprintln!("usage: {} <smiles-file>", program);
            std::process::exit(2);
        }
    }
}
11 changes: 11 additions & 0 deletions src/bin/ringo_search.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
extern crate ringo;

use ringo::db::search::similarity_search;

/// Entry point of the `ringo-search` binary.
///
/// Reads four positional command-line arguments and forwards them to
/// `similarity_search`: the first two are passed as strings, the third and
/// fourth are parsed into whatever types `similarity_search` expects. Every
/// returned result is printed as its `line` and `similarity` fields in debug
/// format, one result per line.
///
/// If arguments are missing or a value fails to parse, a diagnostic is written
/// to stderr and the process exits with status 2 instead of panicking.
///
/// NOTE(review): the argument names in the usage message assume the order
/// (smiles file, query SMILES, minimum similarity, result limit) — confirm
/// against the signature of `similarity_search`.
fn main() {
    let args: Vec<String> = std::env::args().collect();
    if args.len() < 5 {
        let program = args.first().map(String::as_str).unwrap_or("ringo-search");
        eprintln!(
            "usage: {} <smiles-file> <query-smiles> <min-similarity> <limit>",
            program
        );
        std::process::exit(2);
    }

    // The parse targets are inferred from the parameter types of
    // `similarity_search`, exactly as in the original `.parse().unwrap()` calls.
    let results = similarity_search(
        &args[1],
        &args[2],
        args[3].parse().unwrap_or_else(|err| {
            eprintln!("invalid value {:?} for <min-similarity>: {}", args[3], err);
            std::process::exit(2)
        }),
        args[4].parse().unwrap_or_else(|err| {
            eprintln!("invalid value {:?} for <limit>: {}", args[4], err);
            std::process::exit(2)
        }),
    );

    for result in results {
        println!("{:?} {:?}", result.line, result.similarity);
    }
}
4 changes: 4 additions & 0 deletions src/db.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pub mod index_item;
pub mod index;
pub mod search;
pub mod fingerprint;
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ impl<'de> bincode::BorrowDecode<'de> for Fingerprint {

#[cfg(test)]
mod tests {
use crate::ringo::fingerprint::fingerprint::{Fingerprint, FINGERPRINT_SIZE};
use crate::db::fingerprint::{Fingerprint, FINGERPRINT_SIZE};
use fixedbitset::FixedBitSet;

#[test]
Expand Down
14 changes: 8 additions & 6 deletions src/ringo/ringo/index/index.rs → src/db/index.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
use crate::ringo::fingerprint::fingerprint::FINGERPRINT_SIZE;
use crate::ringo::molecule::smiles::reader::molecule::parse_molecule;
use crate::ringo::ringo::index::index_item::IndexItem;

use bincode::encode_into_slice;
use std::fs::File;
use std::io::{BufRead, BufWriter, Write};
use crate::db::index_item::IndexItem;
use crate::db::fingerprint::FINGERPRINT_SIZE;
use crate::molecule::smiles::reader::molecule::parse_molecule;

#[cfg(windows)]
const LINE_ENDING_LENGTH: usize = 2;
#[cfg(not(windows))]
const LINE_ENDING_LENGTH: usize = 1;

pub(crate) fn index(smiles_file: &str) {
pub fn index_file(smiles_file: &str) {
// open file for reading
let fi = File::open(smiles_file).expect("Could not open file");

Expand All @@ -35,12 +36,13 @@ pub(crate) fn index(smiles_file: &str) {
}
}


#[cfg(test)]
mod test {
use crate::ringo::ringo::index::index::index;
use crate::db::index::index_file;

#[test]
fn test_index() {
index("molecules.smi");
index_file("molecules.smi");
}
}
6 changes: 3 additions & 3 deletions src/ringo/ringo/index/index_item.rs → src/db/index_item.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::ringo::fingerprint::fingerprint::Fingerprint;
use crate::db::fingerprint::Fingerprint;
use bincode::{Decode, Encode};

#[derive(Debug, Encode, Decode)]
Expand All @@ -9,11 +9,11 @@ pub struct IndexItem {

#[cfg(test)]
mod tests {
use crate::ringo::fingerprint::fingerprint::Fingerprint;
use crate::ringo::ringo::index::index_item::IndexItem;
use bincode::config::standard;
use bincode::{decode_from_slice, encode_to_vec};
use fixedbitset::FixedBitSet;
use crate::db::fingerprint::Fingerprint;
use crate::db::index_item::IndexItem;

#[test]
fn test_index_item_encode_decode() {
Expand Down
17 changes: 10 additions & 7 deletions src/ringo/ringo/search/search.rs → src/db/search.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::ringo::math::similarity::tanimoto::tanimoto_bitset;
use crate::ringo::molecule::smiles::reader::molecule::parse_molecule;
use crate::ringo::ringo::index::index_item::IndexItem;
use crate::math::similarity::tanimoto::tanimoto_bitset;
use crate::molecule::smiles::reader::molecule::parse_molecule;
use crate::db::index_item::IndexItem;
use std::fs::File;
use std::io::{BufRead, BufReader, Read, Seek};

Expand Down Expand Up @@ -43,7 +43,6 @@ pub fn similarity_search(

// calculate similarity
let similarity = tanimoto_bitset(&index_item.fingerprint.0, &query_fp.0);

// print similarity if it is greater than min_similarity
if similarity >= min_similarity {
let position = index_item.position;
Expand All @@ -68,14 +67,18 @@ pub fn similarity_search(
results
}

/// Writes the `db-search` banner, followed by a newline, to standard output.
fn main() {
    let banner = "db-search";
    println!("{}", banner);
}

#[cfg(test)]
mod test {
use crate::ringo::ringo::index::index::index;
use crate::ringo::ringo::search::search::similarity_search;
use crate::db::index::index_file;
use crate::db::search::similarity_search;

#[test]
fn test_similarity_search() {
index("molecles.smi");
index_file("molecles.smi");
let results = similarity_search("molecules.smi", "CC(C)CC1=CC=C(C=C1)C(C)C(=O)O", 0.7, 100);
assert_eq!(results.len(), 1);
assert!(results[0].line.starts_with("CC(C)CC1=CC=C(C=C1)C(C)C(=O)O"));
Expand Down
5 changes: 3 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
mod bin;
pub mod ringo;
pub mod db;
mod math;
mod molecule;
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub fn tanimoto_bitset(a: &FixedBitSet, b: &FixedBitSet) -> f32 {

#[cfg(test)]
mod tests {
use crate::ringo::math::similarity::tanimoto::tanimoto_bitset;
use crate::math::similarity::tanimoto::tanimoto_bitset;
use fixedbitset::FixedBitSet;

#[test]
Expand Down
2 changes: 2 additions & 0 deletions src/molecule.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
mod model;
pub mod smiles;
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::ringo::molecule::model::element::Element;
use crate::molecule::model::element::Element;

#[derive(Hash, Eq, PartialEq, Debug)]
pub struct Atom {
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use crate::ringo::fingerprint::fingerprint::{Fingerprint, FINGERPRINT_SIZE};
use crate::ringo::math::similarity::tanimoto::tanimoto_bitset;
use crate::ringo::molecule::model::atom::Atom;
use crate::ringo::molecule::model::bond::Bond;
use crate::ringo::molecule::smiles::reader::molecule::parse_molecule;
use crate::db::fingerprint::{Fingerprint, FINGERPRINT_SIZE};
use crate::math::similarity::tanimoto::tanimoto_bitset;
use crate::molecule::model::atom::Atom;
use crate::molecule::model::bond::Bond;
use crate::molecule::smiles::reader::molecule::parse_molecule;
use fixedbitset::FixedBitSet;
use petgraph::stable_graph::{EdgeIndex, NodeIndex, StableGraph};
use petgraph::visit::EdgeRef;
Expand Down
1 change: 1 addition & 0 deletions src/molecule/smiles.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod reader;
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ mod charge;
mod element;
mod hydrogens;
mod isotope;
pub(crate) mod molecule;
pub mod molecule;
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use crate::ringo::molecule::model::atom::Atom;
use crate::ringo::molecule::model::element::Element;
use crate::ringo::molecule::smiles::reader::charge::parse_charge;
use crate::ringo::molecule::smiles::reader::element::parse_element;
use crate::ringo::molecule::smiles::reader::hydrogens::parse_hydrogens;
use crate::ringo::molecule::smiles::reader::isotope::parse_isotope;
use crate::molecule::model::atom::Atom;
use crate::molecule::model::element::Element;
use crate::molecule::smiles::reader::charge::parse_charge;
use crate::molecule::smiles::reader::element::parse_element;
use crate::molecule::smiles::reader::hydrogens::parse_hydrogens;
use crate::molecule::smiles::reader::isotope::parse_isotope;
use nom::combinator::opt;
use nom::IResult;

pub(crate) fn parse_atom(input: &str) -> IResult<&str, Atom> {
pub fn parse_atom(input: &str) -> IResult<&str, Atom> {
let mut isotope: Option<u8> = None;
let mut charge: Option<i8> = None;
let mut hs: Option<u8> = None;
Expand Down Expand Up @@ -47,7 +47,7 @@ pub(crate) fn parse_atom(input: &str) -> IResult<&str, Atom> {

#[cfg(test)]
mod tests {
use crate::ringo::molecule::smiles::reader::atom::parse_atom;
use crate::molecule::smiles::reader::atom::parse_atom;

fn do_test_parse_atom(input: &str, atomic_number: u8, charge: i8, hs: u8, isotope: u8) {
let (remaining_input, atom) = parse_atom(input).unwrap();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use crate::ringo::molecule::model::bond::{Bond, BondOrder};
use crate::molecule::model::bond::{Bond, BondOrder};
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::combinator::map;
use nom::IResult;

pub(crate) fn parse_bond(input: &str) -> IResult<&str, Bond> {
pub fn parse_bond(input: &str) -> IResult<&str, Bond> {
let (input, bond_order) = alt((
map(tag("="), |_| BondOrder::Double),
map(tag("#"), |_| BondOrder::Triple),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use nom::{
/// * single `+` or `-` sign => return 1 or -1
/// * multiple `+` or `-` signs => return n or -n
/// * `+` or `-` sign followed by a number => return n or -n
pub(crate) fn parse_charge(input: &str) -> IResult<&str, i8> {
pub fn parse_charge(input: &str) -> IResult<&str, i8> {
let (input, sign) = alt((char('+'), char('-')))(input)?;
let charge = match sign {
'+' => 1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use nom::bytes::complete::tag;
use nom::combinator::map_res;
use nom::IResult;

pub(crate) fn parse_element(input: &str) -> IResult<&str, u8> {
pub fn parse_element(input: &str) -> IResult<&str, u8> {
map_res(
alt((
tag("Cl"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use nom::IResult;
/// * H - 1 hydrogen
/// * Hn - n hydrogens
/// Returns the number of hydrogens
pub(crate) fn parse_hydrogens(input: &str) -> IResult<&str, u8> {
pub fn parse_hydrogens(input: &str) -> IResult<&str, u8> {
let single_hydrogen_parser = char('H');
let mut hydrogen_parser = preceded(char('H'), map_res(digit1, str::parse::<u8>));
single_hydrogen_parser(input)?;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use nom::{character::complete::digit1, combinator::map_res, IResult};

/// Parses isotope value, that should be a number
/// Returns the isotope value
pub(crate) fn parse_isotope(input: &str) -> IResult<&str, u8> {
pub fn parse_isotope(input: &str) -> IResult<&str, u8> {
Ok(map_res(digit1, str::parse::<u8>)(input)?)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::ringo::molecule::model::bond::{Bond, BondOrder};
use crate::ringo::molecule::model::molecule::Molecule;
use crate::ringo::molecule::smiles::reader::atom::parse_atom;
use crate::ringo::molecule::smiles::reader::bond::parse_bond;
use crate::molecule::model::bond::{Bond, BondOrder};
use crate::molecule::model::molecule::Molecule;
use crate::molecule::smiles::reader::atom::parse_atom;
use crate::molecule::smiles::reader::bond::parse_bond;
use nom::branch::alt;
use nom::character::complete::{char, digit1};
use nom::combinator::{map, map_res};
Expand All @@ -15,7 +15,7 @@ fn parse_cycle_digit(input: &str) -> IResult<&str, u8> {
map_res(digit1, str::parse::<u8>)(input)
}

pub(crate) fn parse_molecule(input: &str) -> IResult<&str, Molecule> {
pub fn parse_molecule(input: &str) -> IResult<&str, Molecule> {
let mut molecule = Molecule::new();
let mut open_cycles: HashMap<u8, NodeIndex> = HashMap::new();
let mut stack: Vec<(NodeIndex, BondOrder)> = Vec::new();
Expand Down
4 changes: 0 additions & 4 deletions src/ringo.rs

This file was deleted.

1 change: 0 additions & 1 deletion src/ringo/fingerprint.rs

This file was deleted.

2 changes: 0 additions & 2 deletions src/ringo/molecule.rs

This file was deleted.

1 change: 0 additions & 1 deletion src/ringo/molecule/smiles.rs

This file was deleted.

2 changes: 0 additions & 2 deletions src/ringo/ringo.rs

This file was deleted.

3 changes: 0 additions & 3 deletions src/ringo/ringo/index.rs

This file was deleted.

3 changes: 0 additions & 3 deletions src/ringo/ringo/index/main.rs

This file was deleted.

2 changes: 0 additions & 2 deletions src/ringo/ringo/search.rs

This file was deleted.

3 changes: 0 additions & 3 deletions src/ringo/ringo/search/main.rs

This file was deleted.

0 comments on commit 5a474ef

Please sign in to comment.