From 67174666005bd144582f90d054cf7b2acc504f41 Mon Sep 17 00:00:00 2001 From: Austin Richardson Date: Fri, 5 Jan 2024 19:54:22 -0800 Subject: [PATCH 1/3] Increase capnproto size limit Bump limit for Mash Make Clippy happy Fix comment. Decrease limit --- README.md | 10 ++++++++++ cli/Cargo.toml | 2 +- lib/src/serialization/json.rs | 1 - lib/src/serialization/mash.rs | 6 +++++- lib/src/serialization/mod.rs | 6 +++++- 5 files changed, 21 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ff92c60..1ac20f7 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,17 @@ This repository provides a library and command-line interface that reimplements You may build Finch from source, which requires Rust >= `1.49`. Rust's Cargo package manager (see [rustup](https://www.rustup.rs) for Cargo installation instructions) can automatically build and install Finch with `cargo install finch_cli`. If you require python bindings, you must take extra steps (see [python support](#python-support)). Alternatively, [download a prebuilt binary](https://github.com/onecodex/finch-rs/releases) or install from [PyPi](https://pypi.org/project/finch-sketch/) `pip install finch-sketch`. +### Building ### + +To compile from source: + +```sh +# from finch-rs +cargo +nightly-2023-06-28 build --release +``` + ### Example Usage ### + To get started, we first compute sketches for several FASTA or FASTQ files. These sketches are compact, sampled representations of the underlying genomic data, and what allow `finch` to rapidly estimate distances between datasets. 
Sketching files uses the `finch sketch` command: ``` diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 7eacc26..a4fe088 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -16,7 +16,7 @@ path = "src/main.rs" [dependencies] clap = "2.33.0" -finch = "0.6" +finch = { path = "../lib" } serde_json = "1" anyhow = "1" diff --git a/lib/src/serialization/json.rs b/lib/src/serialization/json.rs index 61402bc..56fcf89 100644 --- a/lib/src/serialization/json.rs +++ b/lib/src/serialization/json.rs @@ -9,7 +9,6 @@ use serde::{Deserialize, Serialize}; use crate::bail; use crate::errors::FinchResult; use crate::filtering::FilterParams; -pub use crate::serialization::mash::{read_mash_file, write_mash_file}; use crate::serialization::Sketch; use crate::sketch_schemes::{KmerCount, SketchParams}; diff --git a/lib/src/serialization/mash.rs b/lib/src/serialization/mash.rs index 761d084..1790456 100644 --- a/lib/src/serialization/mash.rs +++ b/lib/src/serialization/mash.rs @@ -58,7 +58,11 @@ pub fn write_mash_file(file: &mut dyn Write, sketches: &[Sketch]) -> FinchResult } pub fn read_mash_file(file: &mut dyn BufRead) -> FinchResult<Vec<Sketch>> { - let options = *message::ReaderOptions::new().traversal_limit_in_words(Some(1024 * 1024 * 1024)); + let options = *message::ReaderOptions::new().traversal_limit_in_words( + // measured in words + // 1 word = 8 bytes + Some(2 * 1024 * 1024 * 1024), + ); let reader = capnp_serialize::read_message(file, options)?; let mash_data: min_hash::Reader = reader.get_root::<min_hash::Reader>()?; diff --git a/lib/src/serialization/mod.rs b/lib/src/serialization/mod.rs index 19ccb37..6b0dee1 100644 --- a/lib/src/serialization/mod.rs +++ b/lib/src/serialization/mod.rs @@ -171,7 +171,11 @@ pub fn write_finch_file(file: &mut dyn Write, sketches: &[Sketch]) -> FinchResul } pub fn read_finch_file(file: &mut dyn BufRead) -> FinchResult<Vec<Sketch>> { - let options = *message::ReaderOptions::new().traversal_limit_in_words(Some(1024 * 1024 * 1024)); + let options = 
*message::ReaderOptions::new().traversal_limit_in_words( + // measured in words + // 1 word = 8 bytes + Some(2 * 1024 * 1024 * 1024), + ); let reader = capnp_serialize::read_message(file, options)?; let cap_data: multisketch::Reader = reader.get_root::<multisketch::Reader>()?; let cap_sketches = cap_data.get_sketches()?; From e6e4d4d791293d1dd35f46b817c7edead47b8fd2 Mon Sep 17 00:00:00 2001 From: Austin Richardson Date: Fri, 5 Jan 2024 20:08:23 -0800 Subject: [PATCH 2/3] Update install instructions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1ac20f7..50784db 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ To compile from source: ```sh # from finch-rs -cargo +nightly-2023-06-28 build --release +cargo +stable build --release ``` ### Example Usage ### From 92d3b7c82122c55b2f54ecaa7f6bc99338417cbd Mon Sep 17 00:00:00 2001 From: Austin Richardson Date: Mon, 8 Jan 2024 08:43:15 -0800 Subject: [PATCH 3/3] Revert change --- cli/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index a4fe088..7eacc26 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -16,7 +16,7 @@ path = "src/main.rs" [dependencies] clap = "2.33.0" -finch = { path = "../lib" } +finch = "0.6" serde_json = "1" anyhow = "1"