From 255c14d71cff3809975250e527ba624385275f7c Mon Sep 17 00:00:00 2001 From: Brad Langhorst Date: Sat, 10 Feb 2024 10:00:38 -0500 Subject: [PATCH 1/8] updates to latest dependencies --- Cargo.lock | 8 ++++---- Cargo.toml | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2ae91d3..c09a75e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -594,9 +594,9 @@ dependencies = [ [[package]] name = "either" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "elsa" @@ -924,9 +924,9 @@ checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" [[package]] name = "is-terminal" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe8f25ce1159c7740ff0b9b2f5cdf4a8428742ba7c112b9f20f22cd5219c7dab" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" dependencies = [ "hermit-abi", "libc", diff --git a/Cargo.toml b/Cargo.toml index 0de52c1..e6b2f53 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,19 +15,19 @@ the specified PCR schemes was used to create the library. 
[dependencies] -log = "0.4" -simple_logger = { version = "4.0.0", features = ["stderr"] } -clap = { version = "4.1.4", features = ["cargo", "derive"] } -human-panic = "1.0.3" +log = "0.4.20" +simple_logger = { version = "4.3.3", features = ["stderr"] } +clap = { version = "4.5.0", features = ["cargo", "derive"] } +human-panic = "1.2.3" better-panic = "0.3.0" noodles = { version = "0.63.0", features = ["fasta", "bam", "fastq"] } debruijn = "0.3.4" -anyhow = "1.0" +anyhow = "1.0.79" [dev-dependencies] assert_cmd = "2.0.13" predicates = "3.1.0" -flamegraph = "0.6.2" +flamegraph = "0.6.5" duct = "0.13.7" # for testing piping between upstream decompressors and ampseer [profile.dev] From d452735079bb488cea9dcd28d76a0fc982b06805 Mon Sep 17 00:00:00 2001 From: Brad Langhorst Date: Sat, 10 Feb 2024 10:11:24 -0500 Subject: [PATCH 2/8] updates to latest checkout action, clearer name for badge --- .github/workflows/rust.yml | 4 ++-- README.md | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 92c436d..8760c17 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -1,4 +1,4 @@ -name: Rust +name: Tests on: pull_request: @@ -9,7 +9,7 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Build run: cargo build --verbose - name: Run Tests diff --git a/README.md b/README.md index 8c16d5b..25d0410 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ Ampseer examines reads in fastq format and identifies which multiplex PCR primer It is intended to differentiate between ARTIC v3, ARTIC v4, ARTIC v4.1, VarSkip 1a, VarSkip 2a, Midnight, and VarSkip Long primer sets. ## This program is not yet fully tested, it's shared now to enable commentary from the scientific community. +![Tests](https://github.com/nebiolabs/ampseer/workflows/Tests/badge.svg) + Pull requests and issues are welcome. 
When compiled with --release optimizations, Ampseer processes reads at the same speed as samtools fastq ( less than 4s for a 155M bam file on 2019 Macbook Pro) From f4fff532dc96ac2e7364ca4d73e52f5d3c6ac0c6 Mon Sep 17 00:00:00 2001 From: Brad Langhorst Date: Sat, 10 Feb 2024 15:28:19 -0500 Subject: [PATCH 3/8] better comments --- src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index 02dde2f..ce3b84d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -88,7 +88,7 @@ fn main() -> Result<(), Box> { let reads = if let Some(reads) = args.reads.as_deref() { Box::new(File::open(reads)?) } else { - // If --reads was not specified, use stdin + //--reads was not specified, use stdin Box::new(File::open("/dev/stdin")?) }; @@ -105,7 +105,7 @@ fn main() -> Result<(), Box> { Ok(()) } -/// checks the passed input structure for reasonableness, printing error if necessary. +/// checks the passed input structure for reasonableness, printing errors as necessary. fn check_inputs(args: &Cli) -> Result<(), anyhow::Error> { let mut error_messages = Vec::new(); From 7b5db1bb6cb34c1efd8c767cc30deb9961306d3b Mon Sep 17 00:00:00 2001 From: Brad Langhorst Date: Sat, 10 Feb 2024 15:28:38 -0500 Subject: [PATCH 4/8] rust fmt updates --- src/main.rs | 23 +++++++++++----------- tests/test_cli.rs | 50 ++++++++++++++++++++++------------------------- 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/src/main.rs b/src/main.rs index ce3b84d..3ccab71 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,7 +12,7 @@ use simple_logger::SimpleLogger; use std::cmp::Ordering; use std::{ collections::hash_map::Entry, collections::HashMap, collections::HashSet, fs::File, - io::BufReader, path::PathBuf, io::Read + io::BufReader, io::Read, path::PathBuf, }; #[derive(Parser)] @@ -91,7 +91,6 @@ fn main() -> Result<(), Box> { //--reads was not specified, use stdin Box::new(File::open("/dev/stdin")?) 
}; - let mut primer_set_counters = import_primer_sets(&args.primer_sets)?; @@ -109,12 +108,15 @@ fn main() -> Result<(), Box> { fn check_inputs(args: &Cli) -> Result<(), anyhow::Error> { let mut error_messages = Vec::new(); - if args.reads.clone().is_some_and(|reads| reads.exists()) && - args.primer_sets.iter().all(|ps| ps.exists()) { + if args.reads.clone().is_some_and(|reads| reads.exists()) + && args.primer_sets.iter().all(|ps| ps.exists()) + { log::info!( "Searching for primers from {:?} in reads from: {:?}", args.primer_sets, - args.reads.as_deref().unwrap_or(&PathBuf::from("/dev/stdin")) + args.reads + .as_deref() + .unwrap_or(&PathBuf::from("/dev/stdin")) ); } else { for ps in &args.primer_sets { @@ -122,7 +124,7 @@ fn check_inputs(args: &Cli) -> Result<(), anyhow::Error> { error_messages.push(format!("Could not find primer set at {:?}", ps.as_path())); } } - if args.reads.clone().is_some_and(|reads| ! reads.exists()) { + if args.reads.clone().is_some_and(|reads| !reads.exists()) { error_messages.push(format!( "Could not find reads at {:?}", args.reads.as_ref().unwrap().as_path() @@ -290,9 +292,9 @@ fn identify_primer_set(primer_set_counters: &[PrimerSet]) -> (String, f32) { //ideas: - consider random selections of reads ( number of ways to select 80% of reads = permutations, data structure to hold 1000 seeds in columns ) // i need to figure out a way to increment the appropriate columns for each read (maybe a bitmask?) // a function that returns a unique bitmask for each seed? 
- - // - consider random amplicons (simulating dropouts) - // - + + // - consider random amplicons (simulating dropouts) + // - // score = num consistent with all amplicons answer/ 1000 ps_fracs.sort_unstable_by_key(|ps_frac| (ps_frac.1 * -1000.0) as i32); log::debug!("matching fractions: {:?}", ps_fracs); @@ -327,11 +329,10 @@ fn identify_primer_set(primer_set_counters: &[PrimerSet]) -> (String, f32) { //noise = number of points of evidence that are inconsistent with identified set //score = SNR -//goal: score = 1 if all available evidence is in favor of winning candidate +//goal: score = 1 if all available evidence is in favor of winning candidate // score = 0 if cannot differentiate between 2 candidates //winner - runnerup =* confidence - /// compares a ratio of only the unique keys to see if we can differentiate between two similar sets /// confidence is estimated considering the number of unique k-mers used fn compare_only_unique_primers(primer_set_counters: &[PrimerSet]) -> (String, f32) { diff --git a/tests/test_cli.rs b/tests/test_cli.rs index 4e54e02..3a43197 100644 --- a/tests/test_cli.rs +++ b/tests/test_cli.rs @@ -1,7 +1,6 @@ +use assert_cmd::Command; #[cfg(test)] - use predicates::prelude::*; -use assert_cmd::Command; use std::path::{Path, PathBuf}; fn path_to_fixtures() -> &'static Path { @@ -13,7 +12,7 @@ fn set_cwd_to_fixtures() { fn path_to_ampseer() -> PathBuf { let mut bin_path = std::path::PathBuf::from( std::env::var("CARGO_TARGET_DIR") - .unwrap_or_else(|_| concat!(env!("CARGO_MANIFEST_DIR"),"/target").to_string()) + .unwrap_or_else(|_| concat!(env!("CARGO_MANIFEST_DIR"), "/target").to_string()), ); #[cfg(debug_assertions)] bin_path.push("debug"); @@ -50,12 +49,15 @@ fn test_insufficient_arguments() { .assert() .stderr(predicate::str::contains("required argument")); } + #[test] fn missing_fastq_file() { let mut cmd = Command::cargo_bin("ampseer").expect("Calling binary failed"); - 
cmd.arg("--primer-sets").arg("primer_sets/Midnight_1200.fasta"); + cmd.arg("--primer-sets") + .arg("primer_sets/Midnight_1200.fasta"); cmd.arg("--reads").arg("missing.fastq"); - cmd.assert().stderr(predicate::str::contains("Could not find reads")); + cmd.assert() + .stderr(predicate::str::contains("Could not find reads")); } #[test] @@ -63,10 +65,10 @@ fn test_classify_reads_from_stdin() { let mut cmd = Command::cargo_bin("ampseer").unwrap(); set_cwd_to_fixtures(); - cmd.arg("--primer-sets") - .arg("primer_sets/neb_vss1a.fasta"); + cmd.arg("--primer-sets").arg("primer_sets/neb_vss1a.fasta"); - cmd.pipe_stdin("vss.fastq").unwrap() + cmd.pipe_stdin("vss.fastq") + .unwrap() .assert() .stdout(predicate::str::contains("neb_vss1a")); } @@ -76,11 +78,9 @@ fn non_matching_primer_sets() { let mut cmd = Command::cargo_bin("ampseer").expect("Calling binary failed"); set_cwd_to_fixtures(); - cmd.arg("--primer-sets") - .arg("primer_sets/ARTIC_v3.fasta"); + cmd.arg("--primer-sets").arg("primer_sets/ARTIC_v3.fasta"); cmd.arg("--reads").arg("vss.fastq"); - cmd.assert() - .stdout(predicate::str::contains("unknown")); + cmd.assert().stdout(predicate::str::contains("unknown")); } #[test] @@ -88,12 +88,10 @@ fn ont_amps_find_both_orientations() { let mut cmd = Command::cargo_bin("ampseer").expect("Calling binary failed"); set_cwd_to_fixtures(); - cmd.arg("--primer-sets") - .arg("vss_18_28.fasta"); + cmd.arg("--primer-sets").arg("vss_18_28.fasta"); cmd.arg("--reads") .arg("ont_vss_full_length_amp18rev_amp28for.fastq"); - cmd.assert() - .stdout(predicate::str::contains("vss_18_28")); + cmd.assert().stdout(predicate::str::contains("vss_18_28")); } #[test] @@ -105,8 +103,7 @@ fn differentiate_vss_from_artic_v3() { .arg("primer_sets/ARTIC_v3.fasta") .arg("primer_sets/neb_vss1a.fasta"); cmd.arg("--reads").arg("vss1a.fastq"); - cmd.assert() - .stdout(predicate::str::contains("neb_vss1a")); + cmd.assert().stdout(predicate::str::contains("neb_vss1a")); } #[test] fn 
differentiate_artic_v3_from_vss() { @@ -121,8 +118,7 @@ fn differentiate_artic_v3_from_vss() { .arg("primer_sets/neb_vsl1a.fasta") .arg("primer_sets/neb_vss1a.fasta"); cmd.arg("--reads").arg("artic_v3.fastq"); - cmd.assert() - .stdout(predicate::str::contains("ARTIC_v3")); + cmd.assert().stdout(predicate::str::contains("ARTIC_v3")); } #[test] @@ -136,36 +132,36 @@ fn differentiate_vss2_from_vss1a() { .arg("primer_sets/neb_vss2a.fasta") .arg("primer_sets/ARTIC_v4.fasta"); cmd.arg("--reads").arg("vss2.fastq"); - cmd.assert() - .stdout(predicate::str::contains("neb_vss2a")); + cmd.assert().stdout(predicate::str::contains("neb_vss2a")); } #[test] fn vss2_within_common_primer_sets_2023() { let mut cmd = Command::cargo_bin("ampseer").expect("Calling binary failed"); set_cwd_to_fixtures(); - + cmd.arg("--primer-sets") .arg("primer_sets/ARTIC_v4.fasta") .arg("primer_sets/neb_vss1a.fasta") .arg("primer_sets/neb_vss2a.fasta"); cmd.arg("--reads").arg("vss2.fastq"); - cmd.assert() - .stdout(predicate::str::contains("neb_vss2a")); + cmd.assert().stdout(predicate::str::contains("neb_vss2a")); } #[test] fn vss1_within_common_primer_sets_2023() { set_cwd_to_fixtures(); let decompress = duct::cmd!("zstd", "-d", "-c", "broad_vss1a.fastq.zstd"); - let ampseer_cmd = duct::cmd!(path_to_ampseer(), + let ampseer_cmd = duct::cmd!( + path_to_ampseer(), "--primer-sets", "primer_sets/ARTIC_v3.fasta", "primer_sets/ARTIC_v4.fasta", "primer_sets/Midnight_1200.fasta", "primer_sets/neb_vsl1a.fasta", "primer_sets/neb_vss1a.fasta", - "primer_sets/neb_vss2a.fasta"); + "primer_sets/neb_vss2a.fasta" + ); let pipeline = decompress.pipe(ampseer_cmd); From edac2b72289ae16ae27644ee8aec35f486e00dea Mon Sep 17 00:00:00 2001 From: Brad Langhorst Date: Sat, 10 Feb 2024 15:43:28 -0500 Subject: [PATCH 5/8] adds debug levels to + test for missing primer_set file --- tests/test_cli.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_cli.rs b/tests/test_cli.rs index 3a43197..6c253db 100644 
--- a/tests/test_cli.rs +++ b/tests/test_cli.rs @@ -56,9 +56,19 @@ fn missing_fastq_file() { cmd.arg("--primer-sets") .arg("primer_sets/Midnight_1200.fasta"); cmd.arg("--reads").arg("missing.fastq"); + cmd.arg("-d"); cmd.assert() .stderr(predicate::str::contains("Could not find reads")); } +#[test] +fn missing_primer_set_file() { + let mut cmd = Command::cargo_bin("ampseer").expect("Calling binary failed"); + cmd.arg("--primer-sets").arg("primer_sets/missing.fasta"); + cmd.arg("--reads").arg("missing.fastq"); + cmd.arg("-ddd"); + cmd.assert() + .stderr(predicate::str::contains("Could not find primer set")); +} #[test] fn test_classify_reads_from_stdin() { @@ -80,6 +90,7 @@ fn non_matching_primer_sets() { cmd.arg("--primer-sets").arg("primer_sets/ARTIC_v3.fasta"); cmd.arg("--reads").arg("vss.fastq"); + cmd.arg("-dd"); cmd.assert().stdout(predicate::str::contains("unknown")); } From a90304363d5add5c8197ff44f79b12f3eb581edf Mon Sep 17 00:00:00 2001 From: Brad Langhorst Date: Sat, 10 Feb 2024 16:31:50 -0500 Subject: [PATCH 6/8] more checks for github actions --- .github/workflows/rust.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 8760c17..99ccc7d 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -10,7 +10,12 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: taiki-e/install-action@cargo-llvm-cov@v2 - name: Build run: cargo build --verbose - - name: Run Tests - run: cargo test --verbose \ No newline at end of file + - name: Run tests + run: cargo test --verbose + - name: Check code style + run: cargo clippy -- -D warnings + - name: Check code coverage + run: cargo llvm-cov --html --verbose \ No newline at end of file From b9a2d74ef951eeb898ff206ad6f2791c64ca1a40 Mon Sep 17 00:00:00 2001 From: Brad Langhorst Date: Sat, 10 Feb 2024 16:33:04 -0500 Subject: [PATCH 7/8] adds license badge and readme details for coverage and style 
checks --- README.md | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 25d0410..3ea9352 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ It is intended to differentiate between ARTIC v3, ARTIC v4, ARTIC v4.1, VarSkip ## This program is not yet fully tested, it's shared now to enable commentary from the scientific community. ![Tests](https://github.com/nebiolabs/ampseer/workflows/Tests/badge.svg) +[![License: AGPL v3](https://img.shields.io/badge/License-AGPL_v3-blue.svg)](https://www.gnu.org/licenses/agpl-3.0) Pull requests and issues are welcome. @@ -20,7 +21,7 @@ samtools fastq tests/fixtures/vss2_large.bam 3.55s user 0.11s system 99% cpu 3. target/release/ampseer --reads /dev/stdin --primer-sets primer_sets/*.fasta 3.40s user 0.09s system 95% cpu 3.661 total ``` -Note: Ampseer will produce "unknown" unless one primer set can be clearly separated from other candidates. It will not be able to identify differences between related sets unless both candidate sets are included. For example, ampseer will identify a ARTIC v4.1 library as ARTIC v4 unless both primer sets are included as candidates. +Note: Ampseer will produce "unknown" unless one primer set can be clearly separated from other candidates. It will not be able to identify differences between related sets unless both candidate sets are included. For example, ampseer will identify an ARTIC v4.1 library as ARTIC v4 unless both primer sets are included as candidates. ## Example Commands: This tool does not yet have any binary releases. To try it, you will need to [install rustup](https://www.rust-lang.org/tools/install), or `rustup update` if you are using an older rust installation. 
@@ -34,13 +35,25 @@ samtools fastq tests/fixtures/vss2_small.bam \ ``` ### view ampseer help: ```sh -cargo build --release # may take some time to compile the first time +cargo build --release target/release/ampseer -h ``` ### run the tests: ```sh -cargo test # may take some time to compile the first time +cargo test +``` +### code style checking: +```sh +cargo clippy +``` +### evaluate code coverage via html: +```sh +cargo llvm-cov --open +``` +### evaluate code coverage in vscode: +```sh +cargo llvm-cov --lcov --output-path lcov.info ``` ### make a flamegraph (--root needed on MacOS): From fbfb9c0de04f3c1dfa648fa6198c0846f72e11a6 Mon Sep 17 00:00:00 2001 From: Brad Langhorst Date: Sat, 10 Feb 2024 16:42:03 -0500 Subject: [PATCH 8/8] corrects llvm install --- .github/workflows/rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 99ccc7d..216e4c2 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: taiki-e/install-action@cargo-llvm-cov@v2 + - uses: taiki-e/install-action@cargo-llvm-cov - name: Build run: cargo build --verbose - name: Run tests