nebiolabs · bwlang · Feb 10, 2024 · Feb 10, 2024 · Feb 10, 2024 · Feb 10, 2024
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -1,4 +1,4 @@
-name: Rust
+name: Tests
 
 on:
     pull_request:
@@ -9,8 +9,13 @@ jobs:
     build:
         runs-on: ubuntu-latest
         steps:
-        - uses: actions/checkout@v2
+        - uses: actions/checkout@v4
+        - uses: taiki-e/install-action@cargo-llvm-cov
         - name: Build
           run: cargo build --verbose
-        - name: Run Tests
-          run: cargo test --verbose
+        - name: Run tests
+          run: cargo test --verbose
+        - name: Check code style
+          run: cargo clippy -- -D warnings
+        - name: Check code coverage
+          run: cargo llvm-cov --html --verbose
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -15,19 +15,19 @@ the specified PCR schemes was used to create the library.
 
 
 [dependencies]
-log = "0.4"
-simple_logger = { version = "4.0.0", features = ["stderr"] }
-clap = { version = "4.1.4", features = ["cargo", "derive"] }
-human-panic = "1.0.3"
+log = "0.4.20"
+simple_logger = { version = "4.3.3", features = ["stderr"] }
+clap = { version = "4.5.0", features = ["cargo", "derive"] }
+human-panic = "1.2.3"
 better-panic = "0.3.0"
 noodles = { version = "0.63.0", features = ["fasta", "bam", "fastq"] }
 debruijn = "0.3.4"
-anyhow = "1.0"
+anyhow = "1.0.79"
 
 [dev-dependencies]
 assert_cmd = "2.0.13"
 predicates = "3.1.0"
-flamegraph = "0.6.2"    
+flamegraph = "0.6.5"    
 duct = "0.13.7" # for testing piping between upstream decompressors and ampseer
 
 [profile.dev]

diff --git a/README.md b/README.md
@@ -3,6 +3,9 @@ Ampseer examines reads in fastq format and identifies which multiplex PCR primer
 It is intended to differentiate between ARTIC v3, ARTIC v4, ARTIC v4.1, VarSkip 1a, VarSkip 2a, Midnight, and VarSkip Long primer sets.
 ## This program is not yet fully tested, it's shared now to enable commentary from the scientific community.
 
+![Tests](https://github.com/nebiolabs/ampseer/workflows/Tests/badge.svg)
+[![License: AGPL v3](https://img.shields.io/badge/License-AGPL_v3-blue.svg)](https://www.gnu.org/licenses/agpl-3.0)
+
 Pull requests and issues are welcome.
 
 When compiled with --release optimizations, Ampseer processes reads at the same speed as samtools fastq ( less than 4s for a 155M bam file on 2019 Macbook Pro)
@@ -18,7 +21,7 @@ samtools fastq tests/fixtures/vss2_large.bam  3.55s user 0.11s system 99% cpu 3.
 target/release/ampseer --reads /dev/stdin --primer-sets primer_sets/*.fasta  3.40s user 0.09s system 95% cpu 3.661 total
 ```
 
-Note: Ampseer will produce "unknown" unless one primer set can be clearly separated from other candidates. It will not be able to identify differences between related sets unless both candidate sets are included. For example, ampseer will identify a ARTIC v4.1 library as ARTIC v4 unless both primer sets are included as candidates.
+Note: Ampseer will produce "unknown" unless one primer set can be clearly separated from other candidates. It will not be able to identify differences between related sets unless both candidate sets are included. For example, ampseer will identify n ARTIC v4.1 library as ARTIC v4 unless both primer sets are included as candidates.
 
 ## Example Commands:
 This tool does not yet have any binary releases. To try it, you will need to [install rustup](https://www.rust-lang.org/tools/install), or `rustup update` if you are using an older rust installation.
@@ -32,13 +35,25 @@ samtools fastq tests/fixtures/vss2_small.bam \
 ```
 ### view ampseer help:
 ```sh
-cargo build --release # may take some time to compile the first time
+cargo build --release
 target/release/ampseer -h
 ```
 
 ### run the tests:
 ```sh
-cargo test # may take some time to compile the first time
+cargo test
+```
+### code style checking:
+```sh
+cargo clippy
+```
+### evaluate code coverage via html:
+```sh
+cargo llvm-cov --open 
+```
+### evaluate code coverage in vscode:
+```sh
+cargo llvm-cov --lcov --output-path lcov.info  
 ```
 
 ### make a flamegraph (--root needed on MacOS):

diff --git a/src/main.rs b/src/main.rs
@@ -12,7 +12,7 @@ use simple_logger::SimpleLogger;
 use std::cmp::Ordering;
 use std::{
     collections::hash_map::Entry, collections::HashMap, collections::HashSet, fs::File,
-    io::BufReader, path::PathBuf, io::Read
+    io::BufReader, io::Read, path::PathBuf,
 };
 
 #[derive(Parser)]
@@ -88,10 +88,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     let reads = if let Some(reads) = args.reads.as_deref() {
         Box::new(File::open(reads)?)
     } else {
-        // If --reads was not specified, use stdin
+        //--reads was not specified, use stdin
         Box::new(File::open("/dev/stdin")?)
     };
-
 
     let mut primer_set_counters = import_primer_sets(&args.primer_sets)?;
 
@@ -105,24 +104,27 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     Ok(())
 }
 
-/// checks the passed input structure for reasonableness, printing error if necessary.
+/// checks the passed input structure for reasonableness, printing errors as necessary.
 fn check_inputs(args: &Cli) -> Result<(), anyhow::Error> {
     let mut error_messages = Vec::new();
 
-    if args.reads.clone().is_some_and(|reads| reads.exists()) && 
-       args.primer_sets.iter().all(|ps| ps.exists()) {
+    if args.reads.clone().is_some_and(|reads| reads.exists())
+        && args.primer_sets.iter().all(|ps| ps.exists())
+    {
         log::info!(
             "Searching for primers from {:?} in reads from: {:?}",
             args.primer_sets,
-            args.reads.as_deref().unwrap_or(&PathBuf::from("/dev/stdin"))
+            args.reads
+                .as_deref()
+                .unwrap_or(&PathBuf::from("/dev/stdin"))
         );
     } else {
         for ps in &args.primer_sets {
             if !ps.exists() {
                 error_messages.push(format!("Could not find primer set at {:?}", ps.as_path()));
             }
         }
-        if args.reads.clone().is_some_and(|reads| ! reads.exists()) {
+        if args.reads.clone().is_some_and(|reads| !reads.exists()) {
             error_messages.push(format!(
                 "Could not find reads at {:?}",
                 args.reads.as_ref().unwrap().as_path()
@@ -290,9 +292,9 @@ fn identify_primer_set(primer_set_counters: &[PrimerSet]) -> (String, f32) {
     //ideas: - consider random selections of reads  ( number of ways to select 80% of reads = permutations, data structure to hold 1000 seeds in columns )
     // i need to figure out a way to increment the appropriate columns for each read (maybe a bitmask?)
     // a function that returns a unique bitmask for each seed?
-    
-    //       - consider random amplicons (simulating dropouts) 
-    //       - 
+
+    //       - consider random amplicons (simulating dropouts)
+    //       -
     // score = num consistent with all amplicons answer/ 1000
     ps_fracs.sort_unstable_by_key(|ps_frac| (ps_frac.1 * -1000.0) as i32);
     log::debug!("matching fractions: {:?}", ps_fracs);
@@ -327,11 +329,10 @@ fn identify_primer_set(primer_set_counters: &[PrimerSet]) -> (String, f32) {
 //noise = number of points of evidence that are inconsistent with identified set
 //score = SNR
 
-//goal: score = 1 if all available evidence is in favor of winning candidate 
+//goal: score = 1 if all available evidence is in favor of winning candidate
 //      score = 0 if cannot differentiate between 2 candidates
 //winner - runnerup  =* confidence
 
-
 /// compares a ratio of only the unique keys to see if we can differentiate between two similar sets
 /// confidence is estimated considering the number of unique k-mers used
 fn compare_only_unique_primers(primer_set_counters: &[PrimerSet]) -> (String, f32) {

diff --git a/tests/test_cli.rs b/tests/test_cli.rs
@@ -1,7 +1,6 @@
+use assert_cmd::Command;
 #[cfg(test)]
-
 use predicates::prelude::*;
-use assert_cmd::Command;
 use std::path::{Path, PathBuf};
 
 fn path_to_fixtures() -> &'static Path {
@@ -13,7 +12,7 @@ fn set_cwd_to_fixtures() {
 fn path_to_ampseer() -> PathBuf {
     let mut bin_path = std::path::PathBuf::from(
         std::env::var("CARGO_TARGET_DIR")
-        .unwrap_or_else(|_| concat!(env!("CARGO_MANIFEST_DIR"),"/target").to_string())
+            .unwrap_or_else(|_| concat!(env!("CARGO_MANIFEST_DIR"), "/target").to_string()),
     );
     #[cfg(debug_assertions)]
     bin_path.push("debug");
@@ -50,23 +49,36 @@ fn test_insufficient_arguments() {
         .assert()
         .stderr(predicate::str::contains("required argument"));
 }
+
 #[test]
 fn missing_fastq_file() {
     let mut cmd = Command::cargo_bin("ampseer").expect("Calling binary failed");
-    cmd.arg("--primer-sets").arg("primer_sets/Midnight_1200.fasta");
+    cmd.arg("--primer-sets")
+        .arg("primer_sets/Midnight_1200.fasta");
+    cmd.arg("--reads").arg("missing.fastq");
+    cmd.arg("-d");
+    cmd.assert()
+        .stderr(predicate::str::contains("Could not find reads"));
+}
+#[test]
+fn missing_primer_set_file() {
+    let mut cmd = Command::cargo_bin("ampseer").expect("Calling binary failed");
+    cmd.arg("--primer-sets").arg("primer_sets/missing.fasta");
     cmd.arg("--reads").arg("missing.fastq");
-    cmd.assert().stderr(predicate::str::contains("Could not find reads"));
+    cmd.arg("-ddd");
+    cmd.assert()
+        .stderr(predicate::str::contains("Could not find primer set"));
 }
 
 #[test]
 fn test_classify_reads_from_stdin() {
     let mut cmd = Command::cargo_bin("ampseer").unwrap();
     set_cwd_to_fixtures();
 
-    cmd.arg("--primer-sets")
-       .arg("primer_sets/neb_vss1a.fasta");
+    cmd.arg("--primer-sets").arg("primer_sets/neb_vss1a.fasta");
 
-    cmd.pipe_stdin("vss.fastq").unwrap()
+    cmd.pipe_stdin("vss.fastq")
+        .unwrap()
         .assert()
         .stdout(predicate::str::contains("neb_vss1a"));
 }
@@ -76,24 +88,21 @@ fn non_matching_primer_sets() {
     let mut cmd = Command::cargo_bin("ampseer").expect("Calling binary failed");
     set_cwd_to_fixtures();
 
-    cmd.arg("--primer-sets")
-        .arg("primer_sets/ARTIC_v3.fasta");
+    cmd.arg("--primer-sets").arg("primer_sets/ARTIC_v3.fasta");
     cmd.arg("--reads").arg("vss.fastq");
-    cmd.assert()
-        .stdout(predicate::str::contains("unknown"));
+    cmd.arg("-dd");
+    cmd.assert().stdout(predicate::str::contains("unknown"));
 }
 
 #[test]
 fn ont_amps_find_both_orientations() {
     let mut cmd = Command::cargo_bin("ampseer").expect("Calling binary failed");
     set_cwd_to_fixtures();
 
-    cmd.arg("--primer-sets")
-        .arg("vss_18_28.fasta");
+    cmd.arg("--primer-sets").arg("vss_18_28.fasta");
     cmd.arg("--reads")
         .arg("ont_vss_full_length_amp18rev_amp28for.fastq");
-    cmd.assert()
-        .stdout(predicate::str::contains("vss_18_28"));
+    cmd.assert().stdout(predicate::str::contains("vss_18_28"));
 }
 
 #[test]
@@ -105,8 +114,7 @@ fn differentiate_vss_from_artic_v3() {
         .arg("primer_sets/ARTIC_v3.fasta")
         .arg("primer_sets/neb_vss1a.fasta");
     cmd.arg("--reads").arg("vss1a.fastq");
-    cmd.assert()
-        .stdout(predicate::str::contains("neb_vss1a"));
+    cmd.assert().stdout(predicate::str::contains("neb_vss1a"));
 }
 #[test]
 fn differentiate_artic_v3_from_vss() {
@@ -121,8 +129,7 @@ fn differentiate_artic_v3_from_vss() {
         .arg("primer_sets/neb_vsl1a.fasta")
         .arg("primer_sets/neb_vss1a.fasta");
     cmd.arg("--reads").arg("artic_v3.fastq");
-    cmd.assert()
-        .stdout(predicate::str::contains("ARTIC_v3"));
+    cmd.assert().stdout(predicate::str::contains("ARTIC_v3"));
 }
 
 #[test]
@@ -136,36 +143,36 @@ fn differentiate_vss2_from_vss1a() {
         .arg("primer_sets/neb_vss2a.fasta")
         .arg("primer_sets/ARTIC_v4.fasta");
     cmd.arg("--reads").arg("vss2.fastq");
-    cmd.assert()
-        .stdout(predicate::str::contains("neb_vss2a"));
+    cmd.assert().stdout(predicate::str::contains("neb_vss2a"));
 }
 
 #[test]
 fn vss2_within_common_primer_sets_2023() {
     let mut cmd = Command::cargo_bin("ampseer").expect("Calling binary failed");
     set_cwd_to_fixtures();
-    
+
     cmd.arg("--primer-sets")
         .arg("primer_sets/ARTIC_v4.fasta")
         .arg("primer_sets/neb_vss1a.fasta")
         .arg("primer_sets/neb_vss2a.fasta");
     cmd.arg("--reads").arg("vss2.fastq");
-    cmd.assert()
-        .stdout(predicate::str::contains("neb_vss2a"));
+    cmd.assert().stdout(predicate::str::contains("neb_vss2a"));
 }
 #[test]
 fn vss1_within_common_primer_sets_2023() {
     set_cwd_to_fixtures();
     let decompress = duct::cmd!("zstd", "-d", "-c", "broad_vss1a.fastq.zstd");
 
-    let ampseer_cmd = duct::cmd!(path_to_ampseer(), 
+    let ampseer_cmd = duct::cmd!(
+        path_to_ampseer(),
         "--primer-sets",
         "primer_sets/ARTIC_v3.fasta",
         "primer_sets/ARTIC_v4.fasta",
         "primer_sets/Midnight_1200.fasta",
         "primer_sets/neb_vsl1a.fasta",
         "primer_sets/neb_vss1a.fasta",
-        "primer_sets/neb_vss2a.fasta");
+        "primer_sets/neb_vss2a.fasta"
+    );
 
     let pipeline = decompress.pipe(ampseer_cmd);