1.5.6 PR (fixes #144) (#145)

* Pre 1.5.6 push * Fix align_trim unit test * Remove verbose as a global arg * Fix minion validator test * Fix minion_validator again * Remove unused import
artic-network · Nov 28, 2024 · 6cc5f5d · 6cc5f5d
1 parent a3e1df7
commit 6cc5f5d
Show file tree

Hide file tree

Showing 5 changed files with 62 additions and 47 deletions.
diff --git a/artic/align_trim.py b/artic/align_trim.py
@@ -60,7 +60,7 @@ def find_primer(bed, pos, direction, threshold=20):
     return closest
 
 
-def trim(segment, primer_pos, end, debug):
+def trim(segment, primer_pos, end, verbose=False):
     """Soft mask an alignment to fit within primer start/end sites.
 
     Parameters
@@ -71,7 +71,7 @@ def trim(segment, primer_pos, end, debug):
         The position in the reference to soft mask up to (equates to the start/end position of the primer in the reference)
     end : bool
         If True, the segment is being masked from the end (i.e. for the reverse primer)
-    debug : bool
+    vernose : bool
         If True, will print soft masking info during trimming
     """
     # get a copy of the cigar tuples to work with
@@ -93,13 +93,14 @@ def trim(segment, primer_pos, end, debug):
                 flag, length = cigar.pop()
             else:
                 flag, length = cigar.pop(0)
-            if debug:
+            if verbose:
                 print("Chomped a %s, %s" % (flag, length), file=sys.stderr)
         except IndexError:
-            print(
-                "Ran out of cigar during soft masking - completely masked read will be ignored",
-                file=sys.stderr,
-            )
+            if verbose:
+                print(
+                    "Ran out of cigar during soft masking - completely masked read will be ignored",
+                    file=sys.stderr,
+                )
             break
 
         # if the CIGAR operation consumes the reference sequence, increment/decrement the position by the CIGAR operation length
@@ -121,10 +122,10 @@ def trim(segment, primer_pos, end, debug):
 
     # calculate how many extra matches are needed in the CIGAR
     extra = abs(pos - primer_pos)
-    if debug:
+    if verbose:
         print("extra %s" % (extra), file=sys.stderr)
     if extra:
-        if debug:
+        if verbose:
             print("Inserted a %s, %s" % (0, extra), file=sys.stderr)
         if end:
             cigar.append((0, extra))
@@ -137,12 +138,12 @@ def trim(segment, primer_pos, end, debug):
 
         # update the position of the leftmost mappinng base
         segment.pos = pos - extra
-        if debug:
+        if verbose:
             print("New pos: %s" % (segment.pos), file=sys.stderr)
 
         # if proposed softmask leads straight into a deletion, shuffle leftmost mapping base along and ignore the deletion
         if cigar[0][0] == 2:
-            if debug:
+            if verbose:
                 print(
                     "softmask created a leading deletion in the CIGAR, shuffling the alignment",
                     file=sys.stderr,
@@ -188,34 +189,39 @@ def handle_segment(
 
     # filter out unmapped and supplementary alignment segments
     if segment.is_unmapped:
-        print("%s skipped as unmapped" % (segment.query_name), file=sys.stderr)
+        if args.verbose:
+            print("%s skipped as unmapped" % (segment.query_name), file=sys.stderr)
         return False
     if segment.is_supplementary:
-        print("%s skipped as supplementary" % (segment.query_name), file=sys.stderr)
+        if args.verbose:
+            print("%s skipped as supplementary" % (segment.query_name), file=sys.stderr)
         return False
     if segment.mapping_quality < min_mapq:
-        print(
-            "%s skipped as mapping quality below threshold" % (segment.query_name),
-            file=sys.stderr,
-        )
+        if args.verbose:
+            print(
+                "%s skipped as mapping quality below threshold" % (segment.query_name),
+                file=sys.stderr,
+            )
         return False
 
     # locate the nearest primers to this alignment segment
     p1 = find_primer(bed, segment.reference_start, "+", args.primer_match_threshold)
     p2 = find_primer(bed, segment.reference_end, "-", args.primer_match_threshold)
 
     if not p1 or not p2:
-        print(
-            "%s skipped as no primer found for segment" % (segment.query_name),
-            file=sys.stderr,
-        )
+        if args.verbose:
+            print(
+                "%s skipped as no primer found for segment" % (segment.query_name),
+                file=sys.stderr,
+            )
         return False
 
     # check if primers are correctly paired and then assign read group
     # NOTE: removed this as a function as only called once
     # TODO: will try improving this / moving it to the primer scheme processing code
-    correctly_paired = p1[2]["Primer_ID"].split("_")[1] == p2[2]["Primer_ID"].split("_")[1]
-
+    correctly_paired = (
+        p1[2]["Primer_ID"].split("_")[1] == p2[2]["Primer_ID"].split("_")[1]
+    )
 
     if not args.no_read_groups:
         if correctly_paired:
@@ -247,10 +253,11 @@ def handle_segment(
         report_writer.writerow(report)
 
     if args.remove_incorrect_pairs and not correctly_paired:
-        print(
-            "%s skipped as not correctly paired" % (segment.query_name),
-            file=sys.stderr,
-        )
+        if args.verbose:
+            print(
+                "%s skipped as not correctly paired" % (segment.query_name),
+                file=sys.stderr,
+            )
         return False
 
     if args.verbose:
@@ -306,11 +313,12 @@ def handle_segment(
 
     # check the the alignment still contains bases matching the reference
     if "M" not in segment.cigarstring:
-        print(
-            "%s dropped as does not match reference post masking"
-            % (segment.query_name),
-            file=sys.stderr,
-        )
+        if args.verbose:
+            print(
+                "%s dropped as does not match reference post masking"
+                % (segment.query_name),
+                file=sys.stderr,
+            )
         return False
 
     return (amplicon, segment)
@@ -364,7 +372,7 @@ def generate_amplicons(bed: list):
     return amplicons
 
 
-def normalise(trimmed_segments: dict, normalise: int, bed: list):
+def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool = False):
     """Normalise the depth of the trimmed segments to a given value. Perform per-amplicon normalisation using numpy vector maths to determine whether the segment in question would take the depth closer to the desired depth accross the amplicon.
 
     Args:
@@ -397,10 +405,11 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list):
         amplicon_depth = np.zeros((amplicons[amplicon]["length"],), dtype=int)
 
         if not segments:
-            print(
-                f"No segments assigned to amplicon {amplicon}, skipping",
-                file=sys.stderr,
-            )
+            if verbose:
+                print(
+                    f"No segments assigned to amplicon {amplicon}, skipping",
+                    file=sys.stderr,
+                )
             continue
 
         random.shuffle(segments)
@@ -507,7 +516,7 @@ def go(args):
     # normalise if requested
     if args.normalise:
         output_segments, mean_amp_depths = normalise(
-            trimmed_segments, args.normalise, bed
+            trimmed_segments, args.normalise, bed, args.verbose
         )
 
         # write mean amplicon depths to file
@@ -567,6 +576,7 @@ def main():
     parser.add_argument("--remove-incorrect-pairs", action="store_true")
 
     args = parser.parse_args()
+
     go(args)
 
 

diff --git a/artic/minion.py b/artic/minion.py
@@ -134,11 +134,19 @@ def run(parser, args):
         normalise_string = ""
 
     cmds.append(
-        f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --remove-incorrect-pairs --min-mapq {args.min_mapq} --report {args.sample}.alignreport.csv < {args.sample}.sorted.bam 2> {args.sample}.alignreport.er | samtools sort -T {args.sample} - -o {args.sample}.trimmed.rg.sorted.bam"
+        f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --remove-incorrect-pairs --min-mapq {args.min_mapq} --report {args.sample}.alignreport.csv < {args.sample}.sorted.bam > {args.sample}.trimmed.rg.bam"
     )
 
     cmds.append(
-        f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --min-mapq {args.min_mapq} --remove-incorrect-pairs --trim-primers --report {args.sample}.alignreport.csv --amp-depth-report {args.sample}.amplicon_depths.tsv < {args.sample}.sorted.bam 2> {args.sample}.alignreport.er | samtools sort -T {args.sample} - -o {args.sample}.primertrimmed.rg.sorted.bam"
+        f"samtools sort -T {args.sample} {args.sample}.trimmed.rg.bam -o {args.sample}.trimmed.rg.sorted.bam"
+    )
+
+    cmds.append(
+        f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --min-mapq {args.min_mapq} --remove-incorrect-pairs --trim-primers --report {args.sample}.alignreport.csv --amp-depth-report {args.sample}.amplicon_depths.tsv < {args.sample}.sorted.bam > {args.sample}.primertrimmed.rg.bam"
+    )
+
+    cmds.append(
+        f"samtools sort -T {args.sample} {args.sample}.primertrimmed.rg.bam -o {args.sample}.primertrimmed.rg.sorted.bam"
     )
 
     cmds.append(f"samtools index {args.sample}.trimmed.rg.sorted.bam")

diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = artic
-version = 1.5.5
+version = 1.5.6
 author = Nick Loman
 author_email = [email protected]
 maintainer = Sam Wilkinson

diff --git a/tests/align_trim_unit_test.py b/tests/align_trim_unit_test.py
@@ -1,7 +1,6 @@
 # align_trim_unit_test.py contains unit tests for alignment trimming
 import os
 import pysam
-import pytest
 
 from artic import align_trim
 from artic import utils
@@ -114,7 +113,7 @@ def testRunner(seg, expectedCIGAR):
 
         # trim the forward primer
         try:
-            align_trim.trim(seg, p1_position, False, False)
+            align_trim.trim(seg, p1_position, False)
         except Exception as e:
             raise Exception(
                 "problem soft masking left primer in {} (error: {})".format(
@@ -132,7 +131,7 @@ def testRunner(seg, expectedCIGAR):
 
         # trim the reverse primer
         try:
-            align_trim.trim(seg, p2_position, True, False)
+            align_trim.trim(seg, p2_position, True)
         except Exception as e:
             raise Exception(
                 "problem soft masking right primer in {} (error: {})".format(

diff --git a/tests/minion_validator.py b/tests/minion_validator.py
@@ -184,9 +184,6 @@ def genCommand(sampleID, workflow):
         "--scheme-directory",
         dataDir + "primer-schemes",
     ]
-    if workflow == "medaka":
-        cmd.append("--model")
-        cmd.append("r941_min_high_g351")
 
     if workflow == "clair3":
         cmd.append("--model")
@@ -255,6 +252,7 @@ def runner(workflow, sampleID):
         args = parser.parse_args(cmd)
     except SystemExit:
         sys.stderr.write("failed to parse valid command for `artic minion`")
+        assert False
 
     # run the minion pipeline
     try: