Skip to content

Commit

Permalink
1.5.6 PR (fixes #144) (#145)
Browse files Browse the repository at this point in the history
* Pre 1.5.6 push

* Fix align_trim unit test

* Remove verbose as a global arg

* Fix minion validator test

* Fix minion_validator again

* Remove unused import
  • Loading branch information
BioWilko authored Nov 28, 2024
1 parent a3e1df7 commit 6cc5f5d
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 47 deletions.
86 changes: 48 additions & 38 deletions artic/align_trim.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def find_primer(bed, pos, direction, threshold=20):
return closest


def trim(segment, primer_pos, end, debug):
def trim(segment, primer_pos, end, verbose=False):
"""Soft mask an alignment to fit within primer start/end sites.
Parameters
Expand All @@ -71,7 +71,7 @@ def trim(segment, primer_pos, end, debug):
The position in the reference to soft mask up to (equates to the start/end position of the primer in the reference)
end : bool
If True, the segment is being masked from the end (i.e. for the reverse primer)
debug : bool
vernose : bool
If True, will print soft masking info during trimming
"""
# get a copy of the cigar tuples to work with
Expand All @@ -93,13 +93,14 @@ def trim(segment, primer_pos, end, debug):
flag, length = cigar.pop()
else:
flag, length = cigar.pop(0)
if debug:
if verbose:
print("Chomped a %s, %s" % (flag, length), file=sys.stderr)
except IndexError:
print(
"Ran out of cigar during soft masking - completely masked read will be ignored",
file=sys.stderr,
)
if verbose:
print(
"Ran out of cigar during soft masking - completely masked read will be ignored",
file=sys.stderr,
)
break

# if the CIGAR operation consumes the reference sequence, increment/decrement the position by the CIGAR operation length
Expand All @@ -121,10 +122,10 @@ def trim(segment, primer_pos, end, debug):

# calculate how many extra matches are needed in the CIGAR
extra = abs(pos - primer_pos)
if debug:
if verbose:
print("extra %s" % (extra), file=sys.stderr)
if extra:
if debug:
if verbose:
print("Inserted a %s, %s" % (0, extra), file=sys.stderr)
if end:
cigar.append((0, extra))
Expand All @@ -137,12 +138,12 @@ def trim(segment, primer_pos, end, debug):

# update the position of the leftmost mappinng base
segment.pos = pos - extra
if debug:
if verbose:
print("New pos: %s" % (segment.pos), file=sys.stderr)

# if proposed softmask leads straight into a deletion, shuffle leftmost mapping base along and ignore the deletion
if cigar[0][0] == 2:
if debug:
if verbose:
print(
"softmask created a leading deletion in the CIGAR, shuffling the alignment",
file=sys.stderr,
Expand Down Expand Up @@ -188,34 +189,39 @@ def handle_segment(

# filter out unmapped and supplementary alignment segments
if segment.is_unmapped:
print("%s skipped as unmapped" % (segment.query_name), file=sys.stderr)
if args.verbose:
print("%s skipped as unmapped" % (segment.query_name), file=sys.stderr)
return False
if segment.is_supplementary:
print("%s skipped as supplementary" % (segment.query_name), file=sys.stderr)
if args.verbose:
print("%s skipped as supplementary" % (segment.query_name), file=sys.stderr)
return False
if segment.mapping_quality < min_mapq:
print(
"%s skipped as mapping quality below threshold" % (segment.query_name),
file=sys.stderr,
)
if args.verbose:
print(
"%s skipped as mapping quality below threshold" % (segment.query_name),
file=sys.stderr,
)
return False

# locate the nearest primers to this alignment segment
p1 = find_primer(bed, segment.reference_start, "+", args.primer_match_threshold)
p2 = find_primer(bed, segment.reference_end, "-", args.primer_match_threshold)

if not p1 or not p2:
print(
"%s skipped as no primer found for segment" % (segment.query_name),
file=sys.stderr,
)
if args.verbose:
print(
"%s skipped as no primer found for segment" % (segment.query_name),
file=sys.stderr,
)
return False

# check if primers are correctly paired and then assign read group
# NOTE: removed this as a function as only called once
# TODO: will try improving this / moving it to the primer scheme processing code
correctly_paired = p1[2]["Primer_ID"].split("_")[1] == p2[2]["Primer_ID"].split("_")[1]

correctly_paired = (
p1[2]["Primer_ID"].split("_")[1] == p2[2]["Primer_ID"].split("_")[1]
)

if not args.no_read_groups:
if correctly_paired:
Expand Down Expand Up @@ -247,10 +253,11 @@ def handle_segment(
report_writer.writerow(report)

if args.remove_incorrect_pairs and not correctly_paired:
print(
"%s skipped as not correctly paired" % (segment.query_name),
file=sys.stderr,
)
if args.verbose:
print(
"%s skipped as not correctly paired" % (segment.query_name),
file=sys.stderr,
)
return False

if args.verbose:
Expand Down Expand Up @@ -306,11 +313,12 @@ def handle_segment(

# check the the alignment still contains bases matching the reference
if "M" not in segment.cigarstring:
print(
"%s dropped as does not match reference post masking"
% (segment.query_name),
file=sys.stderr,
)
if args.verbose:
print(
"%s dropped as does not match reference post masking"
% (segment.query_name),
file=sys.stderr,
)
return False

return (amplicon, segment)
Expand Down Expand Up @@ -364,7 +372,7 @@ def generate_amplicons(bed: list):
return amplicons


def normalise(trimmed_segments: dict, normalise: int, bed: list):
def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool = False):
"""Normalise the depth of the trimmed segments to a given value. Perform per-amplicon normalisation using numpy vector maths to determine whether the segment in question would take the depth closer to the desired depth accross the amplicon.
Args:
Expand Down Expand Up @@ -397,10 +405,11 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list):
amplicon_depth = np.zeros((amplicons[amplicon]["length"],), dtype=int)

if not segments:
print(
f"No segments assigned to amplicon {amplicon}, skipping",
file=sys.stderr,
)
if verbose:
print(
f"No segments assigned to amplicon {amplicon}, skipping",
file=sys.stderr,
)
continue

random.shuffle(segments)
Expand Down Expand Up @@ -507,7 +516,7 @@ def go(args):
# normalise if requested
if args.normalise:
output_segments, mean_amp_depths = normalise(
trimmed_segments, args.normalise, bed
trimmed_segments, args.normalise, bed, args.verbose
)

# write mean amplicon depths to file
Expand Down Expand Up @@ -567,6 +576,7 @@ def main():
parser.add_argument("--remove-incorrect-pairs", action="store_true")

args = parser.parse_args()

go(args)


Expand Down
12 changes: 10 additions & 2 deletions artic/minion.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,19 @@ def run(parser, args):
normalise_string = ""

cmds.append(
f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --remove-incorrect-pairs --min-mapq {args.min_mapq} --report {args.sample}.alignreport.csv < {args.sample}.sorted.bam 2> {args.sample}.alignreport.er | samtools sort -T {args.sample} - -o {args.sample}.trimmed.rg.sorted.bam"
f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --remove-incorrect-pairs --min-mapq {args.min_mapq} --report {args.sample}.alignreport.csv < {args.sample}.sorted.bam > {args.sample}.trimmed.rg.bam"
)

cmds.append(
f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --min-mapq {args.min_mapq} --remove-incorrect-pairs --trim-primers --report {args.sample}.alignreport.csv --amp-depth-report {args.sample}.amplicon_depths.tsv < {args.sample}.sorted.bam 2> {args.sample}.alignreport.er | samtools sort -T {args.sample} - -o {args.sample}.primertrimmed.rg.sorted.bam"
f"samtools sort -T {args.sample} {args.sample}.trimmed.rg.bam -o {args.sample}.trimmed.rg.sorted.bam"
)

cmds.append(
f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --min-mapq {args.min_mapq} --remove-incorrect-pairs --trim-primers --report {args.sample}.alignreport.csv --amp-depth-report {args.sample}.amplicon_depths.tsv < {args.sample}.sorted.bam > {args.sample}.primertrimmed.rg.bam"
)

cmds.append(
f"samtools sort -T {args.sample} {args.sample}.primertrimmed.rg.bam -o {args.sample}.primertrimmed.rg.sorted.bam"
)

cmds.append(f"samtools index {args.sample}.trimmed.rg.sorted.bam")
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = artic
version = 1.5.5
version = 1.5.6
author = Nick Loman
author_email = [email protected]
maintainer = Sam Wilkinson
Expand Down
5 changes: 2 additions & 3 deletions tests/align_trim_unit_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# align_trim_unit_test.py contains unit tests for alignment trimming
import os
import pysam
import pytest

from artic import align_trim
from artic import utils
Expand Down Expand Up @@ -114,7 +113,7 @@ def testRunner(seg, expectedCIGAR):

# trim the forward primer
try:
align_trim.trim(seg, p1_position, False, False)
align_trim.trim(seg, p1_position, False)
except Exception as e:
raise Exception(
"problem soft masking left primer in {} (error: {})".format(
Expand All @@ -132,7 +131,7 @@ def testRunner(seg, expectedCIGAR):

# trim the reverse primer
try:
align_trim.trim(seg, p2_position, True, False)
align_trim.trim(seg, p2_position, True)
except Exception as e:
raise Exception(
"problem soft masking right primer in {} (error: {})".format(
Expand Down
4 changes: 1 addition & 3 deletions tests/minion_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,6 @@ def genCommand(sampleID, workflow):
"--scheme-directory",
dataDir + "primer-schemes",
]
if workflow == "medaka":
cmd.append("--model")
cmd.append("r941_min_high_g351")

if workflow == "clair3":
cmd.append("--model")
Expand Down Expand Up @@ -255,6 +252,7 @@ def runner(workflow, sampleID):
args = parser.parse_args(cmd)
except SystemExit:
sys.stderr.write("failed to parse valid command for `artic minion`")
assert False

# run the minion pipeline
try:
Expand Down

0 comments on commit 6cc5f5d

Please sign in to comment.