Skip to content

Commit

Permalink
Merge pull request #191 from nextstrain/fix-reverse-complement
Browse files Browse the repository at this point in the history
Fix reverse complement script, use-fft again, exclude unalignable sequences
  • Loading branch information
corneliusroemer authored Sep 22, 2023
2 parents 9fe0536 + 6c9b922 commit dc19890
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 10 deletions.
11 changes: 4 additions & 7 deletions scripts/reverse_reversed_sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,10 @@
with open(args.output, 'w') as f_out:
for seq in SeqIO.parse(f_in, 'fasta'):
# Check if metadata['reverse'] is True
try:
if metadata.loc[metadata['strain'] == seq.id, 'reverse'].values[0] == True:
# Reverse-complement sequence
seq.seq = seq.seq.reverse_complement()
print("Reverse-complementing sequence:", seq.id)
except:
print("No reverse complement for:", seq.id)
if metadata.loc[metadata['accession'] == seq.id, 'is_reverse_complement'].values[0] == True:
# Reverse-complement sequence
seq.seq = seq.seq.reverse_complement()
print("Reverse-complementing sequence:", seq.id)

# Write sequences to file
SeqIO.write(seq, f_out, 'fasta')
7 changes: 4 additions & 3 deletions workflow/snakemake_rules/core.smk
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ rule filter:
--exclude {params.exclude} \
--min-date {params.min_date} \
--min-length {params.min_length} \
--exclude-where QC_rare_mutations=bad \
--query "(QC_rare_mutations == 'good' | QC_rare_mutations == 'mediocre')" \
--output-log {output.log}
"""

Expand Down Expand Up @@ -102,7 +102,7 @@ rule combine_samples:
"""


rule separate_reverse_complement:
rule reverse_reverse_complements:
input:
metadata=build_dir + "/{build_name}/metadata.tsv",
sequences=build_dir + "/{build_name}/filtered.fasta",
Expand All @@ -124,7 +124,7 @@ rule align:
- filling gaps with N
"""
input:
sequences=rules.separate_reverse_complement.output,
sequences=rules.reverse_reverse_complements.output,
reference=config["reference"],
genemap=config["genemap"],
output:
Expand Down Expand Up @@ -256,6 +256,7 @@ rule refine:
--root {params.root} \
--precision 3 \
--keep-polytomies \
--use-fft \
{params.clock_rate} \
{params.clock_std_dev} \
--output-node-data {output.node_data} \
Expand Down

0 comments on commit dc19890

Please sign in to comment.