diff --git a/scripts/reverse_reversed_sequences.py b/scripts/reverse_reversed_sequences.py index f838a20d..21b4ebbb 100644 --- a/scripts/reverse_reversed_sequences.py +++ b/scripts/reverse_reversed_sequences.py @@ -20,13 +20,10 @@ with open(args.output, 'w') as f_out: for seq in SeqIO.parse(f_in, 'fasta'): # Check if metadata['reverse'] is True - try: - if metadata.loc[metadata['strain'] == seq.id, 'reverse'].values[0] == True: - # Reverse-complement sequence - seq.seq = seq.seq.reverse_complement() - print("Reverse-complementing sequence:", seq.id) - except: - print("No reverse complement for:", seq.id) + if metadata.loc[metadata['accession'] == seq.id, 'is_reverse_complement'].values[0] == True: + # Reverse-complement sequence + seq.seq = seq.seq.reverse_complement() + print("Reverse-complementing sequence:", seq.id) # Write sequences to file SeqIO.write(seq, f_out, 'fasta') diff --git a/workflow/snakemake_rules/core.smk b/workflow/snakemake_rules/core.smk index 77299730..ea9bac8f 100644 --- a/workflow/snakemake_rules/core.smk +++ b/workflow/snakemake_rules/core.smk @@ -40,7 +40,7 @@ rule filter: --exclude {params.exclude} \ --min-date {params.min_date} \ --min-length {params.min_length} \ - --exclude-where QC_rare_mutations=bad \ + --query "(QC_rare_mutations == 'good' | QC_rare_mutations == 'mediocre')" \ --output-log {output.log} """ @@ -102,7 +102,7 @@ rule combine_samples: """ -rule separate_reverse_complement: +rule reverse_reverse_complements: input: metadata=build_dir + "/{build_name}/metadata.tsv", sequences=build_dir + "/{build_name}/filtered.fasta", @@ -124,7 +124,7 @@ rule align: - filling gaps with N """ input: - sequences=rules.separate_reverse_complement.output, + sequences=rules.reverse_reverse_complements.output, reference=config["reference"], genemap=config["genemap"], output: @@ -256,6 +256,7 @@ rule refine: --root {params.root} \ --precision 3 \ --keep-polytomies \ + --use-fft \ {params.clock_rate} \ {params.clock_std_dev} \ --output-node-data {output.node_data} \