diff --git a/phylogenetic/config/config_dengue.yaml b/phylogenetic/config/config_dengue.yaml index a110c969..ebd9a318 100644 --- a/phylogenetic/config/config_dengue.yaml +++ b/phylogenetic/config/config_dengue.yaml @@ -9,6 +9,7 @@ display_strain_field: "strain" filter: exclude: "config/exclude.txt" + include: "config/include_{serotype}.txt" group_by: "year region" min_length: genome: 5000 diff --git a/phylogenetic/config/include_all.txt b/phylogenetic/config/include_all.txt new file mode 100644 index 00000000..b98b661e --- /dev/null +++ b/phylogenetic/config/include_all.txt @@ -0,0 +1 @@ +# Format: [# ] \ No newline at end of file diff --git a/phylogenetic/config/include_denv1.txt b/phylogenetic/config/include_denv1.txt new file mode 100644 index 00000000..b98b661e --- /dev/null +++ b/phylogenetic/config/include_denv1.txt @@ -0,0 +1 @@ +# Format: [# ] \ No newline at end of file diff --git a/phylogenetic/config/include_denv2.txt b/phylogenetic/config/include_denv2.txt new file mode 100644 index 00000000..b98b661e --- /dev/null +++ b/phylogenetic/config/include_denv2.txt @@ -0,0 +1 @@ +# Format: [# ] \ No newline at end of file diff --git a/phylogenetic/config/include_denv3.txt b/phylogenetic/config/include_denv3.txt new file mode 100644 index 00000000..b98b661e --- /dev/null +++ b/phylogenetic/config/include_denv3.txt @@ -0,0 +1 @@ +# Format: [# ] \ No newline at end of file diff --git a/phylogenetic/config/include_denv4.txt b/phylogenetic/config/include_denv4.txt new file mode 100644 index 00000000..b98b661e --- /dev/null +++ b/phylogenetic/config/include_denv4.txt @@ -0,0 +1 @@ +# Format: [# ] \ No newline at end of file diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk index 551c698a..2eb89284 100644 --- a/phylogenetic/rules/annotate_phylogeny.smk +++ b/phylogenetic/rules/annotate_phylogeny.smk @@ -25,7 +25,8 @@ rule ancestral: """Reconstructing ancestral sequences and mutations""" input: tree = "results/{gene}/tree_{serotype}.nwk", - alignment = "results/{gene}/aligned_{serotype}.fasta" + alignment = "results/{gene}/aligned_{serotype}.fasta", + root_sequence = lambda wildcard: "config/reference_{serotype}_genome.gb" if wildcard.gene in ['genome'] else "results/config/reference_{serotype}_{gene}.gb", output: node_data = "results/{gene}/nt-muts_{serotype}.json" params: @@ -36,6 +37,7 @@ rule ancestral: --tree {input.tree} \ --alignment {input.alignment} \ --output-node-data {output.node_data} \ + --root-sequence {input.root_sequence} \ --inference {params.inference} """ diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk index 7f651b68..60144f85 100644 --- a/phylogenetic/rules/prepare_sequences.smk +++ b/phylogenetic/rules/prepare_sequences.smk @@ -55,6 +55,7 @@ rule filter: sequences = lambda wildcard: "data/sequences_{serotype}.fasta" if wildcard.gene in ['genome'] else "results/{gene}/sequences_{serotype}.fasta", metadata = "data/metadata_{serotype}.tsv", exclude = config["filter"]["exclude"], + include = config["filter"]["include"], output: sequences = "results/{gene}/filtered_{serotype}.fasta" params: @@ -69,6 +70,7 @@ rule filter: --metadata {input.metadata} \ --metadata-id-columns {params.strain_id} \ --exclude {input.exclude} \ + --include {input.include} \ --output {output.sequences} \ --group-by {params.group_by} \ --sequences-per-group {params.sequences_per_group} \