diff --git a/phylogenetic/defaults/clade-i/config.yaml b/phylogenetic/defaults/clade-i/config.yaml
index e8ad850..ea40698 100644
--- a/phylogenetic/defaults/clade-i/config.yaml
+++ b/phylogenetic/defaults/clade-i/config.yaml
@@ -19,14 +19,14 @@ auspice_name: "mpox_clade-I"
 filter:
   min_date: 1900
   min_length: 100000
-  exclude_where: 'clade!=I'
 
 
-### We don't want to subsample, so specify a config which is essentially a no-op
+### No subsampling (empty group_by / sequences_per_group); only keep Clade I, Ia and Ib sequences
 subsample:
   everything:
     group_by: ""
     sequences_per_group: ""
+    query: "clade in ['I', 'Ia', 'Ib']"
 
 ## align
 max_indel: 10000
diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk
index 4916379..2fc7ce8 100644
--- a/phylogenetic/rules/prepare_sequences.smk
+++ b/phylogenetic/rules/prepare_sequences.smk
@@ -98,6 +98,13 @@ rule subsample:
         sequences_per_group=lambda w: config["subsample"][w.sample][
             "sequences_per_group"
         ],
+        # Returned as an argument list and rendered with `:q` in the shell block, so the
+        # query needs no shell quoting inside the config value (empty list => no option).
+        query=lambda w: (
+            ["--query", config["subsample"][w.sample]["query"]]
+            if "query" in config["subsample"][w.sample]
+            else []
+        ),
         other_filters=lambda w: config["subsample"][w.sample].get("other_filters", ""),
         exclude=lambda w: (
             f"--exclude-where {' '.join([f'lineage={l}' for l in config['subsample'][w.sample]['exclude_lineages']])}"
@@ -113,6 +120,7 @@ rule subsample:
             --output-strains {output.strains} \
             {params.group_by} \
             {params.sequences_per_group} \
+            {params.query:q} \
             {params.exclude} \
             {params.other_filters} \
             --output-log {output.log}