# Patch preamble (explanatory text placed before the `diff --git` header).
#
# Target file: ingest/workflow/snakemake_rules/split_serotypes.smk
# Hunk shown:  @@ -14,30 +14,30 @@ -- the body of `rule split_by_ncbi_serotype`.
#
# What the hunk changes: the removed (-) and added (+) lines carry the same
# tokens in the same order (docstring, input, output, params, shell), so this
# looks like a re-indentation of the rule body with no logic change.
# NOTE(review): leading whitespace is not visible in this one-line rendering,
# so the exact old/new indentation cannot be confirmed from here.
#
# What the rule does, as visible on both sides of the hunk:
#   * input:  data/metadata.tsv and data/sequences.fasta
#   * output: results/metadata_{serotype}.tsv and results/sequences_{serotype}.fasta
#   * params: ncbi_serotype is taken from the `serotype` wildcard;
#             id_field is read from config["transform"]["id_field"].
#   * shell:  when the serotype is "all", both inputs are copied to the outputs
#             with `cp`; otherwise `augur filter` is run with
#             --metadata-id-columns {params.id_field} and
#             --query "ncbi_serotype=='<serotype>'" to write the filtered
#             sequences and metadata.
#
# NOTE(review): the patch below sits on a single physical line; presumably its
# line breaks were lost in extraction and need restoring before it can be applied.
diff --git a/ingest/workflow/snakemake_rules/split_serotypes.smk b/ingest/workflow/snakemake_rules/split_serotypes.smk index b389fa8a..6ac75767 100644 --- a/ingest/workflow/snakemake_rules/split_serotypes.smk +++ b/ingest/workflow/snakemake_rules/split_serotypes.smk @@ -14,30 +14,30 @@ Parameters are expected to be defined in `config.transform`. """ rule split_by_ncbi_serotype: - """ - Split the data by serotype based on the NCBI metadata. - """ - input: - metadata = "data/metadata.tsv", - sequences = "data/sequences.fasta" - output: - metadata = "results/metadata_{serotype}.tsv", - sequences = "results/sequences_{serotype}.fasta" - params: - ncbi_serotype = lambda wildcards: wildcards.serotype, - id_field = config["transform"]["id_field"] - shell: """ - if [[ "{params.ncbi_serotype}" == "all" ]]; then - cp {input.metadata} {output.metadata} - cp {input.sequences} {output.sequences} - else - augur filter \ - --sequences {input.sequences} \ - --metadata {input.metadata} \ - --metadata-id-columns {params.id_field} \ - --query "ncbi_serotype=='{params.ncbi_serotype}'" \ - --output-sequences {output.sequences} \ - --output-metadata {output.metadata} - fi + Split the data by serotype based on the NCBI metadata. """ + input: + metadata = "data/metadata.tsv", + sequences = "data/sequences.fasta" + output: + metadata = "results/metadata_{serotype}.tsv", + sequences = "results/sequences_{serotype}.fasta" + params: + ncbi_serotype = lambda wildcards: wildcards.serotype, + id_field = config["transform"]["id_field"] + shell: + """ + if [[ "{params.ncbi_serotype}" == "all" ]]; then + cp {input.metadata} {output.metadata} + cp {input.sequences} {output.sequences} + else + augur filter \ + --sequences {input.sequences} \ + --metadata {input.metadata} \ + --metadata-id-columns {params.id_field} \ + --query "ncbi_serotype=='{params.ncbi_serotype}'" \ + --output-sequences {output.sequences} \ + --output-metadata {output.metadata} + fi + """