Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Nextclade and geo rules to example-data; disable "prM-E" build in phylo CI #24

Merged
merged 6 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 30 additions & 4 deletions ingest/build-configs/ci/copy_example_data.smk
Original file line number Diff line number Diff line change
@@ -1,12 +1,38 @@
rule copy_example_data:
rule copy_example_ncbi_data:
input:
ncbi_dataset="example-data/ncbi_dataset.zip"
output:
ncbi_dataset=temp("data/ncbi_dataset.zip")
shell:
"""
r"""
cp -f {input.ncbi_dataset} {output.ncbi_dataset}
"""

# force this rule over NCBI data fetch
ruleorder: copy_example_data > fetch_ncbi_dataset_package
ruleorder: copy_example_ncbi_data > fetch_ncbi_dataset_package


# Name of the Nextclade dataset, read from the workflow config. It is used
# below as the file name of the dataset archive under data/nextclade_data/.
DATASET_NAME = config["nextclade"]["dataset_name"]


rule copy_example_nextclade_data:
    """Copy the example Nextclade dataset archive into the data directory."""
    input:
        nextclade_dataset="example-data/nextclade_dataset.zip"
    output:
        # The f-string interpolates DATASET_NAME at parse time. Without the
        # `f` prefix Snakemake interprets `{DATASET_NAME}` as a wildcard and
        # the config-derived variable above is silently ignored.
        nextclade_dataset=temp(f"data/nextclade_data/{DATASET_NAME}.zip")
    shell:
        r"""
        cp -f {input.nextclade_dataset} {output.nextclade_dataset}
        """

# force this rule over Nextclade data fetch
ruleorder: copy_example_nextclade_data > get_nextclade_dataset


rule copy_example_geolocation_rules:
    """Copy the example general geolocation rules TSV into the data directory."""
    input:
        general_geolocation_rules="example-data/general-geolocation-rules.tsv",
    output:
        general_geolocation_rules="data/general-geolocation-rules.tsv",
    shell:
        r"""
        cp -f {input.general_geolocation_rules} {output.general_geolocation_rules}
        """

# Give the local example copy priority over the rule that downloads the
# geolocation rules, since both can produce the same output file.
ruleorder: copy_example_geolocation_rules > fetch_general_geolocation_rules
44,702 changes: 44,702 additions & 0 deletions ingest/example-data/general-geolocation-rules.tsv

Large diffs are not rendered by default.

Binary file added ingest/example-data/nextclade_dataset.zip
Binary file not shown.
6 changes: 2 additions & 4 deletions phylogenetic/Snakefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
configfile: "defaults/config.yaml"

gene = ["genome", "prM-E"]

rule all:
input:
auspice_json=expand("auspice/yellow-fever-virus_{gene}.json", gene=gene),
tip_frequencies_json=expand("auspice/yellow-fever-virus_{gene}_tip-frequencies.json", gene=gene),
auspice_json=expand("auspice/yellow-fever-virus_{build}.json", build=config["builds"]),
tip_frequencies_json=expand("auspice/yellow-fever-virus_{build}_tip-frequencies.json", build=config["builds"]),


include: "rules/prepare_sequences.smk"
Expand Down
2 changes: 2 additions & 0 deletions phylogenetic/build-configs/ci/config.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
builds:
- "genome"
custom_rules:
- build-configs/ci/copy_example_data.smk
3 changes: 3 additions & 0 deletions phylogenetic/defaults/config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
builds:
- "genome"
- "prM-E"
strain_id_field: "accession"
files:
description: "defaults/description.md"
Expand Down
30 changes: 15 additions & 15 deletions phylogenetic/rules/annotate_phylogeny.smk
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@ phylogenetic tree.
rule ancestral:
"""Reconstructing ancestral sequences and mutations"""
input:
tree = "results/{gene}/tree.nwk",
alignment = "results/{gene}/aligned_and_filtered.fasta"
tree = "results/{build}/tree.nwk",
alignment = "results/{build}/aligned_and_filtered.fasta"
output:
node_data = "results/{gene}/nt_muts.json"
node_data = "results/{build}/nt_muts.json"
params:
inference = config["ancestral"]["inference"]
log:
"logs/{gene}/ancestral.txt",
"logs/{build}/ancestral.txt",
benchmark:
"benchmarks/{gene}/ancestral.txt"
"benchmarks/{build}/ancestral.txt"
shell:
r"""
augur ancestral \
Expand All @@ -29,15 +29,15 @@ rule ancestral:
rule translate:
"""Translating amino acid sequences"""
input:
tree = "results/{gene}/tree.nwk",
node_data = "results/{gene}/nt_muts.json",
genemap = "defaults/genemap_{gene}.gff"
tree = "results/{build}/tree.nwk",
node_data = "results/{build}/nt_muts.json",
genemap = "defaults/genemap_{build}.gff"
output:
node_data = "results/{gene}/aa_muts.json"
node_data = "results/{build}/aa_muts.json"
log:
"logs/{gene}/translate.txt",
"logs/{build}/translate.txt",
benchmark:
"benchmarks/{gene}/translate.txt"
"benchmarks/{build}/translate.txt"
shell:
r"""
augur translate \
Expand All @@ -52,17 +52,17 @@ rule translate:
rule traits:
"""Inferring ancestral traits for {params.columns!s}"""
input:
tree = "results/{gene}/tree.nwk",
tree = "results/{build}/tree.nwk",
metadata = "data/metadata.tsv",
output:
node_data = "results/{gene}/traits.json",
node_data = "results/{build}/traits.json",
params:
columns = config["traits"]["columns"],
strain_id = config["strain_id_field"],
log:
"logs/{gene}/traits.txt",
"logs/{build}/traits.txt",
benchmark:
"benchmarks/{gene}/traits.txt"
"benchmarks/{build}/traits.txt"
shell:
r"""
augur traits \
Expand Down
22 changes: 11 additions & 11 deletions phylogenetic/rules/construct_phylogeny.smk
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ This part of the workflow constructs the phylogenetic tree.
rule tree:
"""Building tree"""
input:
alignment = "results/{gene}/aligned_and_filtered.fasta"
alignment = "results/{build}/aligned_and_filtered.fasta"
output:
tree = "results/{gene}/tree_raw.nwk"
tree = "results/{build}/tree_raw.nwk"
log:
"logs/{gene}/tree.txt",
"logs/{build}/tree.txt",
benchmark:
"benchmarks/{gene}/tree.txt"
"benchmarks/{build}/tree.txt"
shell:
r"""
augur tree \
Expand All @@ -28,24 +28,24 @@ rule refine:
- filter tips more than {params.clock_filter_iqd} IQDs from clock expectation
"""
input:
tree = "results/{gene}/tree_raw.nwk",
alignment = "results/{gene}/aligned_and_filtered.fasta",
tree = "results/{build}/tree_raw.nwk",
alignment = "results/{build}/aligned_and_filtered.fasta",
metadata = "data/metadata.tsv"
output:
tree = "results/{gene}/tree.nwk",
node_data = "results/{gene}/branch_lengths.json"
tree = "results/{build}/tree.nwk",
node_data = "results/{build}/branch_lengths.json"
params:
strain_id = config["strain_id_field"],
timetree = lambda w: "--timetree" if w.gene == "genome" else "",
timetree = lambda w: "--timetree" if w.build == "genome" else "",
clock_rate = config["refine"]["clock_rate"],
clock_std_dev = config["refine"]["clock_std_dev"],
coalescent = config["refine"]["coalescent"],
date_inference = config["refine"]["date_inference"],
clock_filter_iqd = config["refine"]["clock_filter_iqd"],
log:
"logs/{gene}/refine.txt",
"logs/{build}/refine.txt",
benchmark:
"benchmarks/{gene}/refine.txt"
"benchmarks/{build}/refine.txt"
shell:
r"""
augur refine \
Expand Down
22 changes: 11 additions & 11 deletions phylogenetic/rules/export.smk
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,24 @@ rule colors:
rule export:
"""Exporting data files for auspice"""
input:
tree = "results/{gene}/tree.nwk",
tree = "results/{build}/tree.nwk",
metadata = "data/metadata.tsv",
branch_lengths = "results/{gene}/branch_lengths.json",
nt_muts = "results/{gene}/nt_muts.json",
aa_muts = "results/{gene}/aa_muts.json",
traits = "results/{gene}/traits.json",
branch_lengths = "results/{build}/branch_lengths.json",
nt_muts = "results/{build}/nt_muts.json",
aa_muts = "results/{build}/aa_muts.json",
traits = "results/{build}/traits.json",
colors = "data/colors.tsv",
auspice_config = lambda w: config["files"][w.gene]["auspice_config"],
auspice_config = lambda w: config["files"][w.build]["auspice_config"],
description=config["files"]["description"],
output:
auspice_json = "auspice/yellow-fever-virus_{gene}.json"
auspice_json = "auspice/yellow-fever-virus_{build}.json"
params:
metadata_columns = config["export"]["metadata_columns"],
strain_id = config["strain_id_field"],
log:
"logs/{gene}/export.txt",
"logs/{build}/export.txt",
benchmark:
"benchmarks/{gene}/export.txt"
"benchmarks/{build}/export.txt"
shell:
r"""
augur export v2 \
Expand All @@ -63,10 +63,10 @@ rule tip_frequencies:
Estimating KDE frequencies for tips
"""
input:
tree = "results/{gene}/tree.nwk",
tree = "results/{build}/tree.nwk",
metadata = "data/metadata.tsv"
output:
tip_freq = "auspice/yellow-fever-virus_{gene}_tip-frequencies.json"
tip_freq = "auspice/yellow-fever-virus_{build}_tip-frequencies.json"
params:
strain_id = config["strain_id_field"],
min_date = config["tip_frequencies"]["min_date"],
Expand Down
Loading