Skip to content

Commit

Permalink
WIP: use pathoplexus for potential 1A strains
Browse files Browse the repository at this point in the history
  • Loading branch information
j23414 committed Nov 1, 2024
1 parent b90c25a commit 2631fe8
Showing 1 changed file with 71 additions and 2 deletions.
73 changes: 71 additions & 2 deletions ingest/rules/nextclade.smk
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,54 @@ like to customize the rules:
https://docs.nextstrain.org/projects/nextclade/page/user/nextclade-cli.html
"""

rule pathoplexus_classify:
"""
Pulls global lineage calls from Pathoplexus API
"""
output:
pathoplexus_tsv="data/pathoplexus_results/global_lineages.tsv",
params:
URL="https://lapis.pathoplexus.org/west-nile/sample/details",
fields="insdcAccessionBase,lineage",
id_field=config["curate"]["output_id_field"],
shell:
"""
curl "{params.URL}?dataFormat=TSV&downloadAsFile=false&fields={params.fields}" \
| uniq \
| csvtk -t rename -f "insdcAccessionBase" -n {params.id_field} \
> {output.pathoplexus_tsv}
"""

rule select_USA_potential_samples:
"""
Select 1A or "unassigned" sequences from the USA
"""
input:
sequences="results/sequences.fasta",
pathoplexus_tsv="data/pathoplexus_results/global_lineages.tsv",
output:
potential_1A_samples="data/pathoplexus_results/potential_1A_samples.tsv",
sequences="data/potential_1A_sequences.fasta",
params:
id_field=config["curate"]["output_id_field"],
shell:
"""
tsv-filter -H \
--not-regex 'lineage:1B|[2,3,4,5,6,7,8]' \
{input.pathoplexus_tsv} \
> {output.potential_1A_samples}
augur filter \
--sequences {input.sequences} \
--metadata {output.potential_1A_samples} \
--metadata-id-column {params.id_field} \
--output-sequences {output.sequences}
"""

rule nextclade_classify:
#Classifies sequences into clades using Nextclade
input:
sequences="results/sequences.fasta",
sequences="data/potential_1A_sequences.fasta",
dataset=config["nextclade"]["nextclade_dataset_path"],
output:
nextclade_tsv="data/nextclade_results/nextclade.tsv",
Expand Down Expand Up @@ -55,7 +99,7 @@ rule append_nextclade_columns:
metadata="data/raw_metadata.tsv",
nextclade_subtypes="data/nextclade_clades.tsv",
output:
metadata_all="results/metadata.tsv",
metadata_all="data/metadata_nextclade.tsv",
params:
id_field=config["curate"]["output_id_field"],
nextclade_field=config["nextclade"]["nextclade_field"],
Expand All @@ -69,3 +113,28 @@ rule append_nextclade_columns:
{input.metadata} \
> {output.metadata_all}
"""

rule append_pathoplexus_columns:
"""
Append the pathoplexus results to the metadata
"""
input:
metadata="data/metadata_nextclade.tsv",
pathoplexus_tsv="data/pathoplexus_results/global_lineages.tsv",
output:
metadata="results/metadata.tsv",
params:
id_field=config["curate"]["output_id_field"],
pathoplexus_field=config["curate"]["output_id_field"],
shell:
r"""
augur merge \
--metadata \
metadata={input.metadata:q} \
pathoplexus={input.pathoplexus_tsv:q} \
--metadata-id-columns \
metadata={params.id_field:q} \
pathoplexus={params.pathoplexus_field:q} \
--output-metadata {output.metadata:q} \
--no-source-columns
"""

0 comments on commit 2631fe8

Please sign in to comment.