diff --git a/ingest/config/config.yaml b/ingest/config/config.yaml
index bae5fc9e..fadde38a 100644
--- a/ingest/config/config.yaml
+++ b/ingest/config/config.yaml
@@ -44,6 +44,8 @@ transform:
   annotations: 'source-data/annotations.tsv'
   # ID field used to merge annotations
   annotations_id: 'accession'
+  # Field to use as the sequence ID in the Nextclade file
+  nextclade_id_field: 'seqName'
   # Field to use as the sequence ID in the FASTA file
   id_field: 'accession'
   # Field to use as the sequence in the FASTA file
diff --git a/ingest/workflow/snakemake_rules/nextclade.smk b/ingest/workflow/snakemake_rules/nextclade.smk
index eb7b6720..fe02982a 100644
--- a/ingest/workflow/snakemake_rules/nextclade.smk
+++ b/ingest/workflow/snakemake_rules/nextclade.smk
@@ -61,6 +61,7 @@ rule join_metadata_clades:
         metadata="data/metadata.tsv",
     params:
         id_field=config["transform"]["id_field"],
+        nextclade_id_field=config["transform"]["nextclade_id_field"],
     shell:
         """
         export SUBSET_FIELDS=`awk 'NR>1 {{print $1}}' {input.nextclade_field_map} | tr '\n' ',' | sed 's/,$//g'`
@@ -75,11 +76,11 @@ rule join_metadata_clades:
             -k {input.nextclade_field_map} \
         | tsv-join -H \
             --filter-file - \
-            --key-fields seqName \
+            --key-fields {params.nextclade_id_field} \
             --data-fields {params.id_field} \
             --append-fields '*' \
             --write-all ? \
             {input.metadata} \
-        | tsv-select -H --exclude seqName \
+        | tsv-select -H --exclude {params.nextclade_id_field} \
             > {output.metadata}
         """