diff --git a/.gitignore b/.gitignore
index ef13b00..a280ae4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,40 +1,25 @@
-# Files created by the pipeline, which we want to keep out of git
-# (or at least out of _this_ git repo).
+# pipeline output #
+benchmarks/
 data/
+logs/
 results/
-build/
-# Sensitive environment variables
-environment*
-
-# Snakemake state dir
-/.snakemake
-
-# Local config overrides
-/config_local.yaml
+# snakemake output #
+.snakemake
+snakemake_log
 
 # For Python #
-##############
 *.pyc
 .tox/
 .cache/
-
-# Compiled source #
-###################
-*.com
-*.class
-*.dll
-*.exe
-*.o
-*.so
+__pycache__/*
 
 # OS generated files #
-######################
 .DS_Store
-.DS_Store?
 ._*
 .Spotlight-V100
 .Trashes
 Icon?
 ehthumbs.db
 Thumbs.db
+*~
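The tightened ignore list above can be spot-checked with git itself; `git check-ignore` is stock git, though the example paths here are only illustrations:

```bash
# Ask git which pattern (if any) ignores each candidate path.
git check-ignore -v benchmarks/curate.txt data/ncbi.ndjson logs/run.log results/metadata.tsv
```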
diff --git a/ingest/README.md b/ingest/README.md
index c3b3a16..e89d2d6 100644
--- a/ingest/README.md
+++ b/ingest/README.md
@@ -1,105 +1,42 @@
-# Ingest
+# Ingest workflow
 
 This workflow ingests public data from NCBI and outputs curated
 metadata and sequences that can be used as input for the phylogenetic
 workflow.
 
-## Workflow Usage
+If you have another data source or private data that needs to be
+formatted for the phylogenetic workflow, then you can use a similar
+workflow to curate your own data.
 
-The workflow can be run from the top level pathogen repo directory:
+## Config
 
-```bash
-nextstrain build ingest
-```
-
-Alternatively, the workflow can also be run from within the ingest
-directory:
-
-```bash
-cd ingest
-nextstrain build .
-```
-
-This produces the default outputs of the ingest workflow:
-
-- metadata = results/metadata.tsv
-- sequences = results/sequences.fasta
-
-### Dumping the full raw metadata from NCBI Datasets
-
-The workflow has a target for dumping the full raw metadata from NCBI
-Datasets.
-
-```bash
-nextstrain build ingest dump_ncbi_dataset_report
-```
-
-This will produce the file `ingest/data/ncbi_dataset_report_raw.tsv`,
-which you can inspect to determine what fields and data to use if you
-want to configure the workflow for your pathogen.
-
-## Defaults
-
-The defaults directory contains all of the default configurations for
+The defaults directory contains all of the default configuration for
 the ingest workflow.
 
-[defaults/config.yaml](defaults/config.yaml) contains all of the
-default configuration parameters used for the ingest workflow. Use
-Snakemake's `--configfile`/`--config` options to override these
-default values.
+[defaults/config.yaml][] contains all of the default configuration
+parameters used for the ingest workflow. Use Snakemake's
+`--configfile`/`--config` options to override these default values.
 
 ## Snakefile and rules
 
 The rules directory contains separate Snakefiles (`*.smk`) as modules
 of the core ingest workflow. The modules of the workflow are in
-separate files to keep the main ingest [Snakefile](Snakefile) succinct
-and organized.
-
-The `workdir` is hardcoded to be the ingest directory so all filepaths
-for inputs/outputs should be relative to the ingest directory.
-
-Modules are all
-[included](https://snakemake.readthedocs.io/en/stable/snakefiles/modularization.html#includes)
-in the main Snakefile in the order that they are expected to run.
-
-### Nextclade
-
-Nextstrain is pushing to standardize ingest workflows with Nextclade
-runs to include Nextclade outputs in our publicly hosted data.
-However, if a Nextclade dataset does not already exist, it requires
-curated data as input, so we are making Nextclade steps optional here.
-
-If Nextclade config values are included, the Nextclade rules will
-create the final metadata TSV by joining the Nextclade output with the
-metadata. If Nextclade configs are not included, we rename the subset
-metadata TSV to the final metadata TSV.
-
-To run Nextclade rules, include the `defaults/nextclade_config.yaml`
-config file with:
-
-```bash
-nextstrain build ingest --configfile defaults/nextclade_config.yaml
-```
-
-> [!TIP]
-> If the Nextclade dataset is stable and you always want to run the
-> Nextclade rules as part of ingest, we recommend moving the Nextclade
-> related config parameters from the `defaults/nextclade_config.yaml`
-> file to the default config file `defaults/config.yaml`.
-
-## Build configs
-
-The build-configs directory contains custom configs and rules that
-override and/or extend the default workflow.
-
-- [nextstrain-automation](build-configs/nextstrain-automation/) -
-  automated internal Nextstrain builds.
+separate files to keep the main ingest [Snakefile][] succinct and
+organized. Modules are all [included][] in the main Snakefile in the
+order that they are expected to run.
 
 ## Vendored
 
-This repository uses
-[`git subrepo`](https://github.com/ingydotnet/git-subrepo) to manage copies
-of ingest scripts in [vendored](vendored), from
-[nextstrain/ingest](https://github.com/nextstrain/ingest).
+This repository uses [`git subrepo`][] to manage copies of ingest
+scripts in [vendored][], from [nextstrain/ingest][].
+
+See [vendored/README.md][] for instructions on how to update the
+vendored scripts.
 
-See [vendored/README.md](vendored/README.md#vendoring) for
-instructions on how to update the vendored scripts.
+[defaults/config.yaml]: ./defaults/config.yaml
+[`git subrepo`]: https://github.com/ingydotnet/git-subrepo
+[included]: https://snakemake.readthedocs.io/en/stable/snakefiles/modularization.html#includes
+[nextstrain/ingest]: https://github.com/nextstrain/ingest
+[Snakefile]: ./Snakefile
+[vendored]: ./vendored
+[vendored/README.md]: ./vendored/README.md#vendoring
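The `--configfile`/`--config` overrides the README points at work through `nextstrain build` as well; a quick sketch (the taxon ID shown is the repo's own default, and the override file name is hypothetical):

```bash
# One-off override of a single default value...
nextstrain build ingest --config ncbi_taxon_id="11089"

# ...or keep a set of overrides in a separate file.
nextstrain build ingest --configfile my-overrides.yaml
```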
diff --git a/ingest/Snakefile b/ingest/Snakefile
index 98b14a2..5d81090 100644
--- a/ingest/Snakefile
+++ b/ingest/Snakefile
@@ -1,58 +1,20 @@
-"""
-This is the main ingest Snakefile that orchestrates the full ingest workflow
-and defines its default outputs.
-"""
-
-
-# The workflow filepaths are written relative to this Snakefile's base
-# directory
 workdir: workflow.current_basedir
-
-
-# Use default configuration values. Override with Snakemake's
-# --configfile/--config options.
+# Use default configuration values. Override with Snakemake's --configfile/--config options.
 configfile: "defaults/config.yaml"
 
-# This is the default rule that Snakemake will run when there are no
-# specified targets. The default output of the ingest workflow is
-# usually the curated metadata and sequences. Nextstrain-maintained
-# ingest workflows will produce metadata files with the standard
-# Nextstrain fields and additional fields that are pathogen specific.
-# We recommend using these standard fields in custom ingests as well
-# to minimize the customizations you will need for the downstream
-# phylogenetic workflow.
-
-
-# TODO: Add link to centralized docs on standard Nextstrain metadata fields
 rule all:
     input:
         "results/sequences.fasta",
         "results/metadata.tsv",
 
-# Note that only PATHOGEN-level customizations should be added to
-# these core steps, meaning they are custom rules necessary for all
-# builds of the pathogen. If there are build-specific customizations,
-# they should be added with the custom_rules imported below to ensure
-# that the core workflow is not complicated by build-specific rules.
 include: "rules/fetch_from_ncbi.smk"
 include: "rules/curate.smk"
 
-# We are pushing to standardize ingest workflows with Nextclade runs
-# to include Nextclade outputs in our publicly hosted data. However,
-# if a Nextclade dataset does not already exist, creating one requires
-# curated data as input, so we are making Nextclade steps optional
-# here.
-#
-# If Nextclade config values are included, the nextclade rules will
-# create the final metadata TSV by joining the Nextclade output with
-# the metadata. If Nextclade configs are not included, we rename the
-# subset metadata TSV to the final metadata TSV. To run nextclade.smk
-# rules, include the `defaults/nextclade_config.yaml` config file with
-# `nextstrain build ingest --configfile
-# defaults/nextclade_config.yaml`.
+# If a `nextclade` config is provided, the nextclade rules will create
+# the final metadata TSV by joining the Nextclade output with the
+# metadata. Otherwise, we rename the subset metadata TSV to the final
+# metadata TSV.
 if "nextclade" in config:
     include: "rules/nextclade.smk"
 
@@ -66,21 +28,20 @@ else:
             metadata="results/metadata.tsv",
         shell:
             """
-            mv {input.metadata} {output.metadata}
+            mv {input.metadata:q} {output.metadata:q}
            """
 
 
-# Allow users to import custom rules provided via the config.
-# This allows users to run custom rules that can extend or override
-# the workflow. A concrete example of using custom rules is the
-# extension of the workflow with rules to support the Nextstrain
-# automation that uploads files and sends internal Slack
-# notifications. For extensions, the user will have to specify the
-# custom rule targets when running the workflow. For overrides, the
-# custom Snakefile will have to use the `ruleorder` directive to allow
-# Snakemake to handle ambiguous rules
-# https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#handling-ambiguous-rules
-if "custom_rules" in config:
-    for rule_file in config["custom_rules"]:
-        include: rule_file
+rule clean:
+    params:
+        targets = [
+            "benchmarks",
+            "data",
+            "logs",
+            "results",
+        ]
+    shell:
+        """
+        rm -rfv {params.targets}
+        """
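The new `clean` rule gives the workflow a reset target. Assuming the same invoke-by-rule-name pattern the old README used for other targets, a run would look like:

```bash
# Delete benchmarks/, data/, logs/, and results/ so the next run starts fresh.
nextstrain build ingest clean
```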
diff --git a/ingest/defaults/config.yaml b/ingest/defaults/config.yaml
index e9af859..ae4c1ca 100644
--- a/ingest/defaults/config.yaml
+++ b/ingest/defaults/config.yaml
@@ -1,14 +1,5 @@
-# This configuration file should contain all required configuration parameters
-# for the ingest workflow to run to completion.
-#
-# Define optional config parameters with their default values here so that users
-# do not have to dig through the workflows to figure out the default values
-
-# Required to fetch from Entrez
-entrez_search_term: ""
-
-# Required to fetch from NCBI Datasets
-ncbi_taxon_id: ""
+# NCBI taxonomy ID for yellow fever virus
+ncbi_taxon_id: "11089"
 
 # The list of NCBI Datasets fields to include from NCBI Datasets output
 # These need to be the "mnemonics" of the NCBI Datasets fields, see docs for full list of fields
@@ -34,16 +25,18 @@ ncbi_datasets_fields:
 # Config parameters related to the curate pipeline
 curate:
-  # URL pointed to public generalized geolocation rules
+  # URL pointing to the public generalized geolocation rules.
   # For the Nextstrain team, this is currently
-  # "https://raw.githubusercontent.com/nextstrain/ncov-ingest/master/source-data/gisaid_geoLocationRules.tsv"
+  # "https://raw.githubusercontent.com/nextstrain/ncov-ingest/master/source-data/gisaid_geoLocationRules.tsv".
   geolocation_rules_url: "https://raw.githubusercontent.com/nextstrain/ncov-ingest/master/source-data/gisaid_geoLocationRules.tsv"
   # The path to the local geolocation rules within the pathogen repo
   # The path should be relative to the ingest directory.
   local_geolocation_rules: "defaults/geolocation_rules.tsv"
-  # List of field names to change where the key is the original field name and the value is the new field name
-  # The original field names should match the ncbi_datasets_fields provided above.
-  # This is the first step in the pipeline, so any references to field names in the configs below should use the new field names
+  # List of field names to change where the key is the original field
+  # name and the value is the new field name. The original field names
+  # should match the ncbi_datasets_fields provided above. This is the
+  # first step in the pipeline, so any references to field names in
+  # the configs below should use the new field names.
   field_map:
     accession: accession
     accession_version: accession_version
@@ -69,8 +62,9 @@ curate:
   strain_backup_fields: ["accession"]
   # List of date fields to standardize to ISO format YYYY-MM-DD
   date_fields: ["date", "date_released", "date_updated"]
-  # List of expected date formats that are present in the date fields provided above
-  # These date formats should use directives expected by datetime
+  # List of expected date formats that are present in the date fields
+  # provided above. These date formats should use directives expected
+  # by datetime.
   # See https://docs.python.org/3.9/library/datetime.html#strftime-and-strptime-format-codes
   expected_date_formats: ["%Y", "%Y-%m", "%Y-%m-%d", "%Y-%m-%dT%H:%M:%SZ"]
   titlecase:
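The `expected_date_formats` values are standard Python `strptime` directives, so a new format string can be sanity-checked from the shell before it goes into the config; the sample dates below are made up:

```bash
# Each format should round-trip a representative date string without error.
python3 -c 'from datetime import datetime; print(datetime.strptime("2001-03", "%Y-%m"))'
python3 -c 'from datetime import datetime; print(datetime.strptime("2001-03-11T04:05:06Z", "%Y-%m-%dT%H:%M:%SZ"))'
```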
diff --git a/ingest/defaults/nextclade_config.yaml b/ingest/defaults/nextclade_config.yaml
deleted file mode 100644
index 3c48bc8..0000000
--- a/ingest/defaults/nextclade_config.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# Nextclade parameters to include if you are running Nextclade as a part of your ingest workflow
-# Note that this requires a Nextclade dataset to already exist for your pathogen.
-nextclade:
-  # The name of the Nextclade dataset to use for running nextclade.
-  # Run `nextclade dataset list` to get a full list of available Nextclade datasets
-  dataset_name: ""
-  # Path to the mapping for renaming Nextclade output columns
-  # The path should be relative to the ingest directory
-  field_map: "config/nextclade_field_map.tsv"
-  # This is the ID field you would use to match the Nextclade output with the record metadata.
-  # This should be the new name that you have defined in your field map.
-  id_field: "seqName"
diff --git a/ingest/defaults/nextclade_field_map.tsv b/ingest/defaults/nextclade_field_map.tsv
deleted file mode 100644
index 513b0fd..0000000
--- a/ingest/defaults/nextclade_field_map.tsv
+++ /dev/null
@@ -1,18 +0,0 @@
-# TSV file that is a mapping of column names for Nextclade output TSV
-# The first column should be the original column name of the Nextclade TSV
-# The second column should be the new column name to use in the final metadata TSV
-# Nextclade can have pathogen specific output columns so make sure to check which
-# columns would be useful for your downstream phylogenetic analysis.
-seqName	seqName
-clade	clade
-lineage	lineage
-coverage	coverage
-totalMissing	missing_data
-totalSubstitutions	divergence
-totalNonACGTNs	nonACGTN
-qc.missingData.status	QC_missing_data
-qc.mixedSites.status	QC_mixed_sites
-qc.privateMutations.status	QC_rare_mutations
-qc.frameShifts.status	QC_frame_shifts
-qc.stopCodons.status	QC_stop_codons
-frameShifts	frame_shifts
diff --git a/ingest/rules/curate.smk b/ingest/rules/curate.smk
index 112eb34..bf7bfa8 100644
--- a/ingest/rules/curate.smk
+++ b/ingest/rules/curate.smk
@@ -13,15 +13,13 @@
 """
 
-# The following two rules can be ignored if you choose not to use the
-# generalized geolocation rules that are shared across pathogens.
-# The Nextstrain team will try to maintain a generalized set of geolocation
-# rules that can then be overridden by local geolocation rules per pathogen repo.
 rule fetch_general_geolocation_rules:
     output:
         general_geolocation_rules="data/general-geolocation-rules.tsv",
     params:
         geolocation_rules_url=config["curate"]["geolocation_rules_url"],
+    benchmark:
+        "benchmarks/fetch_general_geolocation_rules.txt"
     shell:
         """
         curl {params.geolocation_rules_url} > {output.general_geolocation_rules}
         """
@@ -34,10 +32,12 @@ rule concat_geolocation_rules:
         local_geolocation_rules=config["curate"]["local_geolocation_rules"],
     output:
         all_geolocation_rules="data/all-geolocation-rules.tsv",
+    benchmark:
+        "benchmarks/concat_geolocation_rules.txt"
     shell:
-        # why is this `>>` and not `>`
         """
-        cat {input.general_geolocation_rules} {input.local_geolocation_rules} >> {output.all_geolocation_rules}
+        cat {input.general_geolocation_rules} {input.local_geolocation_rules} \
+            > {output.all_geolocation_rules}
         """
@@ -48,17 +48,9 @@ def format_field_map(field_map: dict[str, str]) -> str:
     return " ".join([f'"{key}"="{value}"' for key, value in field_map.items()])
 
 
-# This curate pipeline is based on existing pipelines for pathogen repos using NCBI data.
-# You may want to add and/or remove steps from the pipeline for custom metadata
-# curation for your pathogen. Note that the curate pipeline is streaming NDJSON
-# records between scripts, so any custom scripts added to the pipeline should expect
-# the input as NDJSON records from stdin and output NDJSON records to stdout.
-# The final step of the pipeline should convert the NDJSON records to two
-# separate files: a metadata TSV and a sequences FASTA.
 rule curate:
     input:
         sequences_ndjson="data/ncbi.ndjson",
-        # Change the geolocation_rules input path if you are removing the above two rules
         all_geolocation_rules="data/all-geolocation-rules.tsv",
         annotations=config["curate"]["annotations"],
     output:
@@ -124,8 +116,11 @@ rule subset_metadata:
         subset_metadata="data/subset_metadata.tsv",
     params:
         metadata_fields=",".join(config["curate"]["metadata_columns"]),
+    benchmark:
+        "benchmarks/subset_metadata.txt"
     shell:
         """
         tsv-select -H -f {params.metadata_fields} \
-            {input.metadata} > {output.subset_metadata}
+            {input.metadata} \
+            > {output.subset_metadata}
         """
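The `subset_metadata` rule above is a thin wrapper around `tsv-utils`; when tuning `metadata_columns`, the same selection can be previewed by hand (a sketch — the column names and input path are illustrative):

```bash
# Preview the curated metadata keeping only a few columns of interest.
tsv-select -H -f accession,date,region data/all_metadata.tsv | head
```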
diff --git a/ingest/rules/fetch_from_ncbi.smk b/ingest/rules/fetch_from_ncbi.smk
index ed350ce..2194c6f 100644
--- a/ingest/rules/fetch_from_ncbi.smk
+++ b/ingest/rules/fetch_from_ncbi.smk
@@ -9,35 +9,8 @@ OUTPUTS:
 
     ndjson = data/ncbi.ndjson
 
-There are two different approaches for fetching data from NCBI.
-Choose the one that works best for the pathogen data and edit the workflow config
-to provide the correct parameter.
-
-1. Fetch with NCBI Datasets (https://www.ncbi.nlm.nih.gov/datasets/)
-   - requires `ncbi_taxon_id` config
-   - Directly returns NDJSON without custom parsing
-   - Fastest option for large datasets (e.g. SARS-CoV-2)
-   - Only returns metadata fields that are available through NCBI Datasets
-   - Only works for viral genomes
-
-2. Fetch from Entrez (https://www.ncbi.nlm.nih.gov/books/NBK25501/)
-   - requires `entrez_search_term` config
-   - Returns all available data via a GenBank file
-   - Requires a custom script to parse the necessary fields from the GenBank file
 """
 
-
-# This ruleorder determines which rule to use to produce the final NCBI NDJSON file.
-# The default is set to use NCBI Datasets since it does not require a custom script.
-# Switch the rule order if you plan to use Entrez
-ruleorder: format_ncbi_datasets_ndjson > parse_genbank_to_ndjson
-
-
-###########################################################################
-####################### 1. Fetch from NCBI Datasets #######################
-###########################################################################
-
-
 rule fetch_ncbi_dataset_package:
     params:
         ncbi_taxon_id=config["ncbi_taxon_id"],
@@ -48,7 +21,6 @@
     output:
         dataset_package=temp("data/ncbi_dataset.zip"),
     benchmark:
         "benchmarks/fetch_ncbi_dataset_package.txt"
     shell:
-        # what's the `:q` mean
         """
         datasets download virus genome taxon {params.ncbi_taxon_id:q} \
             --no-progressbar \
             --filename {output.dataset_package}
         """
 
 
-# Note: This rule is not part of the default workflow!
-# It is intended to be used as a specific target for users to be able
-# to inspect and explore the full raw metadata from NCBI Datasets.
+# Note: This rule is not part of the default workflow! It is intended
+# to be used as a specific target to be able to inspect and explore
+# the full raw metadata from NCBI Datasets.
 rule dump_ncbi_dataset_report:
     input:
         dataset_package="data/ncbi_dataset.zip",
     output:
         ncbi_dataset_tsv="data/ncbi_dataset_report_raw.tsv",
+    benchmark:
+        "benchmarks/dump_ncbi_dataset_report.txt"
     shell:
         """
         dataformat tsv virus-genome \
-            --package {input.dataset_package} > {output.ncbi_dataset_tsv}
+            --package {input.dataset_package} \
+            > {output.ncbi_dataset_tsv}
         """
@@ -76,13 +51,13 @@ rule extract_ncbi_dataset_sequences:
     input:
         dataset_package="data/ncbi_dataset.zip",
     output:
         ncbi_dataset_sequences=temp("data/ncbi_dataset_sequences.fasta"),
-    # why benchmarks here but not elsewhere
     benchmark:
         "benchmarks/extract_ncbi_dataset_sequences.txt"
     shell:
         """
         unzip -jp {input.dataset_package} \
-            ncbi_dataset/data/genomic.fna > {output.ncbi_dataset_sequences}
+            ncbi_dataset/data/genomic.fna \
+            > {output.ncbi_dataset_sequences}
         """
@@ -111,10 +86,6 @@ rule format_ncbi_dataset_report:
         """
 
 
-# Technically you can bypass this step and directly provide FASTA and TSV files
-# as input files for the curate pipeline.
-# We do the formatting here to have a uniform NDJSON file format for the raw
-# data that we host on data.nextstrain.org
 rule format_ncbi_datasets_ndjson:
     input:
         ncbi_dataset_sequences="data/ncbi_dataset_sequences.fasta",
@@ -136,38 +107,3 @@ rule format_ncbi_datasets_ndjson:
             --duplicate-reporting warn \
             2> {log} > {output.ndjson}
         """
-
-
-###########################################################################
-########################## 2. Fetch from Entrez ###########################
-###########################################################################
-
-
-rule fetch_from_ncbi_entrez:
-    params:
-        term=config["entrez_search_term"],
-    output:
-        genbank="data/genbank.gb",
-    # Allow retries in case of network errors
-    retries: 5
-    benchmark:
-        "benchmarks/fetch_from_ncbi_entrez.txt"
-    shell:
-        """
-        vendored/fetch-from-ncbi-entrez \
-            --term {params.term:q} \
-            --output {output.genbank}
-        """
-
-
-rule parse_genbank_to_ndjson:
-    input:
-        genbank="data/genbank.gb",
-    output:
-        ndjson="data/ncbi.ndjson",
-    benchmark:
-        "benchmarks/parse_genbank_to_ndjson.txt"
-    shell:
-        """
-        # Add in custom script to parse needed fields from GenBank file to NDJSON file
-        """
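As its comment says, `dump_ncbi_dataset_report` is an opt-in target; the removed README invoked it by rule name, which still applies here:

```bash
# Write data/ncbi_dataset_report_raw.tsv for manual inspection.
nextstrain build ingest dump_ncbi_dataset_report
```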
diff --git a/ingest/rules/nextclade.smk b/ingest/rules/nextclade.smk
deleted file mode 100644
index ffbeab8..0000000
--- a/ingest/rules/nextclade.smk
+++ /dev/null
@@ -1,97 +0,0 @@
-"""
-This part of the workflow handles running Nextclade on the curated metadata
-and sequences.
-
-REQUIRED INPUTS:
-
-    metadata = data/subset_metadata.tsv
-    sequences = results/sequences.fasta
-
-OUTPUTS:
-
-    metadata = results/metadata.tsv
-    nextclade = results/nextclade.tsv
-    alignment = results/alignment.fasta
-    translations = results/translations.zip
-
-See Nextclade docs for more details on usage, inputs, and outputs if you would
-like to customize the rules:
-https://docs.nextstrain.org/projects/nextclade/page/user/nextclade-cli.html
-"""
-
-DATASET_NAME = config["nextclade"]["dataset_name"]
-
-
-rule get_nextclade_dataset:
-    """Download Nextclade dataset"""
-    output:
-        dataset=f"data/nextclade_data/{DATASET_NAME}.zip",
-    params:
-        dataset_name=DATASET_NAME,
-    shell:
-        # should this get updated to `nextclade3`?
-        """
-        nextclade2 dataset get \
-            --name={params.dataset_name:q} \
-            --output-zip={output.dataset} \
-            --verbose
-        """
-
-
-rule run_nextclade:
-    input:
-        dataset=f"data/nextclade_data/{DATASET_NAME}.zip",
-        sequences="results/sequences.fasta",
-    output:
-        nextclade="results/nextclade.tsv",
-        alignment="results/alignment.fasta",
-        translations="results/translations.zip",
-    params:
-        # The lambda is used to deactivate automatic wildcard expansion.
-        # https://github.com/snakemake/snakemake/blob/384d0066c512b0429719085f2cf886fdb97fd80a/snakemake/rules.py#L997-L1000
-        translations=lambda w: "results/translations/{gene}.fasta",
-    shell:
-        """
-        nextclade2 run \
-            {input.sequences} \
-            --input-dataset {input.dataset} \
-            --output-tsv {output.nextclade} \
-            --output-fasta {output.alignment} \
-            --output-translations {params.translations}
-
-        zip -rj {output.translations} results/translations
-        """
-
-
-rule join_metadata_and_nextclade:
-    input:
-        nextclade="results/nextclade.tsv",
-        metadata="data/subset_metadata.tsv",
-        nextclade_field_map=config["nextclade"]["field_map"],
-    output:
-        metadata="results/metadata.tsv",
-    params:
-        metadata_id_field=config["curate"]["output_id_field"],
-        nextclade_id_field=config["nextclade"]["id_field"],
-    shell:
-        """
-        export SUBSET_FIELDS=`grep -v '^#' {input.nextclade_field_map} | awk '{{print $1}}' | tr '\n' ',' | sed 's/,$//g'`
-
-        csvtk -tl cut -f $SUBSET_FIELDS \
-            {input.nextclade} \
-        | csvtk -tl rename2 \
-            -F \
-            -f '*' \
-            -p '(.+)' \
-            -r '{{kv}}' \
-            -k {input.nextclade_field_map} \
-        | tsv-join -H \
-            --filter-file - \
-            --key-fields {params.nextclade_id_field} \
-            --data-fields {params.metadata_id_field} \
-            --append-fields '*' \
-            --write-all ? \
-            {input.metadata} \
-        | tsv-select -H --exclude {params.nextclade_id_field} \
-        > {output.metadata}
-        """
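For reference, the `tsv-join` step in the rules deleted above appends Nextclade columns onto the metadata, writing `?` for records Nextclade never saw; a toy sketch with hypothetical file and field names:

```bash
# Join clade calls onto metadata by matching seqName against accession.
tsv-join -H \
    --filter-file results/nextclade.tsv \
    --key-fields seqName \
    --data-fields accession \
    --append-fields clade \
    --write-all ? \
    data/subset_metadata.tsv > results/metadata.tsv
```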