diff --git a/ingest/README.md b/ingest/README.md index 52302b82..b7eb8152 100644 --- a/ingest/README.md +++ b/ingest/README.md @@ -25,8 +25,8 @@ nextstrain build . This will produce two files (within the `ingest` directory): -- `data/metadata.tsv` -- `data/sequences.fasta` +- `results/metadata.tsv` +- `results/sequences.fasta` Run the complete ingest pipeline and upload results to AWS S3 with diff --git a/ingest/Snakefile b/ingest/Snakefile index 63872f48..0566d575 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -14,7 +14,7 @@ send_slack_notifications = config.get("send_slack_notifications", False) def _get_all_targets(wildcards): # Default targets are the metadata TSV and sequences FASTA files - all_targets = ["data/sequences.fasta", "data/metadata.tsv"] + all_targets = ["results/sequences.fasta", "results/metadata.tsv"] # Add additional targets based on upload config upload_config = config.get("upload", {}) diff --git a/ingest/config/optional.yaml b/ingest/config/optional.yaml index 23726966..d445e075 100644 --- a/ingest/config/optional.yaml +++ b/ingest/config/optional.yaml @@ -10,8 +10,8 @@ upload: files_to_upload: genbank.ndjson.xz: data/genbank.ndjson all_sequences.ndjson.xz: data/sequences.ndjson - metadata.tsv.gz: data/metadata.tsv - sequences.fasta.xz: data/sequences.fasta + metadata.tsv.gz: results/metadata.tsv + sequences.fasta.xz: results/sequences.fasta alignment.fasta.xz: data/alignment.fasta insertions.csv.gz: data/insertions.csv translations.zip: data/translations.zip diff --git a/ingest/workflow/snakemake_rules/nextclade.smk b/ingest/workflow/snakemake_rules/nextclade.smk index 385ad6e4..f10a3f9e 100644 --- a/ingest/workflow/snakemake_rules/nextclade.smk +++ b/ingest/workflow/snakemake_rules/nextclade.smk @@ -19,7 +19,7 @@ rule nextclade_dataset_hMPXV: rule align: input: - sequences="data/sequences.fasta", + sequences="results/sequences.fasta", dataset="hmpxv.zip", output: alignment="data/alignment.fasta", @@ -41,7 +41,7 @@ rule align: rule nextclade: input: - sequences="data/sequences.fasta", + sequences="results/sequences.fasta", dataset="mpxv.zip", output: "data/nextclade.tsv", @@ -58,7 +58,7 @@ rule join_metadata_clades: metadata="data/metadata_raw.tsv", nextclade_field_map=config["nextclade"]["field_map"], output: - metadata="data/metadata.tsv", + metadata="results/metadata.tsv", params: id_field=config["transform"]["id_field"], nextclade_id_field=config["nextclade"]["id_field"], diff --git a/ingest/workflow/snakemake_rules/slack_notifications.smk b/ingest/workflow/snakemake_rules/slack_notifications.smk index 7dea0e7c..9eb04639 100644 --- a/ingest/workflow/snakemake_rules/slack_notifications.smk +++ b/ingest/workflow/snakemake_rules/slack_notifications.smk @@ -36,7 +36,7 @@ rule notify_on_genbank_record_change: rule notify_on_metadata_diff: input: - metadata="data/metadata.tsv", + metadata="results/metadata.tsv", output: touch("data/notify/metadata-diff.done"), params: diff --git a/ingest/workflow/snakemake_rules/transform.smk b/ingest/workflow/snakemake_rules/transform.smk index 0c3cf6b1..fe7d7c16 100644 --- a/ingest/workflow/snakemake_rules/transform.smk +++ b/ingest/workflow/snakemake_rules/transform.smk @@ -7,7 +7,7 @@ formats and expects input file This will produce output files as metadata = "data/metadata_raw.tsv" - sequences = "data/sequences.fasta" + sequences = "results/sequences.fasta" Parameters are expected to be defined in `config.transform`. """ @@ -43,7 +43,7 @@ rule transform: annotations=config["transform"]["annotations"], output: metadata="data/metadata_raw.tsv", - sequences="data/sequences.fasta", + sequences="results/sequences.fasta", log: "logs/transform.txt", params: diff --git a/ingest/workflow/snakemake_rules/upload.smk b/ingest/workflow/snakemake_rules/upload.smk index 7941351c..f18aebe9 100644 --- a/ingest/workflow/snakemake_rules/upload.smk +++ b/ingest/workflow/snakemake_rules/upload.smk @@ -35,7 +35,7 @@ def _get_upload_inputs(wildcards): if file_to_upload == "data/genbank.ndjson": flag_file = "data/notify/genbank-record-change.done" - elif file_to_upload == "data/metadata.tsv": + elif file_to_upload == "results/metadata.tsv": flag_file = "data/notify/metadata-diff.done" inputs["notify_flag_file"] = flag_file