-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Harmonize ingest with pathogen repo guide #35
Changes from 8 commits
c6ed7d1
dedaf58
6f2de8d
364640a
351ac37
5a4f29a
c73c566
7392f61
ea8daca
2ffef24
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,10 +4,7 @@ min_version( | |
"7.7.0" | ||
) # Snakemake 7.7.0 introduced `retries` directive used in fetch-sequences | ||
|
||
if not config: | ||
|
||
configfile: "config/config.yaml" | ||
|
||
configfile: "defaults/config.yaml" | ||
|
||
send_slack_notifications = config.get("send_slack_notifications", False) | ||
|
||
|
@@ -57,12 +54,13 @@ rule all: | |
_get_all_targets, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto missed this when I was reviewing nextstrain/zika@cee62cf, but I think the
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ohh, I like the simplified default targets. Fixed in ed1bf1f |
||
|
||
|
||
include: "workflow/snakemake_rules/fetch_sequences.smk" | ||
include: "workflow/snakemake_rules/transform.smk" | ||
include: "workflow/snakemake_rules/split_serotypes.smk" | ||
include: "workflow/snakemake_rules/nextclade.smk" | ||
|
||
include: "rules/fetch_from_ncbi.smk" | ||
include: "rules/curate.smk" | ||
include: "rules/split_serotypes.smk" | ||
include: "rules/nextclade.smk" | ||
|
||
if config.get("upload", False): | ||
# Include custom rules defined in the config. | ||
if "custom_rules" in config: | ||
for rule_file in config["custom_rules"]: | ||
|
||
include: "workflow/snakemake_rules/upload.smk" | ||
include: rule_file |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# This configuration file should contain all required configuration parameters | ||
# for the ingest workflow to run with additional Nextstrain automation rules. | ||
|
||
# Custom rules to run as part of the Nextstrain automated workflow | ||
# The paths should be relative to the ingest directory. | ||
custom_rules: | ||
- build-configs/nextstrain-automation/upload.smk | ||
|
||
# Nextstrain CloudFront domain to ensure that we invalidate CloudFront after the S3 uploads | ||
# This is required as long as we are using the AWS CLI for uploads | ||
cloudfront_domain: "data.nextstrain.org" | ||
|
||
# Nextstrain AWS S3 Bucket with pathogen prefix | ||
s3_dst: "s3://nextstrain-data/files/workflows/dengue" | ||
|
||
# Mapping of files to upload | ||
files_to_upload: | ||
genbank.ndjson.xz: data/genbank.ndjson | ||
all_sequences.ndjson.xz: data/sequences.ndjson | ||
metadata_all.tsv.zst: results/metadata_all.tsv | ||
sequences_all.fasta.zst: results/sequences_all.fasta | ||
metadata_denv1.tsv.zst: results/metadata_denv1.tsv | ||
sequences_denv1.fasta.zst: results/sequences_denv1.fasta | ||
metadata_denv2.tsv.zst: results/metadata_denv2.tsv | ||
sequences_denv2.fasta.zst: results/sequences_denv2.fasta | ||
metadata_denv3.tsv.zst: results/metadata_denv3.tsv | ||
sequences_denv3.fasta.zst: results/sequences_denv3.fasta | ||
metadata_denv4.tsv.zst: results/metadata_denv4.tsv | ||
sequences_denv4.fasta.zst: results/sequences_denv4.fasta |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
""" | ||
This part of the workflow handles uploading files to AWS S3. | ||
|
||
Files to upload must be defined in the `files_to_upload` config param, where | ||
the keys are the remote files and the values are the local filepaths | ||
relative to the ingest directory. | ||
|
||
Produces a single file for each uploaded file: | ||
"results/upload/{remote_file}.upload" | ||
|
||
The rule `upload_all` can be used as a target to upload all files. | ||
""" | ||
import os | ||
|
||
slack_envvars_defined = "SLACK_CHANNELS" in os.environ and "SLACK_TOKEN" in os.environ | ||
send_notifications = ( | ||
config.get("send_slack_notifications", False) and slack_envvars_defined | ||
) | ||
|
||
|
||
rule upload_to_s3: | ||
input: | ||
file_to_upload=lambda wildcards: config["files_to_upload"][wildcards.remote_file], | ||
output: | ||
"results/upload/{remote_file}.upload", | ||
params: | ||
quiet="" if send_notifications else "--quiet", | ||
s3_dst=config["s3_dst"], | ||
cloudfront_domain=config["cloudfront_domain"], | ||
shell: | ||
""" | ||
./vendored/upload-to-s3 \ | ||
{params.quiet} \ | ||
{input.file_to_upload:q} \ | ||
{params.s3_dst:q}/{wildcards.remote_file:q} \ | ||
{params.cloudfront_domain} 2>&1 | tee {output} | ||
""" | ||
|
||
|
||
rule upload_all: | ||
input: | ||
uploads=[ | ||
f"results/upload/{remote_file}.upload" | ||
for remote_file in config["files_to_upload"].keys() | ||
], | ||
output: | ||
touch("results/upload_all.done") |
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missed this when I was reviewing nextstrain/zika@cee62cf, but this line can be removed since we are no longer sending Slack notifications.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch! Dropped slack related code and docs in 2e14f00