# example_config.yaml

# This file contains a high-level summary of pipeline configuration and inputs.
# It is ingested by the Snakefile, and also intended to be human-readable.

# Sample table (CSV); can be created using example_sample_table.csv
samples: 'sample_table.csv'

# Directory in which all results are placed (relative or absolute path)
result_dir: 'results_dir'

# Minimum quality threshold, used in both 'trim_galore' and 'ivar trim'
min_qual: 20


# Minimum read length to retain after trimming, used in both 'trim_galore'
# and 'ivar trim'
min_len: 20

# Path (from the snakemake dir) to the .bed file defining the amplicon
# primer scheme
scheme_bed: 'resources/primer_schemes/artic_v3/nCoV-2019.bed'

# Path (from the snakemake dir) to the bwa-indexed composite
# human + viral reference genome
composite_reference: 'data/composite_human_viral_reference.fna'

# Viral reference files and identifiers.
# - Used as the bwa reference genome when removing host sequences.
# - Also used as the 'ivar' reference genome in variant detection + consensus.
# - Used as the -r and -g arguments to 'quast'
#   (presumably -r = genome FASTA and -g = feature coordinates; confirm in
#   the Snakefile).
# - The contig name is needed by the host-removal filtering script.
viral_reference_contig_name: 'MN908947.3'
viral_reference_genome: 'data/MN908947.3.fasta'
viral_reference_feature_coords: 'data/MN908947.3.gff3'

# breseq settings.
# breseq_reference must be defined whenever run_breseq is true.
run_breseq: false
# Passed to 'breseq' as its --reference argument
breseq_reference: 'data/MN908947.3.gbk'
# Thresholds for minor variant detection, used as the
# --polymorphism-minimum-variant-coverage-each-strand and
# --polymorphism-frequency-cutoff arguments
polymorphism_variant_coverage: 2
polymorphism_frequency: 0.05

# Run freebayes (in addition to ivar) for variant and consensus calling
run_freebayes: true

# Kraken2 database location, used as the --db argument to 'kraken2'
kraken2_db: 'data/Kraken2/db'

# Optional setting for iVar's amplicon filter, see
# https://github.com/andersen-lab/ivar/commit/7027563fd75581c78dabc6040ebffdee2b24abe6
# - Leave the value empty (as below) if you do not want to use this setting.
# - To use it, set the value to "-f primer_pairs.tsv", replacing
#   primer_pairs.tsv with the correct file path.
primer_pairs_tsv:

# Consensus and variant calling ivar/samtools params, taken from
# https://github.com/connor-lab/ncov2019-artic-nf/blob/master/conf/illumina.config
# (mpileup_depth is presumably the samtools mpileup maximum depth - confirm
# against the Snakefile)
mpileup_depth: 100000
# iVar/freebayes frequency threshold to build consensus
var_freq_threshold: 0.75
# Minimum coverage depth to call variant
var_min_coverage_depth: 10
# iVar/freebayes frequency threshold to call variant (ivar variants: -t)
var_min_freq_threshold: 0.25
# iVar/freebayes minimum mapQ to call variant (ivar variants: -q)
# NOTE(review): the iVar manual describes 'ivar variants -q' as a minimum base
# quality score rather than a mapping quality - confirm the intended meaning.
var_min_variant_quality: 20

# Toggle for faster Pangolin analysis at the cost of accuracy
# (uses Pangolearn instead of Usher).
# Use for significantly larger datasets.
pangolin_fast: false

# Versions of software related to lineage calling.
# - Use numbers only (e.g., 3.1.1).
# - Dates (YYYY-mm-dd) are also accepted for pangolearn.
# - Leave blank for the latest version(s).
# NOTE(review): YAML 1.1 loaders read an unquoted two-part version such as 4.3
# as a float and an unquoted date as a date object - confirm the Snakefile
# handles these, or quote the value.
pangolin: 
constellations:
scorpio:

# Required for Pangolin <v4.0
pangolearn: 
pango-designation:

# Required for Pangolin v4+
pangolin-data:

# Versions for Nextclade (software & datasets). Leave blank for latest versions.
# - nextclade: software version. Input should use numbers only (e.g., 2.14.0).
#   Must be >=1.3.0.
# - nextclade-data: the nextclade dataset tag (refer to the available nextclade
#   datasets). Accepted tag format is 'YYYY-mm-ddTHH:MM:SSZ'.
#   Be as specific as possible with the desired dataset tag: a date alone
#   (YYYY-mm-dd) is accepted, but the corresponding timestamp (HH:MM:SS) will
#   then be assumed. SIGNAL will automatically adjust between the v2 and v3
#   dataset tag formats.
nextclade:
nextclade-data:

# Nextclade v2 only.
# nextclade-include-recomb: setting this to false will download the
# recombinant-sequence-free version of the nextclade dataset.
nextclade-include-recomb: true

# ---------------------------------------------------------------------------
# ANYTHING BELOW IS ONLY NEEDED IF USING NCOV-TOOLS SUMMARIES
# ---------------------------------------------------------------------------
# Path from snakemake dir to the .bed file defining the actual amplicon
# locations (not the primers)
amplicon_loc_bed: 'resources/primer_schemes/artic_v3/ncov-qc_V3.scheme.bed'

# FASTA of sequences to include with the pangolin phylogeny
phylo_include_seqs: 'data/blank.fasta'

# List of negative control sample names or prefixes
# (e.g., ['Blank'] will cover Blank1, Blank2, etc.)
negative_control_prefix: []