---
# This file contains a high-level summary of pipeline configuration and inputs.
# It is ingested by the Snakefile, and also intended to be human-readable.
# CSV sample table (can be created from example_sample_table.csv).
samples: 'sample_table.csv'

# Folder that receives all results; a relative or an absolute path both work.
result_dir: 'results_dir'

# Minimum quality threshold, used by both 'trim_galore' and 'ivar trim'.
min_qual: 20

# Minimum read length kept after trimming, used by both 'trim_galore' and 'ivar trim'.
min_len: 20
# Path from the snakemake dir to the .bed file defining the amplicon primer scheme
scheme_bed: 'resources/primer_schemes/artic_v3/nCoV-2019.bed'
# Path from the snakemake dir to the bwa-indexed human + viral reference genome
composite_reference: 'data/composite_human_viral_reference.fna'
# Viral reference settings:
#  - used as the bwa reference genome when removing host sequences
#  - also used as the 'ivar' reference genome in variant detection + consensus
#  - used as the -r, -g arguments to 'quast'
#  - the contig name is needed by the host-removal filtering script
viral_reference_contig_name: 'MN908947.3'
viral_reference_genome: 'data/MN908947.3.fasta'
viral_reference_feature_coords: 'data/MN908947.3.gff3'
# Switch for the 'breseq' step.
# breseq_reference must be defined whenever run_breseq is true.
run_breseq: false

# Used as the --reference argument to 'breseq'.
breseq_reference: 'data/MN908947.3.gbk'

# Parameters that set the thresholds for minor variant detection. Used as the
# --polymorphism-minimum-variant-coverage-each-strand and
# --polymorphism-frequency-cutoff arguments.
polymorphism_variant_coverage: 2
polymorphism_frequency: 0.05
# Run freebayes for variant and consensus calling (in addition to ivar).
run_freebayes: true

# Used as the --db argument to 'kraken2'.
kraken2_db: 'data/Kraken2/db'
# Setting for ivar's amplicon filter; see
# https://github.com/andersen-lab/ivar/commit/7027563fd75581c78dabc6040ebffdee2b24abe6
# If you do not want to use this setting, it must stay unset: keep it as null
# (a blank value is equivalent).
# If you do want to use it, set it to "-f primer_pairs.tsv" with the correct file path.
primer_pairs_tsv: null
# Consensus and variant calling ivar/samtools params, taken from
# https://github.com/connor-lab/ncov2019-artic-nf/blob/master/conf/illumina.config
mpileup_depth: 100000
# ivar/freebayes frequency threshold used to build the consensus
var_freq_threshold: 0.75
# Minimum coverage depth required to call a variant
var_min_coverage_depth: 10
# ivar/freebayes frequency threshold to call a variant (ivar variants: -t)
var_min_freq_threshold: 0.25
# ivar/freebayes minimum mapQ to call a variant (ivar variants: -q)
# NOTE(review): the ivar manual describes -q as a minimum base quality score
# rather than a mapping quality — confirm which one is meant here.
var_min_variant_quality: 20
# Toggle faster Pangolin analysis at the cost of accuracy (uses Pangolearn instead of Usher).
# Use for significantly larger datasets.
pangolin_fast: false

# Versions of software related to lineage calling.
# Use numbers only (e.g., 3.1.1); dates (YYYY-mm-dd) are also accepted for pangolearn.
# Leave a value as null (or blank) to use the latest version.
# NOTE(review): an unquoted two-part version such as 4.10 is read by YAML as the
# float 4.1; quoting it ('4.10') avoids that — confirm the pipeline accepts quoted values.
pangolin: null
constellations: null
scorpio: null

# Required for Pangolin <v4.0
pangolearn: null
pango-designation: null

# Required for Pangolin v4+
pangolin-data: null
# Versions for Nextclade (software and datasets).
# Leave a value as null (or blank) for the latest version.
# nextclade: software version. Use numbers only (e.g., 2.14.0). Must be >=1.3.0.
# nextclade-data: the nextclade dataset tag; refer to the available nextclade datasets.
#   Accepted tag format is 'YYYY-mm-ddTHH:MM:SSZ'. Be as specific as possible with
#   the desired dataset tag: a date alone (YYYY-mm-dd) is accepted, but the
#   corresponding timestamp (HH:MM:SS) will then be assumed. SIGNAL will
#   automatically adjust between v2 and v3 dataset tag formats.
nextclade: null
nextclade-data: null

# Nextclade v2 only.
# nextclade-include-recomb: setting this to false will download the
# recombinant-sequence-free version of the nextclade dataset.
nextclade-include-recomb: true
# ANYTHING BELOW IS ONLY NEEDED IF USING NCOV-TOOLS SUMMARIES

# Path from the snakemake dir to the .bed file defining the actual amplicon
# locations (not the primers).
amplicon_loc_bed: 'resources/primer_schemes/artic_v3/ncov-qc_V3.scheme.bed'

# FASTA of sequences to include with the pangolin phylogeny.
phylo_include_seqs: 'data/blank.fasta'

# Negative control sample names or prefixes
# (e.g., ['Blank'] will cover Blank1, Blank2, etc.).
negative_control_prefix: []