-
Notifications
You must be signed in to change notification settings - Fork 4
/
align_reads.nf
69 lines (55 loc) · 2.91 KB
/
align_reads.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env nextflow
// Using DSL-2
nextflow.enable.dsl=2
// Import helpers
GroovyShell shell = new GroovyShell()
def helpers = shell.parse(new File("${workflow.projectDir}/helpers.gvy"))
// Import sub-workflows
include { align_reads } from './modules/align_reads'
include { find_reads } from './modules/find_reads'
// Standalone entrypoint
workflow {

    // Usage text handed to the help helper below.
    // NOTE: the lines of this string literal are intentionally flush-left;
    // any leading whitespace would become part of the message itself.
    def usage_text = """
Align genes against a set of WGS sequence reads
Aligns a deduplicated collection of genes against a collection of whole-genome shotgun
short-read sequencing data, producing a summary of the number of reads which align to each gene
Parameters:
--genes Path of single deduplicated amino acid FASTA file to be used for alignment
--reads Folder containing the set of FASTQ files to align against those genes
--reads_suffix File ending for all reads (default: ${params.reads_suffix})
--paired Set to true if input folder contains paired-end FASTQ files (default: false)
--samplesheet Optional file listing the FASTQ inputs to process (headers: sample,R1,R2)
--output Folder where output files will be written
--min_score_reads Minimum alignment score (default: ${params.min_score_reads})
--min_identity Minimum percent identity of the amino acid alignment required to retain the alignment
(default: ${params.min_identity}, ranges 0-100)
--max_evalue Maximum E-value threshold used to filter all alignments
(default: ${params.max_evalue})
--aligner Algorithm used for alignment (default: ${params.aligner}, options: diamond, blast)
--query_gencode Genetic code used for conceptual translation of genome sequences
(default: ${params.query_gencode})
--max_overlap Any alignment which overlaps a higher-scoring alignment by more than this
amount will be filtered out (default: ${params.max_overlap}, range: 0-100)
--aln_fmt Column headings used for alignment outputs (see DIAMOND documentation for details)
(default: ${params.aln_fmt})
"""

    // Pass the usage text and the --help flag to the helper, which decides
    // whether the message needs to be shown
    helpers.help_message(usage_text, params.help)

    // --output and --genes are always mandatory
    helpers.require_param(params.output, "output")
    helpers.require_param(params.genes, "genes")

    // --reads is only mandatory when no samplesheet has been supplied
    // (the samplesheet parameter renders as the string "false" in that case)
    def no_samplesheet = "${params.samplesheet}" == "false"
    if ( no_samplesheet ) {
        helpers.require_param(params.reads, "reads")
    }

    // Collect the input reads, either from the samplesheet or the input folder
    find_reads()

    // Align the collected reads against the gene FASTA
    def genes_fasta = file("${params.genes}")
    def reads_ch = find_reads.out
    align_reads(genes_fasta, reads_ch)
}