Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added fastq_bwa_mem_samblaster #5582

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions subworkflows/nf-core/fastq_bwa_mem_samblaster/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
include { BWA_INDEX } from '../../../modules/nf-core/bwa/index/main'
include { BWA_MEM } from '../../../modules/nf-core/bwa/mem/main'
include { SAMBLASTER } from '../../../modules/nf-core/samblaster/main'

//
// Index the reference with BWA when no index is supplied, map reads with BWA MEM
// and pass the resulting BAM through SAMBLASTER.
//
workflow FASTQ_BWA_MEM_SAMBLASTER {

    take:
    ch_fastq        // channel: [ val(meta), [ fq ] ]; meta ~ [ id: 'sample' ]
    ch_reference    // channel: [ val(meta2), fasta, index ]; fasta | index; meta2 ~ [ id: 'genome' ]
                    // Pass a falsy index (e.g. []) to have the fasta indexed by BWA_INDEX
                    // Each item from ch_fastq is combined with each item from ch_reference

    main:
    ch_versions = Channel.empty()

    // Route each reference by whether it already carries an index:
    // a truthy index goes to `yes`, a falsy one (e.g. []) goes to `no`.
    ch_has_index = ch_reference
        | branch { meta2, fasta, index ->
            yes: index
            no: !index
        }

    // MODULE: BWA_INDEX
    // Only references without an index are indexed; the index slot is dropped from the input.
    BWA_INDEX ( ch_has_index.no.map { meta2, fasta, index -> [ meta2, fasta ] } )

    // Merge freshly built and user-supplied indices into one channel of [ meta2, index ]
    ch_bwa_index = BWA_INDEX.out.index
        | mix(
            ch_has_index.yes
            | map { meta2, fasta, index ->
                [ meta2, index ]
            }
        )

    ch_versions = ch_versions.mix(BWA_INDEX.out.versions.first())

    // MODULE: BWA_MEM
    // Pair every fastq item with every index and record the reference id in the
    // sample meta (ref_id) so downstream outputs can be told apart per reference.
    ch_mem_inputs = ch_fastq
        | combine(
            ch_bwa_index
        )
        | map { meta, fq, meta2, index ->
            [ meta + [ ref_id: meta2.id ], fq, index ]
        }

    def sort_bam = false
    BWA_MEM(
        // Reads and index are split from the same channel, one item each per combination
        ch_mem_inputs.map { meta, fq, index -> [ meta, fq ] },
        ch_mem_inputs.map { meta, fq, index -> [ [], index ] },
        // No fasta is passed to BWA_MEM
        [ [], [] ],
        sort_bam
    )

    ch_mem_bam = BWA_MEM.out.bam
    ch_versions = ch_versions.mix(BWA_MEM.out.versions.first())

    // MODULE: SAMBLASTER
    SAMBLASTER ( ch_mem_bam )

    ch_versions = ch_versions.mix(SAMBLASTER.out.versions.first())

    emit:
    bam      = SAMBLASTER.out.bam // channel: [ val(meta), bam ]; meta ~ [ id: 'sample', ref_id: 'genome' ]
    versions = ch_versions        // channel: [ versions.yml ]
}
52 changes: 52 additions & 0 deletions subworkflows/nf-core/fastq_bwa_mem_samblaster/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: "fastq_bwa_mem_samblaster"
description: Index fasta if needed, map reads with BWA MEM and filter with samblaster
keywords:
  # The subworkflow calls BWA MEM with sorting disabled, so "sort" is not advertised here
  - alignment
  - bam
  - duplicate marking
components:
  - bwa/index
  - bwa/mem
  - samblaster
input:
  # First input channel: [ meta, reads ]
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  # Second input channel: [ meta2, fasta, index ]
  - meta2:
      type: map
      description: |
        Groovy Map containing reference information
        e.g. [ id:'genome' ]
  - fasta:
      type: file
      description: |
        Input genome fasta file. Only used to build the BWA index when
        no index is provided.
  - index:
      type: file
      description: |
        BWA genome index files. Pass an empty list ([]) to have the index
        built from the fasta.
      pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false, ref_id:'genome' ]
  - bam:
      type: file
      description: Tagged or filtered BAM file
      pattern: "*.bam"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@GallVp"
maintainers:
  - "@GallVp"
79 changes: 79 additions & 0 deletions subworkflows/nf-core/fastq_bwa_mem_samblaster/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// nf-test suite for the FASTQ_BWA_MEM_SAMBLASTER subworkflow defined in ../main.nf.
// Both tests feed one paired-end sarscov2 sample and one reference whose index
// slot is an empty list.
nextflow_workflow {

name "Test Subworkflow FASTQ_BWA_MEM_SAMBLASTER"
script "../main.nf"
workflow "FASTQ_BWA_MEM_SAMBLASTER"
// Test-specific process configuration is read from the sibling nextflow.config
config './nextflow.config'

tag "subworkflows"
tag "subworkflows_nfcore"
tag "subworkflows/fastq_bwa_mem_samblaster"
tag "samblaster"
tag "bwa/index"
tag "bwa/mem"


// Regular run: input[0] is the fastq channel, input[1] the reference channel
test("sarscov2-fq-gz") {

when {
workflow {
"""
input[0] = Channel.of(
[
[ id:'test' ],
[
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
]
]
)
input[1] = Channel.of(
[ [ id: 'genome' ], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), [] ]
)
"""
}
}

then {
assertAll(
{ assert workflow.success },
// Snapshot the reads MD5 of the first emitted BAM (not the file itself)
// together with the emitted versions
{ assert snapshot(
bam(workflow.out.bam[0][1]).getReadsMD5(),
workflow.out.versions
).match()
}
)
}
}

// Same inputs as above, run with the '-stub' option
test("sarscov2-fq-gz-stub") {

options '-stub'

when {
workflow {
"""
input[0] = Channel.of(
[
[ id:'test' ],
[
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
]
]
)
input[1] = Channel.of(
[ [ id: 'genome' ], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), [] ]
)
"""
}
}

then {
assertAll(
{ assert workflow.success },
// The whole workflow output is snapshotted here
{ assert snapshot(workflow.out).match() }
)
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{
"sarscov2-fq-gz": {
"content": [
"7cf71131809f63c5c05c767742ed70ff",
[
"versions.yml:md5,3b59736950270bca59c29afeb5289b4f",
"versions.yml:md5,80949a94ee1be8238b1c79d89ed9fa83",
"versions.yml:md5,ddd810edeee1008ec55b628e3a9d6204"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2024-08-28T09:36:34.757275"
},
"sarscov2-fq-gz-stub": {
"content": [
{
"0": [
[
{
"id": "test",
"ref_id": "genome"
},
"test.on.genome.samblaster.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
"versions.yml:md5,3b59736950270bca59c29afeb5289b4f",
"versions.yml:md5,80949a94ee1be8238b1c79d89ed9fa83",
"versions.yml:md5,ddd810edeee1008ec55b628e3a9d6204"
],
"bam": [
[
{
"id": "test",
"ref_id": "genome"
},
"test.on.genome.samblaster.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,3b59736950270bca59c29afeb5289b4f",
"versions.yml:md5,80949a94ee1be8238b1c79d89ed9fa83",
"versions.yml:md5,ddd810edeee1008ec55b628e3a9d6204"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2024-08-28T09:36:42.43377"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
process {
withName: BWA_MEM {
ext.prefix = { "${meta.id}.on.${meta.ref_id}.bwa.mem" }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why these prefixes?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These prefixes are not necessary here, but they are in a pipeline, because the FASTQs from each sample are mapped to each reference.

ext.args = '-5SP'
}

withName: SAMBLASTER {
ext.prefix = { "${meta.id}.on.${meta.ref_id}.samblaster" }
ext.args3 = '-h -F 2316'
}
}
2 changes: 2 additions & 0 deletions subworkflows/nf-core/fastq_bwa_mem_samblaster/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
subworkflows/fastq_bwa_mem_samblaster:
- subworkflows/nf-core/fastq_bwa_mem_samblaster/**
Loading