From e80fd835aa2eef94862d59dde5f535e3f0272bb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Fri, 6 Dec 2024 14:31:45 +0000 Subject: [PATCH 1/9] change module structure and first test attempt --- ...iba_workflow.nf => arriba_workflow.nf.bkp} | 0 subworkflows/local/arriba_workflow/main.nf | 92 ++++++++++++ .../local/arriba_workflow/tests/main.nf.test | 136 ++++++++++++++++++ workflows/rnafusion.nf | 10 +- 4 files changed, 237 insertions(+), 1 deletion(-) rename subworkflows/local/{arriba_workflow.nf => arriba_workflow.nf.bkp} (100%) create mode 100644 subworkflows/local/arriba_workflow/main.nf create mode 100644 subworkflows/local/arriba_workflow/tests/main.nf.test diff --git a/subworkflows/local/arriba_workflow.nf b/subworkflows/local/arriba_workflow.nf.bkp similarity index 100% rename from subworkflows/local/arriba_workflow.nf rename to subworkflows/local/arriba_workflow.nf.bkp diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf new file mode 100644 index 00000000..a19fe86b --- /dev/null +++ b/subworkflows/local/arriba_workflow/main.nf @@ -0,0 +1,92 @@ +include { ARRIBA_ARRIBA } from '../../../modules/nf-core/arriba/arriba/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_ARRIBA } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_ARRIBA } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_ARRIBA } from '../../../modules/nf-core/samtools/view/main' +include { STAR_ALIGN as STAR_FOR_ARRIBA } from '../../../modules/nf-core/star/align/main' + +workflow ARRIBA_WORKFLOW { + take: + reads // channel [ meta, [ fastqs ] ] + ch_gtf // channel [ meta, path_gtf ] + ch_fasta // channel [ meta, path_fasta ] + ch_starindex_ref // channel [ meta, path_index ] + ch_arriba_ref_blacklist // channel [ meta, path_blacklist ] + ch_arriba_ref_known_fusions // channel [ meta, path_known_fusions ] + ch_arriba_ref_cytobands // channel [ meta, path_cytobands ] + ch_arriba_ref_protein_domains // channel [ meta, path_proteins ] + arriba // boolean + all // boolean + fusioninspector_only // boolean + star_ignore_sjdbgtf // boolean + seq_center // string + arriba_fusions // path? + cram // array? + + main: + ch_versions = Channel.empty() + ch_dummy_file = file("$baseDir/assets/dummy_file_arriba.txt", checkIfExists: true) + + if (( arriba || all) && !fusioninspector_only) { + + STAR_FOR_ARRIBA( + reads, + ch_starindex_ref, + ch_gtf, + star_ignore_sjdbgtf, + '', // seq_platform, should be a params like other pipelines + seq_center + ) + + ch_versions = ch_versions.mix(STAR_FOR_ARRIBA.out.versions) + + if ( arriba_fusions ) { + + ch_arriba_fusions = reads.combine( Channel.value( file( arriba_fusions, checkIfExists: true ) ) ) // This should be done in the main scirpt? + .map { meta, reads, fusions -> [ meta, fusions ] } + ch_arriba_fusion_fail = ch_dummy_file + + } else { + + ARRIBA_ARRIBA ( + STAR_FOR_ARRIBA.out.bam, + ch_fasta, + ch_gtf, + ch_arriba_ref_blacklist.map{ it[1] }, // should we update nf-core module to includes meta? + ch_arriba_ref_known_fusions.map{ it[1] }, + ch_arriba_ref_cytobands.map{ it[1] }, + ch_arriba_ref_protein_domains.map{ it[1] } + ) + + ch_versions = ch_versions.mix(ARRIBA_ARRIBA.out.versions) + + ch_arriba_fusions = ARRIBA_ARRIBA.out.fusions + ch_arriba_fusion_fail = ARRIBA_ARRIBA.out.fusions_fail.map{ meta, file -> return file} + } + + if ( cram.contains('arriba') ) { + + SAMTOOLS_SORT_FOR_ARRIBA(STAR_FOR_ARRIBA.out.bam, ch_fasta) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_FOR_ARRIBA.out.versions ) + + SAMTOOLS_VIEW_FOR_ARRIBA(SAMTOOLS_SORT_FOR_ARRIBA.out.bam.map { meta, bam -> [ meta, bam, [] ] }, ch_fasta, []) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_ARRIBA.out.versions ) + + SAMTOOLS_INDEX_FOR_ARRIBA(SAMTOOLS_VIEW_FOR_ARRIBA.out.cram) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_ARRIBA.out.versions ) + + } + + } + else { + ch_arriba_fusions = reads.combine(Channel.value( file(ch_dummy_file, checkIfExists:true ) ) ) + .map { meta, reads, fusions -> [ meta, fusions ] } + + ch_arriba_fusion_fail = ch_dummy_file + } + + emit: + fusions = ch_arriba_fusions + fusions_fail = ch_arriba_fusion_fail + versions = ch_versions + } + diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test b/subworkflows/local/arriba_workflow/tests/main.nf.test new file mode 100644 index 00000000..73f91ea2 --- /dev/null +++ b/subworkflows/local/arriba_workflow/tests/main.nf.test @@ -0,0 +1,136 @@ +nextflow_workflow { + + name "Test Subworkflow ARRIBA_WORKFLOW" + script "../main.nf" + //config "./nextflow.config" + workflow "ARRIBA_WORKFLOW" + tag "subworkflow" + tag "arriba" + tag "arriba/arriba" + tag "samtools" + tag "samtools/index" + tag "samtools/sort" + tag "samtools/view" + tag "star" + tag "star/align" + + + test("ARRIBA_WORKFLOW - Homo sapiens chr22") { + + when { + setup { + // Create genome index for STAR + run("STAR_GENOMEGENERATE") { + script "../../../../modules/nf-core/star/genomegenerate/main" + process { + """ + // FASTA + input[0] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExist: true + ) + .map{ [[id: it.name], it ]} + + + // GTF + input[1] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [[id: it.name], it ]} + """ + } + } + } + + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [ id: "test_fastqs" ], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_1.fq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_2.fq.gz", checkIfExists: true) + ] + ] ) + + // ch_gtf + input[1] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_fasta + input[2] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExist: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_starindex_ref + input[3] = STAR_GENOMEGENERATE.out.index.map{ [ [id: "star_index"], it ] } + + // ch_arriba_ref_blacklist + input[4] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExist: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_known_fusions + input[5] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExist: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_cytobands + input[6] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExist: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_protein_domains + input[7] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExist: true + ) + .map{ [ [ id: it.name ], it ] } + + // arriba (boolean) + input[8] = true + + // all (boolean) + input[9] = true + + // fusioninspector_only (boolean) + input[10] = false + + // star_ignore_sjdbgtf (boolean) + input[11] = false + + // seq_center (string) + input[12] = 'test_center' + + // arriba_fusions (path) + input[13] = null + + // cram (array??) + input[14] = [ 'arriba' ] + """ + } + } + + then { + assertAll( + { assert workflow.success }/*, + { assert snapshot(file( workflow.out.versions[0] )).match('versions' ) }, + { assert snapshot(file( workflow.out.rnaseq_metrics[0][1] ).readLines()[4..-1]).md5().match('rnaseq_metrics' ) }, + { assert snapshot(file( workflow.out.duplicate_metrics[0][1] ).readLines()[4..-1]).md5().match('duplicate_metrics' ) }, + { assert snapshot(file( workflow.out.insertsize_metrics[0][1] ).readLines()[4..-1]).md5().match('insertsize_metrics') }*/ + ) + } + } + +} diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index bafea68c..a457a1e3 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -92,7 +92,15 @@ workflow RNAFUSION { ch_starindex_ensembl_ref, ch_arriba_ref_blacklist, ch_arriba_ref_known_fusions, - ch_arriba_ref_protein_domains + ch_arriba_ref_cytobands, + ch_arriba_ref_protein_domains, + params.arriba // boolean + params.all // boolean + params.fusioninspector_only // boolean + params.star_ignore_sjdbgtf, + params.seq_center ?: '', + params.arriba_fusions, // path + params.cram // array? ) ch_versions = ch_versions.mix(ARRIBA_WORKFLOW.out.versions) From a6c082ef6eb4fb0bb050b4fb7a1f2d6fa99c8cdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Fri, 6 Dec 2024 18:46:23 +0000 Subject: [PATCH 2/9] update test and generate first snap --- subworkflows/local/arriba_workflow/main.nf | 17 +++-- .../local/arriba_workflow/tests/main.nf.test | 67 +++++++++---------- .../arriba_workflow/tests/main.nf.test.snap | 49 ++++++++++++++ 3 files changed, 91 insertions(+), 42 deletions(-) create mode 100644 subworkflows/local/arriba_workflow/tests/main.nf.test.snap diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf index a19fe86b..5460e926 100644 --- a/subworkflows/local/arriba_workflow/main.nf +++ b/subworkflows/local/arriba_workflow/main.nf @@ -60,7 +60,7 @@ workflow ARRIBA_WORKFLOW { ch_versions = ch_versions.mix(ARRIBA_ARRIBA.out.versions) ch_arriba_fusions = ARRIBA_ARRIBA.out.fusions - ch_arriba_fusion_fail = ARRIBA_ARRIBA.out.fusions_fail.map{ meta, file -> return file} + ch_arriba_fusion_fail = ARRIBA_ARRIBA.out.fusions_fail.map{ meta, file -> return file } } if ( cram.contains('arriba') ) { @@ -78,15 +78,18 @@ workflow ARRIBA_WORKFLOW { } else { - ch_arriba_fusions = reads.combine(Channel.value( file(ch_dummy_file, checkIfExists:true ) ) ) - .map { meta, reads, fusions -> [ meta, fusions ] } + // Not sure how this dummy file can be useful + // If this tool can be skipped, why not just emitting an empty channel? + ch_arriba_fusions = reads + .combine(Channel.value( file(ch_dummy_file, checkIfExists: true ) ) ) + .map { meta, reads, fusions -> [ meta, fusions ] } - ch_arriba_fusion_fail = ch_dummy_file + ch_arriba_fusion_fail = ch_dummy_file } emit: - fusions = ch_arriba_fusions - fusions_fail = ch_arriba_fusion_fail - versions = ch_versions + fusions = ch_arriba_fusions + fusions_fail = ch_arriba_fusion_fail + versions = ch_versions } diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test b/subworkflows/local/arriba_workflow/tests/main.nf.test index 73f91ea2..1e713fbb 100644 --- a/subworkflows/local/arriba_workflow/tests/main.nf.test +++ b/subworkflows/local/arriba_workflow/tests/main.nf.test @@ -2,7 +2,6 @@ nextflow_workflow { name "Test Subworkflow ARRIBA_WORKFLOW" script "../main.nf" - //config "./nextflow.config" workflow "ARRIBA_WORKFLOW" tag "subworkflow" tag "arriba" @@ -12,35 +11,35 @@ nextflow_workflow { tag "samtools/sort" tag "samtools/view" tag "star" + tag "star/genomegenerate" tag "star/align" + test("ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4") { - test("ARRIBA_WORKFLOW - Homo sapiens chr22") { - - when { - setup { - // Create genome index for STAR - run("STAR_GENOMEGENERATE") { - script "../../../../modules/nf-core/star/genomegenerate/main" - process { - """ - // FASTA - input[0] = Channel.fromPath( - "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExist: true - ) - .map{ [[id: it.name], it ]} - - - // GTF - input[1] = Channel.fromPath( - "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true - ) - .map{ [[id: it.name], it ]} - """ - } + setup { + // Create genome index for STAR + run("STAR_GENOMEGENERATE") { + script "../../../../modules/nf-core/star/genomegenerate/main" + process { + """ + // FASTA + input[0] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + //"/workspace/rnafusion/results/test.fa", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + + // GTF + input[1] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + """ } } + } + when { workflow { """ // ch_reads @@ -63,38 +62,39 @@ nextflow_workflow { // ch_fasta input[2] = Channel.fromPath( - "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExist: true + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + //"/workspace/rnafusion/results/test.fa", checkIfExists: true ) .map{ [ [ id: it.name ], it ] } // ch_starindex_ref - input[3] = STAR_GENOMEGENERATE.out.index.map{ [ [id: "star_index"], it ] } + input[3] = STAR_GENOMEGENERATE.out.index // ch_arriba_ref_blacklist input[4] = Channel.fromPath( - "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExist: true + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true ) .map{ [ [ id: it.name ], it ] } // ch_arriba_ref_known_fusions input[5] = Channel.fromPath( - "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExist: true + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true ) .map{ [ [ id: it.name ], it ] } // ch_arriba_ref_cytobands input[6] = Channel.fromPath( - "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExist: true + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExists: true ) .map{ [ [ id: it.name ], it ] } // ch_arriba_ref_protein_domains input[7] = Channel.fromPath( - "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExist: true + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", checkIfExists: true ) .map{ [ [ id: it.name ], it ] } @@ -124,11 +124,8 @@ nextflow_workflow { then { assertAll( - { assert workflow.success }/*, - { assert snapshot(file( workflow.out.versions[0] )).match('versions' ) }, - { assert snapshot(file( workflow.out.rnaseq_metrics[0][1] ).readLines()[4..-1]).md5().match('rnaseq_metrics' ) }, - { assert snapshot(file( workflow.out.duplicate_metrics[0][1] ).readLines()[4..-1]).md5().match('duplicate_metrics' ) }, - { assert snapshot(file( workflow.out.insertsize_metrics[0][1] ).readLines()[4..-1]).md5().match('insertsize_metrics') }*/ + { assert workflow.success }, + { assert snapshot(workflow.out).match() } ) } } diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test.snap b/subworkflows/local/arriba_workflow/tests/main.nf.test.snap new file mode 100644 index 00000000..f7b6a8db --- /dev/null +++ b/subworkflows/local/arriba_workflow/tests/main.nf.test.snap @@ -0,0 +1,49 @@ +{ + "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4": { + "content": [ + { + "0": [ + [ + { + "id": "test_fastqs" + }, + "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d" + ] + ], + "1": [ + "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98" + ], + "2": [ + "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", + "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9", + "versions.yml:md5,85458747b55f37c1a5afd39ee7a3a4aa", + "versions.yml:md5,bfc5d96804f2991c7f7c705f1ddf81ec", + "versions.yml:md5,f4f64d3f1fd867d5afa51e03f7cf2824" + ], + "fusions": [ + [ + { + "id": "test_fastqs" + }, + "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d" + ] + ], + "fusions_fail": [ + "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98" + ], + "versions": [ + "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", + "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9", + "versions.yml:md5,85458747b55f37c1a5afd39ee7a3a4aa", + "versions.yml:md5,bfc5d96804f2991c7f7c705f1ddf81ec", + "versions.yml:md5,f4f64d3f1fd867d5afa51e03f7cf2824" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-06T18:40:22.919178866" + } +} \ No newline at end of file From 55b0562c864ed88347bc6d1005c06f604a0ad4dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Mon, 9 Dec 2024 11:49:03 +0000 Subject: [PATCH 3/9] update tests after fixing fasta in github --- subworkflows/local/arriba_workflow/main.nf | 5 ++--- subworkflows/local/arriba_workflow/tests/main.nf.test | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf index 5460e926..fbffb770 100644 --- a/subworkflows/local/arriba_workflow/main.nf +++ b/subworkflows/local/arriba_workflow/main.nf @@ -41,7 +41,7 @@ workflow ARRIBA_WORKFLOW { if ( arriba_fusions ) { - ch_arriba_fusions = reads.combine( Channel.value( file( arriba_fusions, checkIfExists: true ) ) ) // This should be done in the main scirpt? + ch_arriba_fusions = reads.combine( Channel.value( file( arriba_fusions, checkIfExists: true ) ) ) // Should this be done in the main script? .map { meta, reads, fusions -> [ meta, fusions ] } ch_arriba_fusion_fail = ch_dummy_file @@ -76,8 +76,7 @@ workflow ARRIBA_WORKFLOW { } - } - else { + } else { // Not sure how this dummy file can be useful // If this tool can be skipped, why not just emitting an empty channel? ch_arriba_fusions = reads diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test b/subworkflows/local/arriba_workflow/tests/main.nf.test index 1e713fbb..3fa816ea 100644 --- a/subworkflows/local/arriba_workflow/tests/main.nf.test +++ b/subworkflows/local/arriba_workflow/tests/main.nf.test @@ -25,7 +25,6 @@ nextflow_workflow { // FASTA input[0] = Channel.fromPath( "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true - //"/workspace/rnafusion/results/test.fa", checkIfExists: true ) .map{ [[id: it.getName() ], it ]} @@ -63,7 +62,6 @@ nextflow_workflow { input[2] = Channel.fromPath( "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true - //"/workspace/rnafusion/results/test.fa", checkIfExists: true ) .map{ [ [ id: it.name ], it ] } From 55db5b52281e11465a16de7396fbd3731907896a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Mon, 9 Dec 2024 12:49:05 +0000 Subject: [PATCH 4/9] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 335b84c3..7883f2c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add nf-test to local module: `FUSIONREPORT_DOWNLOAD` [#560](https://github.com/nf-core/rnafusion/pull/560) - Add nf-test to local subworkflow: `QC_WORKFLOW` [#568](https://github.com/nf-core/rnafusion/pull/568) - Add nf-test to local subworkflow: `TRIM_WORKFLOW` [#572](https://github.com/nf-core/rnafusion/pull/572) +- Add nf-test to local subworkflow: `ARRIBA_WORKFLOW` [#578](https://github.com/nf-core/rnafusion/pull/578) ### Changed From df269a91c1fce57e706107c8bc4a8f039f1a9ebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Mon, 9 Dec 2024 13:37:33 +0000 Subject: [PATCH 5/9] update test excluding samtools --- subworkflows/local/arriba_workflow/main.nf | 18 ++- .../local/arriba_workflow/tests/main.nf.test | 120 +++++++++++++++++- .../arriba_workflow/tests/main.nf.test.snap | 49 ------- 3 files changed, 131 insertions(+), 56 deletions(-) delete mode 100644 subworkflows/local/arriba_workflow/tests/main.nf.test.snap diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf index fbffb770..4e30c785 100644 --- a/subworkflows/local/arriba_workflow/main.nf +++ b/subworkflows/local/arriba_workflow/main.nf @@ -19,11 +19,12 @@ workflow ARRIBA_WORKFLOW { fusioninspector_only // boolean star_ignore_sjdbgtf // boolean seq_center // string - arriba_fusions // path? - cram // array? + arriba_fusions // path + cram // array main: - ch_versions = Channel.empty() + ch_versions = Channel.empty() + ch_bam_index = Channel.empty() ch_dummy_file = file("$baseDir/assets/dummy_file_arriba.txt", checkIfExists: true) if (( arriba || all) && !fusioninspector_only) { @@ -74,6 +75,10 @@ workflow ARRIBA_WORKFLOW { SAMTOOLS_INDEX_FOR_ARRIBA(SAMTOOLS_VIEW_FOR_ARRIBA.out.cram) ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_ARRIBA.out.versions ) + // Join bam and index files + SAMTOOLS_SORT_FOR_ARRIBA.out.bam + .join(SAMTOOLS_INDEX_FOR_ARRIBA.out.cram) + .set{ ch_bam_index } } } else { @@ -87,8 +92,9 @@ workflow ARRIBA_WORKFLOW { } emit: - fusions = ch_arriba_fusions - fusions_fail = ch_arriba_fusion_fail - versions = ch_versions + fusions = ch_arriba_fusions // channel [ meta, path_fusions ] + fusions_fail = ch_arriba_fusion_fail // channel [ path, fusions_failed ] + bam_index = ch_bam_index // channel [ meta, bam, index ] + versions = ch_versions // channel [ versions ] } diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test b/subworkflows/local/arriba_workflow/tests/main.nf.test index 3fa816ea..9a9514d7 100644 --- a/subworkflows/local/arriba_workflow/tests/main.nf.test +++ b/subworkflows/local/arriba_workflow/tests/main.nf.test @@ -14,6 +14,8 @@ nextflow_workflow { tag "star/genomegenerate" tag "star/align" + + // Test #1 Indexing test("ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4") { setup { @@ -114,7 +116,7 @@ nextflow_workflow { // arriba_fusions (path) input[13] = null - // cram (array??) + // cram (array) input[14] = [ 'arriba' ] """ } @@ -128,4 +130,120 @@ nextflow_workflow { } } + + // TEST #2 WITHOUT INDEXING + test("ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - cram = []") { + + setup { + // Create genome index for STAR + run("STAR_GENOMEGENERATE") { + script "../../../../modules/nf-core/star/genomegenerate/main" + process { + """ + // FASTA + input[0] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + + // GTF + input[1] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + """ + } + } + } + + when { + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [ id: "test_fastqs" ], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_1.fq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_2.fq.gz", checkIfExists: true) + ] + ] ) + + // ch_gtf + input[1] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_fasta + input[2] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_starindex_ref + input[3] = STAR_GENOMEGENERATE.out.index + + // ch_arriba_ref_blacklist + input[4] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_known_fusions + input[5] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_cytobands + input[6] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_protein_domains + input[7] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // arriba (boolean) + input[8] = true + + // all (boolean) + input[9] = true + + // fusioninspector_only (boolean) + input[10] = false + + // star_ignore_sjdbgtf (boolean) + input[11] = false + + // seq_center (string) + input[12] = 'test_center' + + // arriba_fusions (path) + input[13] = null + + // cram (array) + input[14] = [ ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + } diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test.snap b/subworkflows/local/arriba_workflow/tests/main.nf.test.snap deleted file mode 100644 index f7b6a8db..00000000 --- a/subworkflows/local/arriba_workflow/tests/main.nf.test.snap +++ /dev/null @@ -1,49 +0,0 @@ -{ - "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4": { - "content": [ - { - "0": [ - [ - { - "id": "test_fastqs" - }, - "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d" - ] - ], - "1": [ - "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98" - ], - "2": [ - "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", - "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9", - "versions.yml:md5,85458747b55f37c1a5afd39ee7a3a4aa", - "versions.yml:md5,bfc5d96804f2991c7f7c705f1ddf81ec", - "versions.yml:md5,f4f64d3f1fd867d5afa51e03f7cf2824" - ], - "fusions": [ - [ - { - "id": "test_fastqs" - }, - "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d" - ] - ], - "fusions_fail": [ - "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98" - ], - "versions": [ - "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", - "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9", - "versions.yml:md5,85458747b55f37c1a5afd39ee7a3a4aa", - "versions.yml:md5,bfc5d96804f2991c7f7c705f1ddf81ec", - "versions.yml:md5,f4f64d3f1fd867d5afa51e03f7cf2824" - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-06T18:40:22.919178866" - } -} \ No newline at end of file From aecc7ef189c360c9944005838401d9c54b212efe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Mon, 9 Dec 2024 18:27:07 +0000 Subject: [PATCH 6/9] update test to include cram files --- subworkflows/local/arriba_workflow/main.nf | 10 +- .../local/arriba_workflow/tests/main.nf.test | 4 +- .../arriba_workflow/tests/main.nf.test.snap | 114 ++++++++++++++++++ tests/test_build.nf.test.snap | 4 +- tests/test_cosmic.nf.test.snap | 0 workflows/rnafusion.nf | 14 +-- 6 files changed, 129 insertions(+), 17 deletions(-) create mode 100644 subworkflows/local/arriba_workflow/tests/main.nf.test.snap create mode 100644 tests/test_cosmic.nf.test.snap diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf index 4e30c785..28a0a042 100644 --- a/subworkflows/local/arriba_workflow/main.nf +++ b/subworkflows/local/arriba_workflow/main.nf @@ -24,7 +24,7 @@ workflow ARRIBA_WORKFLOW { main: ch_versions = Channel.empty() - ch_bam_index = Channel.empty() + ch_cram_index = Channel.empty() ch_dummy_file = file("$baseDir/assets/dummy_file_arriba.txt", checkIfExists: true) if (( arriba || all) && !fusioninspector_only) { @@ -75,10 +75,8 @@ workflow ARRIBA_WORKFLOW { SAMTOOLS_INDEX_FOR_ARRIBA(SAMTOOLS_VIEW_FOR_ARRIBA.out.cram) ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_ARRIBA.out.versions ) - // Join bam and index files - SAMTOOLS_SORT_FOR_ARRIBA.out.bam - .join(SAMTOOLS_INDEX_FOR_ARRIBA.out.cram) - .set{ ch_bam_index } + // Join cram and index files + ch_cram_index = SAMTOOLS_VIEW_FOR_ARRIBA.out.cram.join(SAMTOOLS_INDEX_FOR_ARRIBA.out.crai) } } else { @@ -94,7 +92,7 @@ workflow ARRIBA_WORKFLOW { emit: fusions = ch_arriba_fusions // channel [ meta, path_fusions ] fusions_fail = ch_arriba_fusion_fail // channel [ path, fusions_failed ] - bam_index = ch_bam_index // channel [ meta, bam, index ] + cram_index = ch_cram_index // channel [ meta, cram, crai ] versions = ch_versions // channel [ versions ] } diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test b/subworkflows/local/arriba_workflow/tests/main.nf.test index 9a9514d7..96786a5b 100644 --- a/subworkflows/local/arriba_workflow/tests/main.nf.test +++ b/subworkflows/local/arriba_workflow/tests/main.nf.test @@ -21,7 +21,7 @@ nextflow_workflow { setup { // Create genome index for STAR run("STAR_GENOMEGENERATE") { - script "../../../../modules/nf-core/star/genomegenerate/main" + script "../../../../modules/nf-core/star/genomegenerate/main.nf" process { """ // FASTA @@ -137,7 +137,7 @@ nextflow_workflow { setup { // Create genome index for STAR run("STAR_GENOMEGENERATE") { - script "../../../../modules/nf-core/star/genomegenerate/main" + script "../../../../modules/nf-core/star/genomegenerate/main.nf" process { """ // FASTA diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test.snap b/subworkflows/local/arriba_workflow/tests/main.nf.test.snap new file mode 100644 index 00000000..3aa777e8 --- /dev/null +++ b/subworkflows/local/arriba_workflow/tests/main.nf.test.snap @@ -0,0 +1,114 @@ +{ + "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4": { + "content": [ + { + "0": [ + [ + { + "id": "test_fastqs" + }, + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/da/0878f3451a0f8db34ffd9b178974e6/test_fastqs.arriba.fusions.tsv" + ] + ], + "1": [ + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/da/0878f3451a0f8db34ffd9b178974e6/test_fastqs.arriba.fusions.discarded.tsv" + ], + "2": [ + [ + { + "id": "test_fastqs" + }, + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/c8/482130c0382c0d0318b9e98d0650da/test_fastqs_star_for_arriba_sorted.cram", + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/b5/57dffeefbc4801b0f7c4af2c8530e6/test_fastqs_star_for_arriba_sorted.cram.crai" + ] + ], + "3": [ + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/da/0878f3451a0f8db34ffd9b178974e6/versions.yml", + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/5b/18880ae27c2497bddad57c4b7ba766/versions.yml", + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/b5/57dffeefbc4801b0f7c4af2c8530e6/versions.yml", + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/c8/482130c0382c0d0318b9e98d0650da/versions.yml", + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/48/58af6204cc601a1a0e6e826a117f31/versions.yml" + ], + "cram_index": [ + [ + { + "id": "test_fastqs" + }, + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/c8/482130c0382c0d0318b9e98d0650da/test_fastqs_star_for_arriba_sorted.cram", + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/b5/57dffeefbc4801b0f7c4af2c8530e6/test_fastqs_star_for_arriba_sorted.cram.crai" + ] + ], + "fusions": [ + [ + { + "id": "test_fastqs" + }, + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/da/0878f3451a0f8db34ffd9b178974e6/test_fastqs.arriba.fusions.tsv" + ] + ], + "fusions_fail": [ + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/da/0878f3451a0f8db34ffd9b178974e6/test_fastqs.arriba.fusions.discarded.tsv" + ], + "versions": [ + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/da/0878f3451a0f8db34ffd9b178974e6/versions.yml", + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/5b/18880ae27c2497bddad57c4b7ba766/versions.yml", + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/b5/57dffeefbc4801b0f7c4af2c8530e6/versions.yml", + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/c8/482130c0382c0d0318b9e98d0650da/versions.yml", + "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/48/58af6204cc601a1a0e6e826a117f31/versions.yml" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-09T18:02:58.142697295" + }, + "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - cram = []": { + "content": [ + { + "0": [ + [ + { + "id": "test_fastqs" + }, + "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d" + ] + ], + "1": [ + "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98" + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", + "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9" + ], + "cram_index": [ + + ], + "fusions": [ + [ + { + "id": "test_fastqs" + }, + "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d" + ] + ], + "fusions_fail": [ + "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98" + ], + "versions": [ + "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", + "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-09T18:15:28.965848873" + } +} \ No newline at end of file diff --git a/tests/test_build.nf.test.snap b/tests/test_build.nf.test.snap index 3b5441dc..28c64818 100644 --- a/tests/test_build.nf.test.snap +++ b/tests/test_build.nf.test.snap @@ -34,7 +34,7 @@ "Homo_sapiens.GRCh38.102.gtf:md5,defac755cd9aa4e82ec33398c27745ef", "Homo_sapiens.GRCh38.102.refflat:md5,ef095e13743811c31d44752c32e9673e", "Homo_sapiens.GRCh38.102_rrna_intervals.gtf.interval_list:md5,0abf61877f65247b15c438d605d85599", - "hgnc_complete_set.txt:md5,34f4bd879d972e511fcb0318df9775b6", + "hgnc_complete_set.txt:md5,a563a2f8432ec0ab7d3dc74d769102b8", "Homo_sapiens.GRCh38.102_rrna_intervals.gtf:md5,744bf505deb50837b15441e808cad345" ] ], @@ -42,6 +42,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-04T17:55:45.068780109" + "timestamp": "2024-12-09T18:20:09.914393513" } } \ No newline at end of file diff --git a/tests/test_cosmic.nf.test.snap b/tests/test_cosmic.nf.test.snap new file mode 100644 index 00000000..e69de29b diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index a457a1e3..0908b30c 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -94,13 +94,13 @@ workflow RNAFUSION { ch_arriba_ref_known_fusions, ch_arriba_ref_cytobands, ch_arriba_ref_protein_domains, - params.arriba // boolean - params.all // boolean - params.fusioninspector_only // boolean - params.star_ignore_sjdbgtf, - params.seq_center ?: '', - params.arriba_fusions, // path - params.cram // array? + params.arriba, // boolean + params.all, // boolean + params.fusioninspector_only, // boolean + params.star_ignore_sjdbgtf, // boolean + params.seq_center ?: '', // string + params.arriba_fusions, // path + params.cram // array ) ch_versions = ch_versions.mix(ARRIBA_WORKFLOW.out.versions) From 9d87a377b35816d4a1ae14246ebc25458464cf1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Mon, 9 Dec 2024 18:58:19 +0000 Subject: [PATCH 7/9] remove original script --- subworkflows/local/arriba_workflow.nf.bkp | 64 ----------------------- 1 file changed, 64 deletions(-) delete mode 100644 subworkflows/local/arriba_workflow.nf.bkp diff --git a/subworkflows/local/arriba_workflow.nf.bkp b/subworkflows/local/arriba_workflow.nf.bkp deleted file mode 100644 index ccc88823..00000000 --- a/subworkflows/local/arriba_workflow.nf.bkp +++ /dev/null @@ -1,64 +0,0 @@ -include { ARRIBA_ARRIBA } from '../../modules/nf-core/arriba/arriba/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_ARRIBA} from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_ARRIBA} from '../../modules/nf-core/samtools/sort/main' -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_ARRIBA} from '../../modules/nf-core/samtools/view/main' -include { STAR_ALIGN as STAR_FOR_ARRIBA } from '../../modules/nf-core/star/align/main' - -workflow ARRIBA_WORKFLOW { - take: - reads - ch_gtf - ch_fasta - ch_starindex_ref - ch_arriba_ref_blacklist - ch_arriba_ref_known_fusions - ch_arriba_ref_protein_domains - - main: - ch_versions = Channel.empty() - ch_dummy_file = file("$baseDir/assets/dummy_file_arriba.txt", checkIfExists: true) - - if ((params.arriba || params.all) && !params.fusioninspector_only) { - - STAR_FOR_ARRIBA( reads, ch_starindex_ref, ch_gtf, params.star_ignore_sjdbgtf, '', params.seq_center ?: '') - ch_versions = ch_versions.mix(STAR_FOR_ARRIBA.out.versions) - - if (params.arriba_fusions) { - ch_arriba_fusions = reads.combine( Channel.value( file( params.arriba_fusions, checkIfExists: true ) ) ) - .map { meta, reads, fusions -> [ meta, fusions ] } - ch_arriba_fusion_fail = ch_dummy_file - } else { - ARRIBA_ARRIBA ( STAR_FOR_ARRIBA.out.bam, ch_fasta, ch_gtf, ch_arriba_ref_blacklist, ch_arriba_ref_known_fusions, [[],[]], ch_arriba_ref_protein_domains ) - ch_versions = ch_versions.mix(ARRIBA_ARRIBA.out.versions) - - ch_arriba_fusions = ARRIBA_ARRIBA.out.fusions - ch_arriba_fusion_fail = ARRIBA_ARRIBA.out.fusions_fail.map{ meta, file -> return file} - } - - if (params.cram.contains('arriba') ){ - - SAMTOOLS_SORT_FOR_ARRIBA(STAR_FOR_ARRIBA.out.bam, ch_fasta) - ch_versions = ch_versions.mix(SAMTOOLS_SORT_FOR_ARRIBA.out.versions ) - - SAMTOOLS_VIEW_FOR_ARRIBA(SAMTOOLS_SORT_FOR_ARRIBA.out.bam.map { meta, bam -> [ meta, bam, [] ] }, ch_fasta, []) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_ARRIBA.out.versions ) - - SAMTOOLS_INDEX_FOR_ARRIBA(SAMTOOLS_VIEW_FOR_ARRIBA.out.cram) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_ARRIBA.out.versions ) - - } - - } - else { - ch_arriba_fusions = reads.combine(Channel.value( file(ch_dummy_file, checkIfExists:true ) ) ) - .map { meta, reads, fusions -> [ meta, fusions ] } - - ch_arriba_fusion_fail = ch_dummy_file - } - - emit: - fusions = ch_arriba_fusions - fusions_fail = ch_arriba_fusion_fail - versions = ch_versions - } - From 3a45cbbcdf5081372ed98aa2a10b8783725418a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Mon, 9 Dec 2024 19:03:43 +0000 Subject: [PATCH 8/9] fix changelog --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92390113..5a7ecea7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add nf-test to local module: `FUSIONREPORT_DETECT`. Improve `FUSIONREPORT_DOWNLOAD` module [#572](https://github.com/nf-core/rnafusion/pull/577) - Add nf-test to local subworkflow: `ARRIBA_WORKFLOW` [#578](https://github.com/nf-core/rnafusion/pull/578) - ### Changed - Updated modules and migrated non-specific modules to nf-core/modules [#484](https://github.com/nf-core/rnafusion/pull/484) From 4fd46f4e97034c94957e6140112d07965461426b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Wed, 11 Dec 2024 18:19:10 +0000 Subject: [PATCH 9/9] PR changes --- subworkflows/local/arriba_workflow/main.nf | 11 +- .../local/arriba_workflow/tests/main.nf.test | 147 +++++++++++++++++- .../arriba_workflow/tests/main.nf.test.snap | 135 +++++----------- workflows/rnafusion.nf | 5 + 4 files changed, 193 insertions(+), 105 deletions(-) diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf index 28a0a042..5f59916a 100644 --- a/subworkflows/local/arriba_workflow/main.nf +++ b/subworkflows/local/arriba_workflow/main.nf @@ -27,14 +27,14 @@ workflow ARRIBA_WORKFLOW { ch_cram_index = Channel.empty() ch_dummy_file = file("$baseDir/assets/dummy_file_arriba.txt", checkIfExists: true) - if (( arriba || all) && !fusioninspector_only) { + if (( arriba || all ) && !fusioninspector_only) { STAR_FOR_ARRIBA( reads, ch_starindex_ref, ch_gtf, star_ignore_sjdbgtf, - '', // seq_platform, should be a params like other pipelines + '', seq_center ) @@ -42,7 +42,7 @@ workflow ARRIBA_WORKFLOW { if ( arriba_fusions ) { - ch_arriba_fusions = reads.combine( Channel.value( file( arriba_fusions, checkIfExists: true ) ) ) // Should this be done in the main script? + ch_arriba_fusions = reads.combine( Channel.value( file( arriba_fusions, checkIfExists: true ) ) ) .map { meta, reads, fusions -> [ meta, fusions ] } ch_arriba_fusion_fail = ch_dummy_file @@ -52,7 +52,7 @@ workflow ARRIBA_WORKFLOW { STAR_FOR_ARRIBA.out.bam, ch_fasta, ch_gtf, - ch_arriba_ref_blacklist.map{ it[1] }, // should we update nf-core module to includes meta? + ch_arriba_ref_blacklist.map{ it[1] }, ch_arriba_ref_known_fusions.map{ it[1] }, ch_arriba_ref_cytobands.map{ it[1] }, ch_arriba_ref_protein_domains.map{ it[1] } @@ -80,8 +80,7 @@ workflow ARRIBA_WORKFLOW { } } else { - // Not sure how this dummy file can be useful - // If this tool can be skipped, why not just emitting an empty channel? + ch_arriba_fusions = reads .combine(Channel.value( file(ch_dummy_file, checkIfExists: true ) ) ) .map { meta, reads, fusions -> [ meta, fusions ] } diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test b/subworkflows/local/arriba_workflow/tests/main.nf.test index 96786a5b..e49a3768 100644 --- a/subworkflows/local/arriba_workflow/tests/main.nf.test +++ b/subworkflows/local/arriba_workflow/tests/main.nf.test @@ -125,13 +125,146 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot(workflow.out).match() } + { with(workflow.out) { + assert snapshot( + file(fusions[0][1]), + file(fusions_fail[0]), + file(cram_index[0][1]).name, + file(cram_index[0][2]).name, + versions.collect{ file(it) } + ).match() + } + } ) } } - // TEST #2 WITHOUT INDEXING + // Test #2 With arriba_fusions file + test("ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - External fusion file") { + + setup { + // Create genome index for STAR + run("STAR_GENOMEGENERATE") { + script "../../../../modules/nf-core/star/genomegenerate/main.nf" + process { + """ + // FASTA + input[0] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + + // GTF + input[1] = Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [[id: it.getName() ], it ]} + """ + } + } + } + + when { + workflow { + """ + // ch_reads + input[0] = Channel.of( + [ + [ id: "test_fastqs" ], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_1.fq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_2.fq.gz", checkIfExists: true) + ] + ] ) + + // ch_gtf + input[1] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_fasta + input[2] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_starindex_ref + input[3] = STAR_GENOMEGENERATE.out.index + + // ch_arriba_ref_blacklist + input[4] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_known_fusions + input[5] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_cytobands + input[6] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // ch_arriba_ref_protein_domains + input[7] = + Channel.fromPath( + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", checkIfExists: true + ) + .map{ [ [ id: it.name ], it ] } + + // arriba (boolean) + input[8] = true + + // all (boolean) + input[9] = true + + // fusioninspector_only (boolean) + input[10] = false + + // star_ignore_sjdbgtf (boolean) + input[11] = false + + // seq_center (string) + input[12] = 'test_center' + + // arriba_fusions (string path) + input[13] = "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/test_fastqs.arriba.fusions.tsv" + + // cram (array) + input[14] = [ 'arriba' ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { with(workflow.out) { + assert snapshot( + fusions[0].size() == 2, + fusions_fail.size() == 1, + file(cram_index[0][1]).name, + file(cram_index[0][2]).name, + versions.collect{ file(it) } + ).match() + } + } + ) + } + } + + // TEST #3 WITHOUT INDEXING test("ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - cram = []") { setup { @@ -241,7 +374,15 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot(workflow.out).match() } + { with(workflow.out) { + assert snapshot( + file(fusions[0][1]), + file(fusions_fail[0]), + cram_index.size() == 0, + versions.collect{ file(it) } + ).match() + } + } ) } } diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test.snap b/subworkflows/local/arriba_workflow/tests/main.nf.test.snap index 3aa777e8..2057827a 100644 --- a/subworkflows/local/arriba_workflow/tests/main.nf.test.snap +++ b/subworkflows/local/arriba_workflow/tests/main.nf.test.snap @@ -1,114 +1,57 @@ { + "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - External fusion file": { + "content": [ + true, + true, + "test_fastqs_star_for_arriba_sorted.cram", + "test_fastqs_star_for_arriba_sorted.cram.crai", + [ + "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9", + "versions.yml:md5,85458747b55f37c1a5afd39ee7a3a4aa", + "versions.yml:md5,bfc5d96804f2991c7f7c705f1ddf81ec", + "versions.yml:md5,f4f64d3f1fd867d5afa51e03f7cf2824" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-11T15:53:59.18258718" + }, "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4": { "content": [ - { - "0": [ - [ - { - "id": "test_fastqs" - }, - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/da/0878f3451a0f8db34ffd9b178974e6/test_fastqs.arriba.fusions.tsv" - ] - ], - "1": [ - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/da/0878f3451a0f8db34ffd9b178974e6/test_fastqs.arriba.fusions.discarded.tsv" - ], - "2": [ - [ - { - "id": "test_fastqs" - }, - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/c8/482130c0382c0d0318b9e98d0650da/test_fastqs_star_for_arriba_sorted.cram", - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/b5/57dffeefbc4801b0f7c4af2c8530e6/test_fastqs_star_for_arriba_sorted.cram.crai" - ] - ], - "3": [ - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/da/0878f3451a0f8db34ffd9b178974e6/versions.yml", - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/5b/18880ae27c2497bddad57c4b7ba766/versions.yml", - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/b5/57dffeefbc4801b0f7c4af2c8530e6/versions.yml", - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/c8/482130c0382c0d0318b9e98d0650da/versions.yml", - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/48/58af6204cc601a1a0e6e826a117f31/versions.yml" - ], - "cram_index": [ - [ - { - "id": "test_fastqs" - }, - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/c8/482130c0382c0d0318b9e98d0650da/test_fastqs_star_for_arriba_sorted.cram", - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/b5/57dffeefbc4801b0f7c4af2c8530e6/test_fastqs_star_for_arriba_sorted.cram.crai" - ] - ], - "fusions": [ - [ - { - "id": "test_fastqs" - }, - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/da/0878f3451a0f8db34ffd9b178974e6/test_fastqs.arriba.fusions.tsv" - ] - ], - "fusions_fail": [ - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/da/0878f3451a0f8db34ffd9b178974e6/test_fastqs.arriba.fusions.discarded.tsv" - ], - "versions": [ - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/da/0878f3451a0f8db34ffd9b178974e6/versions.yml", - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/5b/18880ae27c2497bddad57c4b7ba766/versions.yml", - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/b5/57dffeefbc4801b0f7c4af2c8530e6/versions.yml", - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/c8/482130c0382c0d0318b9e98d0650da/versions.yml", - "/workspace/rnafusion/.nf-test/tests/7899df1159e4d52047799028a4a24a9a/work/48/58af6204cc601a1a0e6e826a117f31/versions.yml" - ] - } + "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d", + "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98", + "test_fastqs_star_for_arriba_sorted.cram", + "test_fastqs_star_for_arriba_sorted.cram.crai", + [ + "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", + "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9", + "versions.yml:md5,85458747b55f37c1a5afd39ee7a3a4aa", + "versions.yml:md5,bfc5d96804f2991c7f7c705f1ddf81ec", + "versions.yml:md5,f4f64d3f1fd867d5afa51e03f7cf2824" + ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-09T18:02:58.142697295" + "timestamp": "2024-12-11T15:43:48.053656601" }, "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4 - cram = []": { "content": [ - { - "0": [ - [ - { - "id": "test_fastqs" - }, - "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d" - ] - ], - "1": [ - "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98" - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", - "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9" - ], - "cram_index": [ - - ], - "fusions": [ - [ - { - "id": "test_fastqs" - }, - "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d" - ] - ], - "fusions_fail": [ - "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98" - ], - "versions": [ - "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", - "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9" - ] - } + "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d", + "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98", + true, + [ + "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6", + "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9" + ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-09T18:15:28.965848873" + "timestamp": "2024-12-11T16:07:37.079418154" } } \ No newline at end of file diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 0908b30c..0c8a6e5f 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -85,6 +85,11 @@ workflow RNAFUSION { // // SUBWORKFLOW: Run STAR alignment and Arriba // + + // TODO: add params.seq_platform and pass it as argument to arriba_workflow + // TODO: improve how params.arriba_fusions would avoid running arriba module. Maybe inputed from samplesheet? + // TODO: same as above, but with ch_arriba_fusion_fail. It's currently replaces by a dummy file + ARRIBA_WORKFLOW ( ch_reads_all, ch_gtf,