nf-core · alanmmobbs93 · Dec 11, 2024 · Dec 6, 2024 · Dec 6, 2024 · Dec 9, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add nf-test to local module: `FUSIONREPORT_DOWNLOAD` [#560](https://github.com/nf-core/rnafusion/pull/560)
 - Add nf-test to local subworkflow: `QC_WORKFLOW` [#568](https://github.com/nf-core/rnafusion/pull/568)
 - Add nf-test to local subworkflow: `TRIM_WORKFLOW` [#572](https://github.com/nf-core/rnafusion/pull/572)
+- Add nf-test to local subworkflow: `ARRIBA_WORKFLOW` [#578](https://github.com/nf-core/rnafusion/pull/578)
 
 ### Changed
 

diff --git a/subworkflows/local/arriba_workflow.nf → subworkflows/local/arriba_workflow.nf.bkp b/subworkflows/local/arriba_workflow.nf → subworkflows/local/arriba_workflow.nf.bkp
diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf
@@ -0,0 +1,94 @@
+include { ARRIBA_ARRIBA                               } from '../../../modules/nf-core/arriba/arriba/main'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_FOR_ARRIBA } from '../../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_ARRIBA   } from '../../../modules/nf-core/samtools/sort/main'
+include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_ARRIBA   } from '../../../modules/nf-core/samtools/view/main'
+include { STAR_ALIGN as STAR_FOR_ARRIBA               } from '../../../modules/nf-core/star/align/main'
+
+// ARRIBA_WORKFLOW: align reads with STAR, then call gene fusions with arriba.
+// Runs only when (arriba || all) && !fusioninspector_only; otherwise a dummy file is emitted per sample.
+// When `cram` contains 'arriba', the STAR alignment is additionally sorted, converted to CRAM and indexed.
+workflow ARRIBA_WORKFLOW {
+    take:
+        reads                           // channel [ meta, [ fastqs ]         ]
+        ch_gtf                          // channel [ meta, path_gtf           ]
+        ch_fasta                        // channel [ meta, path_fasta         ]
+        ch_starindex_ref                // channel [ meta, path_index         ]
+        ch_arriba_ref_blacklist         // channel [ meta, path_blacklist     ]
+        ch_arriba_ref_known_fusions     // channel [ meta, path_known_fusions ]
+        ch_arriba_ref_cytobands         // channel [ meta, path_cytobands     ]
+        ch_arriba_ref_protein_domains   // channel [ meta, path_proteins      ]
+        arriba                          // boolean: enables this subworkflow
+        all                             // boolean: also enables this subworkflow
+        fusioninspector_only            // boolean: when true, this subworkflow is skipped
+        star_ignore_sjdbgtf             // boolean: forwarded to STAR_FOR_ARRIBA
+        seq_center                      // string:  forwarded to STAR_FOR_ARRIBA
+        arriba_fusions                  // path or null: precomputed fusions; when set, ARRIBA_ARRIBA is not run
+        cram                            // list of tool names, may be null/empty (TODO confirm type against caller)
+
+    main:
+        ch_versions = Channel.empty()
+        // Placeholder emitted wherever real arriba output is unavailable; existence is checked once here.
+        dummy_file  = file("${projectDir}/assets/dummy_file_arriba.txt", checkIfExists: true)
+
+        if ((arriba || all) && !fusioninspector_only) {
+            // STAR runs in this branch even when precomputed fusions are supplied.
+            STAR_FOR_ARRIBA(
+                reads,
+                ch_starindex_ref,
+                ch_gtf,
+                star_ignore_sjdbgtf,
+                '',                       // seq_platform, should be a params like other pipelines
+                seq_center
+            )
+            ch_versions = ch_versions.mix(STAR_FOR_ARRIBA.out.versions)
+
+            if (arriba_fusions) {
+                // User-supplied fusions: pair the same file with every sample's meta.
+                ch_arriba_fusions = reads
+                    .combine(Channel.value(file(arriba_fusions, checkIfExists: true)))
+                    .map { meta, _fastqs, fusions -> [ meta, fusions ] }
+                ch_arriba_fusion_fail = dummy_file
+            } else {
+                ARRIBA_ARRIBA(
+                    STAR_FOR_ARRIBA.out.bam,
+                    ch_fasta,
+                    ch_gtf,
+                    ch_arriba_ref_blacklist.map { it[1] },       // drop meta: these inputs are passed as bare paths
+                    ch_arriba_ref_known_fusions.map { it[1] },
+                    ch_arriba_ref_cytobands.map { it[1] },
+                    ch_arriba_ref_protein_domains.map { it[1] }
+                )
+                ch_versions = ch_versions.mix(ARRIBA_ARRIBA.out.versions)
+
+                ch_arriba_fusions     = ARRIBA_ARRIBA.out.fusions
+                ch_arriba_fusion_fail = ARRIBA_ARRIBA.out.fusions_fail.map { _meta, discarded -> discarded }
+            }
+
+            // Safe call: a null `cram` skips the CRAM export instead of throwing a NullPointerException.
+            if (cram?.contains('arriba')) {
+                // Sort the STAR BAM, convert it to CRAM, then index the CRAM.
+                SAMTOOLS_SORT_FOR_ARRIBA(STAR_FOR_ARRIBA.out.bam, ch_fasta)
+                ch_versions = ch_versions.mix(SAMTOOLS_SORT_FOR_ARRIBA.out.versions)
+
+                SAMTOOLS_VIEW_FOR_ARRIBA(SAMTOOLS_SORT_FOR_ARRIBA.out.bam.map { meta, bam -> [ meta, bam, [] ] }, ch_fasta, [])
+                ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_ARRIBA.out.versions)
+
+                SAMTOOLS_INDEX_FOR_ARRIBA(SAMTOOLS_VIEW_FOR_ARRIBA.out.cram)
+                ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_ARRIBA.out.versions)
+            }
+        } else {
+            // Tool skipped: pair every sample's meta with the dummy file.
+            // TODO(review): consider emitting an empty channel here instead of a placeholder.
+            ch_arriba_fusions = reads
+                .combine(Channel.value(dummy_file))
+                .map { meta, _fastqs, placeholder -> [ meta, placeholder ] }
+
+            ch_arriba_fusion_fail = dummy_file
+        }
+
+    emit:
+        fusions      = ch_arriba_fusions        // channel [ meta, fusions file ] (dummy file when skipped)
+        fusions_fail = ch_arriba_fusion_fail    // discarded-fusions file(s) without meta, or the dummy file
+        versions     = ch_versions              // versions.yml of every process that was invoked
+}
+
diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test b/subworkflows/local/arriba_workflow/tests/main.nf.test
@@ -0,0 +1,131 @@
+nextflow_workflow {
+    // Exercises ARRIBA_WORKFLOW from ../main.nf with remote chr4 test data and snapshots its outputs.
+    name "Test Subworkflow ARRIBA_WORKFLOW"
+    script "../main.nf"
+    workflow "ARRIBA_WORKFLOW"
+    tag "subworkflow"
+    tag "arriba"
+    tag "arriba/arriba"
+    tag "samtools"
+    tag "samtools/index"
+    tag "samtools/sort"
+    tag "samtools/view"
+    tag "star"
+    tag "star/genomegenerate"
+    tag "star/align"
+    // Scenario: arriba and all are true, fusioninspector_only is false, no precomputed fusions, cram = [ 'arriba' ].
+    test("ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4") {
+        // setup: build a STAR index from the chr4 FASTA/GTF; it is fed to the workflow as input[3].
+        setup {
+            // Create genome index for STAR
+            run("STAR_GENOMEGENERATE") {
+                script "../../../../modules/nf-core/star/genomegenerate/main"
+                process {
+                """
+                    // FASTA
+                    input[0] = Channel.fromPath(
+                            "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true
+                        )
+                        .map{ [[id: it.getName() ], it ]}
+
+                    // GTF
+                    input[1] = Channel.fromPath(
+                            "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true
+                        )
+                        .map{ [[id: it.getName() ], it ]}
+                    """
+                }
+            }
+        }
+        // when: the 15 positional inputs, each labelled inside the script string with the parameter it fills.
+        when {
+            workflow {
+                """
+                // ch_reads
+                input[0] = Channel.of(
+                    [
+                        [ id: "test_fastqs" ],
+                        [
+                            file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_1.fq.gz", checkIfExists: true),
+                            file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/human/reads_2.fq.gz", checkIfExists: true)
+                        ]
+                    ] )
+
+                // ch_gtf
+                input[1] =
+                    Channel.fromPath(
+                        "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf", checkIfExists: true
+                    )
+                    .map{ [ [ id: it.name ], it ] }
+
+                // ch_fasta
+                input[2] =
+                    Channel.fromPath(
+                        "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa", checkIfExists: true
+                    )
+                    .map{ [ [ id: it.name ], it ] }
+
+                // ch_starindex_ref
+                input[3] = STAR_GENOMEGENERATE.out.index
+
+                // ch_arriba_ref_blacklist
+                input[4] =
+                    Channel.fromPath(
+                        "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true
+                    )
+                    .map{ [ [ id: it.name ], it ] }
+
+                // ch_arriba_ref_known_fusions
+                input[5] =
+                    Channel.fromPath(
+                        "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", checkIfExists: true
+                    )
+                    .map{ [ [ id: it.name ], it ] }
+
+                // ch_arriba_ref_cytobands
+                input[6] =
+                    Channel.fromPath(
+                        "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", checkIfExists: true
+                    )
+                    .map{ [ [ id: it.name ], it ] }
+
+                // ch_arriba_ref_protein_domains
+                input[7] =
+                    Channel.fromPath(
+                        "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", checkIfExists: true
+                    )
+                    .map{ [ [ id: it.name ], it ] }
+
+                // arriba (boolean)
+                input[8] = true
+
+                // all (boolean)
+                input[9] = true
+
+                // fusioninspector_only  (boolean)
+                input[10] = false
+
+                // star_ignore_sjdbgtf (boolean)
+                input[11] = false
+
+                // seq_center (string)
+                input[12] = 'test_center'
+
+                // arriba_fusions (path)
+                input[13] = null
+
+                // cram (array??)
+                input[14] = [ 'arriba' ]
+                """
+            }
+        }
+        // then: the run must succeed and workflow.out must match the stored snapshot.
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(workflow.out).match() }
+            )
+        }
+    }
+
+}
diff --git a/subworkflows/local/arriba_workflow/tests/main.nf.test.snap b/subworkflows/local/arriba_workflow/tests/main.nf.test.snap
@@ -0,0 +1,49 @@
+{
+    "ARRIBA_WORKFLOW - Homo sapiens - FASTQs chr4": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test_fastqs"
+                        },
+                        "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d"
+                    ]
+                ],
+                "1": [
+                    "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98"
+                ],
+                "2": [
+                    "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6",
+                    "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9",
+                    "versions.yml:md5,85458747b55f37c1a5afd39ee7a3a4aa",
+                    "versions.yml:md5,bfc5d96804f2991c7f7c705f1ddf81ec",
+                    "versions.yml:md5,f4f64d3f1fd867d5afa51e03f7cf2824"
+                ],
+                "fusions": [
+                    [
+                        {
+                            "id": "test_fastqs"
+                        },
+                        "test_fastqs.arriba.fusions.tsv:md5,8f39789c4428e81eb9a8d0e54c34c43d"
+                    ]
+                ],
+                "fusions_fail": [
+                    "test_fastqs.arriba.fusions.discarded.tsv:md5,b804c1ed5b01d34163f5c0b2f6810f98"
+                ],
+                "versions": [
+                    "versions.yml:md5,24030f38976402fad0861e6ec99ee6b6",
+                    "versions.yml:md5,439bbb92ff0a83f1e278fc396e9d8ce9",
+                    "versions.yml:md5,85458747b55f37c1a5afd39ee7a3a4aa",
+                    "versions.yml:md5,bfc5d96804f2991c7f7c705f1ddf81ec",
+                    "versions.yml:md5,f4f64d3f1fd867d5afa51e03f7cf2824"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.10.2"
+        },
+        "timestamp": "2024-12-06T18:40:22.919178866"
+    }
+}
diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf
@@ -92,7 +92,15 @@ workflow RNAFUSION {
         ch_starindex_ensembl_ref,
         ch_arriba_ref_blacklist,
         ch_arriba_ref_known_fusions,
-        ch_arriba_ref_protein_domains
+        ch_arriba_ref_cytobands,
+        ch_arriba_ref_protein_domains,
+        params.arriba,                  // boolean
+        params.all,                     // boolean
+        params.fusioninspector_only,    // boolean
+        params.star_ignore_sjdbgtf,     // boolean
+        params.seq_center ?: '',        // string, empty when unset
+        params.arriba_fusions,          // path
+        params.cram                     // array?
     )
     ch_versions = ch_versions.mix(ARRIBA_WORKFLOW.out.versions)