From d19f77aa275ae87cb8509d3d5b1c0f0eff8442c8 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <noronhaa@mskcc.org>
Date: Fri, 20 Sep 2024 12:43:28 -0400
Subject: [PATCH 01/28] updating GRCh38 references

---
 conf/igenomes.config | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/conf/igenomes.config b/conf/igenomes.config
index c618acc..ec6d5b0 100644
--- a/conf/igenomes.config
+++ b/conf/igenomes.config
@@ -34,11 +34,14 @@ params {
             ensembl_version = 75
         }
         'GRCh38' {
-            fasta          = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa"
-            gtf            = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf"
-            refflat        = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes.gencode/refFlat.txt.gz"
-            starfusion_url = "https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz"
-            cdna           = "https://ftp.ensembl.org/pub/release-86/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"
+            ensembl_version      = 88
+            fasta                = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38Decoy/Sequence/WholeGenomeFasta/genome.fa"
+            gtf                  = "https://ftp.ensembl.org/pub/release-88/gtf/homo_sapiens/Homo_sapiens.GRCh38.88.chr.gtf.gz"
+            //forte will generate refflat from gtf
+            refflat              = null
+            starfusion_url       = "https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz"
+            cdna                 = "https://ftp.ensembl.org/pub/release-88/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"
+            metafusion_blocklist = "https://raw.githubusercontent.com/anoronh4/forte-references/main/GRCh38/blocklist_breakpoints.hg38.bedpe.gz"
         }
         'smallGRCh37' {
             fasta          = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta"
@@ -48,7 +51,6 @@ params {
             cdna           = "http://ftp.ensemblgenomes.org/pub/viruses/fasta/sars_cov_2/cdna/Sars_cov_2.ASM985889v3.cdna.all.fa.gz"
             metafusion_blocklist = "https://raw.githubusercontent.com/anoronh4/forte-references/main/GRCh37_test/blocklist_breakpoints.bedpe"
             ensembl_version = 75
-
         }
 /*
         'hg38' {

From f8069a297344bcf0451c1f4bdcf4d1a21be8ea7b Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Mon, 23 Sep 2024 12:51:27 -0400
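Subject: [PATCH 02/28] fix gtf channel for GRCh38 mode

PREPARE_REFERENCES now builds `gtf` as a [meta, path] tuple
(meta = [id: params.genome]) in both the gzipped and the plain-GTF
branch, so it can be handed directly to UCSC_GTFTOGENEPRED,
AGAT_SPADDINTRONS and METAFUSION_GENEINFO.

Call sites that still expect a bare path (STAR_GENOMEGENERATE,
STAR_ALIGN, STAR_FOR_ARRIBA, ARRIBA, HTSEQ_COUNT, FEATURECOUNTS_GENE,
COUNT_FEATURES) unwrap it with `gtf.map{ it[1] }`.

---
NOTE(review): the plain-GTF branch uses
`Channel.of([[id:params.genome], params.gtf])`, which is a queue channel
that emits once, while the gzipped branch ends in `.first()` (a value
channel). If this channel is what the downstream subworkflows receive,
`gtf.map{ it[1] }` stays a one-item queue channel and per-sample
processes such as STAR_ALIGN would presumably run for a single sample
only when the GTF is not gzipped. `Channel.value(...)` (or a trailing
`.first()`) would make both branches behave the same -- please confirm.
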
 subworkflows/local/align_reads.nf        |  2 +-
 subworkflows/local/fusion.nf             |  4 ++--
 subworkflows/local/prepare_references.nf | 14 +++++++-------
 subworkflows/local/quantification.nf     |  6 +++---
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/subworkflows/local/align_reads.nf b/subworkflows/local/align_reads.nf
index 8e5c410..8f50d57 100644
--- a/subworkflows/local/align_reads.nf
+++ b/subworkflows/local/align_reads.nf
@@ -19,7 +19,7 @@ workflow ALIGN_READS {
     STAR_ALIGN(
         reads,
         star_index,
-        gtf,
+        gtf.map{it[1]},
         false,
         [],
         []
diff --git a/subworkflows/local/fusion.nf b/subworkflows/local/fusion.nf
index 0539428..6f8578c 100644
--- a/subworkflows/local/fusion.nf
+++ b/subworkflows/local/fusion.nf
@@ -41,7 +41,7 @@ workflow FUSION {
     STAR_FOR_ARRIBA(
         reads,
         star_index,
-        gtf,
+        gtf.map{it[1]},
         false,
         [],
         []
@@ -51,7 +51,7 @@ workflow FUSION {
     ARRIBA(
         STAR_FOR_ARRIBA.out.bam,
         fasta,
-        gtf,
+        gtf.map{it[1]},
         arriba_blacklist,
         arriba_known_fusions,
         [],
diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf
index b1d06a8..a4abdd8 100644
--- a/subworkflows/local/prepare_references.nf
+++ b/subworkflows/local/prepare_references.nf
@@ -25,10 +25,10 @@ workflow PREPARE_REFERENCES {
     ch_versions = Channel.empty()
 
     if (params.gtf.endsWith(".gz")){
-        GUNZIP_GTF([[:],params.gtf])
-        gtf = GUNZIP_GTF.out.gunzip.map{ it[1] }.first()
+        GUNZIP_GTF([[id:params.genome],params.gtf])
+        gtf = GUNZIP_GTF.out.gunzip.first()
     } else {
-        gtf = params.gtf
+        gtf = Channel.of([[id:params.genome],params.gtf])
     }
 
     if (params.metafusion_blocklist.endsWith(".gz")){
@@ -38,11 +38,11 @@ workflow PREPARE_REFERENCES {
         metafusion_blocklist = params.metafusion_blocklist
     }
 
-    STAR_GENOMEGENERATE(params.fasta,gtf)
+    STAR_GENOMEGENERATE(params.fasta,gtf.map{it[1] }.first())
     ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
     star_index = STAR_GENOMEGENERATE.out.index
 
-    UCSC_GTFTOGENEPRED(Channel.value(gtf).map{[[id:params.genome],it]})
+    UCSC_GTFTOGENEPRED(gtf)
     ch_versions = ch_versions.mix(UCSC_GTFTOGENEPRED.out.versions)
 
     UCSC_GENEPREDTOBED(UCSC_GTFTOGENEPRED.out.genepred)
@@ -85,7 +85,7 @@ workflow PREPARE_REFERENCES {
     ARRIBA_DOWNLOAD()
 
     AGAT_SPADDINTRONS(
-        [[:],gtf],
+        gtf,
         []
     )
 
@@ -94,7 +94,7 @@ workflow PREPARE_REFERENCES {
     )
 
     METAFUSION_GENEINFO(
-        [[:],gtf], starfusion_ref, fusioncatcher_ref
+        gtf,starfusion_ref, fusioncatcher_ref
     )
 
     AGFUSION_DOWNLOAD(
diff --git a/subworkflows/local/quantification.nf b/subworkflows/local/quantification.nf
index 6a1c543..43321b5 100644
--- a/subworkflows/local/quantification.nf
+++ b/subworkflows/local/quantification.nf
@@ -19,14 +19,14 @@ workflow QUANTIFICATION {
 
     HTSEQ_COUNT(
         bam.join(bai,by:[0]),
-        gtf
+        gtf.map{it[1]}
     )
     ch_versions   = ch_versions.mix(HTSEQ_COUNT.out.versions)
 
 
     FEATURECOUNTS_GENE(
         bam,
-        gtf
+        gtf.map{it[1]}
     )
     ch_versions = ch_versions.mix(FEATURECOUNTS_GENE.out.versions)
 
@@ -40,7 +40,7 @@ workflow QUANTIFICATION {
 
     COUNT_FEATURES(
         KALLISTO_QUANT.out.abundance,
-        gtf
+        gtf.map{it[1]}
     )
 
 

From 16dd74ebbb4a5a061bf783758a0067edb5917631 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Mon, 23 Sep 2024 13:28:14 -0400
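Subject: [PATCH 03/28] update star/genomegenerate

Bump nf-core star/genomegenerate to
666652151335353eef2fcd58880bcef5bc2928e1.

The process interface changes with this version:
  - inputs are now `tuple val(meta), path(fasta)` and
    `tuple val(meta2), path(gtf)`
  - the index is emitted as `tuple val(meta), path("star")`
  - `--sjdbGTFfile` is only passed when a GTF is supplied

PREPARE_REFERENCES is adjusted to pass
`[[id:params.genome], params.fasta]` and the gtf tuple channel.

---
NOTE(review): after this patch `star_index =
STAR_GENOMEGENERATE.out.index` carries a [meta, path] tuple, but the
STAR_ALIGN / STAR_FOR_ARRIBA call sites are not touched here.
Presumably this is reconciled by the star/align update in PATCH 04 --
confirm whether this intermediate commit is expected to run on its own.
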
 modules.json                                  |   2 +-
 .../star/genomegenerate/environment.yml       |   9 ++
 modules/nf-core/star/genomegenerate/main.nf   |  97 +++++++-----
 modules/nf-core/star/genomegenerate/meta.yml  |  47 ++++--
 .../star/genomegenerate/tests/main.nf.test    | 114 ++++++++++++++
 .../genomegenerate/tests/main.nf.test.snap    | 148 ++++++++++++++++++
 .../star/genomegenerate/tests/tags.yml        |   2 +
 subworkflows/local/prepare_references.nf      |   5 +-
 8 files changed, 371 insertions(+), 53 deletions(-)
 create mode 100644 modules/nf-core/star/genomegenerate/environment.yml
 create mode 100644 modules/nf-core/star/genomegenerate/tests/main.nf.test
 create mode 100644 modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/star/genomegenerate/tests/tags.yml

diff --git a/modules.json b/modules.json
index 2be0262..13b9239 100644
--- a/modules.json
+++ b/modules.json
@@ -154,7 +154,7 @@
                     },
                     "star/genomegenerate": {
                         "branch": "master",
-                        "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "subread/featurecounts": {
diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml
new file mode 100644
index 0000000..1debc4c
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/environment.yml
@@ -0,0 +1,9 @@
+channels:
+  - conda-forge
+  - bioconda
+
+dependencies:
+  - bioconda::htslib=1.18
+  - bioconda::samtools=1.18
+  - bioconda::star=2.7.10a
+  - conda-forge::gawk=5.1.0
diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf
index 9146248..b885571 100644
--- a/modules/nf-core/star/genomegenerate/main.nf
+++ b/modules/nf-core/star/genomegenerate/main.nf
@@ -2,26 +2,27 @@ process STAR_GENOMEGENERATE {
     tag "$fasta"
     label 'process_high'
 
-    conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' :
-        'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }"
+        'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' :
+        'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }"
 
     input:
-    path fasta
-    path gtf
+    tuple val(meta), path(fasta)
+    tuple val(meta2), path(gtf)
 
     output:
-    path "star"        , emit: index
-    path "versions.yml", emit: versions
+    tuple val(meta), path("star")  , emit: index
+    path "versions.yml"            , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
-    def args_list = args.tokenize()
-    def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
+    def args        = task.ext.args ?: ''
+    def args_list   = args.tokenize()
+    def memory      = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
+    def include_gtf = gtf ? "--sjdbGTFfile $gtf" : ''
     if (args_list.contains('--genomeSAindexNbases')) {
         """
         mkdir star
@@ -29,7 +30,7 @@ process STAR_GENOMEGENERATE {
             --runMode genomeGenerate \\
             --genomeDir star/ \\
             --genomeFastaFiles $fasta \\
-            --sjdbGTFfile $gtf \\
+            $include_gtf \\
             --runThreadN $task.cpus \\
             $memory \\
             $args
@@ -51,7 +52,7 @@ process STAR_GENOMEGENERATE {
             --runMode genomeGenerate \\
             --genomeDir star/ \\
             --genomeFastaFiles $fasta \\
-            --sjdbGTFfile $gtf \\
+            $include_gtf \\
             --runThreadN $task.cpus \\
             --genomeSAindexNbases \$NUM_BASES \\
             $memory \\
@@ -67,30 +68,52 @@ process STAR_GENOMEGENERATE {
     }
 
     stub:
-    """
-    mkdir star
-    touch star/Genome
-    touch star/Log.out
-    touch star/SA
-    touch star/SAindex
-    touch star/chrLength.txt
-    touch star/chrName.txt
-    touch star/chrNameLength.txt
-    touch star/chrStart.txt
-    touch star/exonGeTrInfo.tab
-    touch star/exonInfo.tab
-    touch star/geneInfo.tab
-    touch star/genomeParameters.txt
-    touch star/sjdbInfo.txt
-    touch star/sjdbList.fromGTF.out.tab
-    touch star/sjdbList.out.tab
-    touch star/transcriptInfo.tab
+    if (gtf) {
+        """
+        mkdir star
+        touch star/Genome
+        touch star/Log.out
+        touch star/SA
+        touch star/SAindex
+        touch star/chrLength.txt
+        touch star/chrName.txt
+        touch star/chrNameLength.txt
+        touch star/chrStart.txt
+        touch star/exonGeTrInfo.tab
+        touch star/exonInfo.tab
+        touch star/geneInfo.tab
+        touch star/genomeParameters.txt
+        touch star/sjdbInfo.txt
+        touch star/sjdbList.fromGTF.out.tab
+        touch star/sjdbList.out.tab
+        touch star/transcriptInfo.tab
 
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        star: \$(STAR --version | sed -e "s/STAR_//g")
-        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
-        gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
-    END_VERSIONS
-    """
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            star: \$(STAR --version | sed -e "s/STAR_//g")
+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+            gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+        END_VERSIONS
+        """
+    } else {
+        """
+        mkdir star
+        touch star/Genome
+        touch star/Log.out
+        touch star/SA
+        touch star/SAindex
+        touch star/chrLength.txt
+        touch star/chrName.txt
+        touch star/chrNameLength.txt
+        touch star/chrStart.txt
+        touch star/genomeParameters.txt
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            star: \$(STAR --version | sed -e "s/STAR_//g")
+            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+            gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+        END_VERSIONS
+        """
+    }
 }
diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml
index 8181157..33c1f65 100644
--- a/modules/nf-core/star/genomegenerate/meta.yml
+++ b/modules/nf-core/star/genomegenerate/meta.yml
@@ -14,24 +14,43 @@ tools:
       manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf
       doi: 10.1093/bioinformatics/bts635
       licence: ["MIT"]
+      identifier: biotools:star
 input:
-  - fasta:
-      type: file
-      description: Fasta file of the reference genome
-  - gtf:
-      type: file
-      description: GTF file of the reference genome
-
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - fasta:
+        type: file
+        description: Fasta file of the reference genome
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - gtf:
+        type: file
+        description: GTF file of the reference genome
 output:
   - index:
-      type: directory
-      description: Folder containing the star index files
-      pattern: "star"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - star:
+          type: directory
+          description: Folder containing the star index files
+          pattern: "star"
   - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
 authors:
   - "@kevinmenden"
   - "@drpatelh"
+maintainers:
+  - "@kevinmenden"
+  - "@drpatelh"
diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test
new file mode 100644
index 0000000..4d619c4
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test
@@ -0,0 +1,114 @@
+nextflow_process {
+
+    name "Test Process STAR_GENOMEGENERATE"
+    script "../main.nf"
+    process "STAR_GENOMEGENERATE"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "star"
+    tag "star/genomegenerate"
+
+    test("fasta_gtf") {
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test_fasta' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                ])
+                input[1] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString(),
+                    process.out.versions)
+                .match() }
+            )
+        }
+    }
+
+    test("fasta") {
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test_fasta' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                ])
+                input[1] = Channel.of([ [], [] ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString(),
+                    process.out.versions
+                ).match() }
+            )
+        }
+    }
+
+    test("fasta_gtf_stub") {
+
+        options '-stub'
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test_fasta' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                ])
+                input[1] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("fasta_stub") {
+
+        options '-stub'
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test_fasta' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                ])
+                input[1] = Channel.of([ [], [] ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
new file mode 100644
index 0000000..207f4b4
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
@@ -0,0 +1,148 @@
+{
+    "fasta_gtf": {
+        "content": [
+            "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]",
+            [
+                "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T14:55:35.478401"
+    },
+    "fasta_gtf_stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test_fasta"
+                        },
+                        [
+                            "Genome:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "SA:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
+                ],
+                "index": [
+                    [
+                        {
+                            "id": "test_fasta"
+                        },
+                        [
+                            "Genome:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "SA:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T14:55:57.247585"
+    },
+    "fasta_stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test_fasta"
+                        },
+                        [
+                            "Genome:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "SA:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
+                ],
+                "index": [
+                    [
+                        {
+                            "id": "test_fasta"
+                        },
+                        [
+                            "Genome:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "SA:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T14:56:07.01742"
+    },
+    "fasta": {
+        "content": [
+            "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]",
+            [
+                "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T14:55:45.48784"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml
new file mode 100644
index 0000000..79f619b
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/tests/tags.yml
@@ -0,0 +1,2 @@
+star/genomegenerate:
+  - modules/nf-core/star/genomegenerate/**
diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf
index a4abdd8..938652a 100644
--- a/subworkflows/local/prepare_references.nf
+++ b/subworkflows/local/prepare_references.nf
@@ -38,7 +38,10 @@ workflow PREPARE_REFERENCES {
         metafusion_blocklist = params.metafusion_blocklist
     }
 
-    STAR_GENOMEGENERATE(params.fasta,gtf.map{it[1] }.first())
+    STAR_GENOMEGENERATE(
+        [[id:params.genome],params.fasta],
+        gtf
+    )
     ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
     star_index = STAR_GENOMEGENERATE.out.index
 

From e30de977fb7fb6752f1ec393116bde1c15a8172a Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Wed, 25 Sep 2024 17:31:11 -0400
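Subject: [PATCH 04/28] adjustments to make GRCh38 run through

- igenomes.config: GRCh38 `fasta` now points at the Ensembl release-88
  primary-assembly download (the iGenomes GRCh38Decoy path is kept as a
  comment); `gtf` switches from the `.chr` GTF to the full release-88
  GTF.
- modules.config: the `withName: ARRIBA` selector is renamed to
  `ARRIBA_ARRIBA`.
- modules.json: install nf-core arriba/arriba; update
  gatk4/bedtointervallist, gatk4/createsequencedictionary,
  samtools/faidx and star/align; record a local diff for
  star/genomegenerate.
- local subworkflows (align_reads, fillout, fusion,
  prepare_references, qc) and workflows/forte.nf are adapted to the
  updated modules.

---
NOTE(review): the new `fasta` line in conf/igenomes.config is indented
with a tab followed by spaces, while the surrounding lines use spaces
only; worth normalising in a follow-up.
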
 conf/igenomes.config                          |    5 +-
 conf/modules.config                           |    2 +-
 modules.json                                  |   16 +-
 modules/nf-core/arriba/arriba/environment.yml |    5 +
 modules/nf-core/arriba/arriba/main.nf         |   68 +
 modules/nf-core/arriba/arriba/meta.yml        |  123 +
 .../gatk4/bedtointervallist/environment.yml   |    5 +
 .../nf-core/gatk4/bedtointervallist/main.nf   |   15 +-
 .../nf-core/gatk4/bedtointervallist/meta.yml  |   60 +-
 .../bedtointervallist/tests/main.nf.test      |   38 +
 .../bedtointervallist/tests/main.nf.test.snap |   35 +
 .../gatk4/bedtointervallist/tests/tags.yml    |    2 +
 .../createsequencedictionary/environment.yml  |    5 +
 .../gatk4/createsequencedictionary/main.nf    |   19 +-
 .../gatk4/createsequencedictionary/meta.yml   |   55 +-
 .../tests/main.nf.test                        |   56 +
 .../tests/main.nf.test.snap                   |   68 +
 .../createsequencedictionary/tests/tags.yml   |    2 +
 .../nf-core/samtools/faidx/environment.yml    |    7 +
 modules/nf-core/samtools/faidx/main.nf        |   22 +-
 modules/nf-core/samtools/faidx/meta.yml       |   79 +-
 .../nf-core/samtools/faidx/tests/main.nf.test |  122 +
 .../samtools/faidx/tests/main.nf.test.snap    |  249 +++
 .../samtools/faidx/tests/nextflow.config      |    7 +
 .../samtools/faidx/tests/nextflow2.config     |    6 +
 modules/nf-core/samtools/faidx/tests/tags.yml |    2 +
 modules/nf-core/star/align/environment.yml    |    9 +
 modules/nf-core/star/align/main.nf            |   14 +-
 modules/nf-core/star/align/meta.yml           |  228 +-
 modules/nf-core/star/align/tests/main.nf.test |  609 +++++
 .../star/align/tests/main.nf.test.snap        | 1973 +++++++++++++++++
 .../star/align/tests/nextflow.arriba.config   |   14 +
 .../nf-core/star/align/tests/nextflow.config  |   14 +
 .../align/tests/nextflow.starfusion.config    |   14 +
 modules/nf-core/star/align/tests/tags.yml     |    2 +
 modules/nf-core/star/genomegenerate/main.nf   |    2 +-
 .../genomegenerate/star-genomegenerate.diff   |   20 +
 subworkflows/local/align_reads.nf             |    2 +-
 subworkflows/local/fillout.nf                 |    2 +-
 subworkflows/local/fusion.nf                  |   31 +-
 subworkflows/local/prepare_references.nf      |   15 +-
 subworkflows/local/qc.nf                      |    8 +-
 workflows/forte.nf                            |    5 +-
 43 files changed, 3854 insertions(+), 181 deletions(-)
 create mode 100644 modules/nf-core/arriba/arriba/environment.yml
 create mode 100644 modules/nf-core/arriba/arriba/main.nf
 create mode 100644 modules/nf-core/arriba/arriba/meta.yml
 create mode 100644 modules/nf-core/gatk4/bedtointervallist/environment.yml
 create mode 100644 modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test
 create mode 100644 modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/gatk4/bedtointervallist/tests/tags.yml
 create mode 100644 modules/nf-core/gatk4/createsequencedictionary/environment.yml
 create mode 100644 modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test
 create mode 100644 modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml
 create mode 100644 modules/nf-core/samtools/faidx/environment.yml
 create mode 100644 modules/nf-core/samtools/faidx/tests/main.nf.test
 create mode 100644 modules/nf-core/samtools/faidx/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/samtools/faidx/tests/nextflow.config
 create mode 100644 modules/nf-core/samtools/faidx/tests/nextflow2.config
 create mode 100644 modules/nf-core/samtools/faidx/tests/tags.yml
 create mode 100644 modules/nf-core/star/align/environment.yml
 create mode 100644 modules/nf-core/star/align/tests/main.nf.test
 create mode 100644 modules/nf-core/star/align/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/star/align/tests/nextflow.arriba.config
 create mode 100644 modules/nf-core/star/align/tests/nextflow.config
 create mode 100644 modules/nf-core/star/align/tests/nextflow.starfusion.config
 create mode 100644 modules/nf-core/star/align/tests/tags.yml
 create mode 100644 modules/nf-core/star/genomegenerate/star-genomegenerate.diff

diff --git a/conf/igenomes.config b/conf/igenomes.config
index ec6d5b0..a653ef7 100644
--- a/conf/igenomes.config
+++ b/conf/igenomes.config
@@ -35,8 +35,9 @@ params {
         }
         'GRCh38' {
             ensembl_version      = 88
-            fasta                = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38Decoy/Sequence/WholeGenomeFasta/genome.fa"
-            gtf                  = "https://ftp.ensembl.org/pub/release-88/gtf/homo_sapiens/Homo_sapiens.GRCh38.88.chr.gtf.gz"
+            //fasta                = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38Decoy/Sequence/WholeGenomeFasta/genome.fa"
+	    fasta                = "https://ftp.ensembl.org/pub/release-88/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
+            gtf                  = "https://ftp.ensembl.org/pub/release-88/gtf/homo_sapiens/Homo_sapiens.GRCh38.88.gtf.gz"
             //forte will generate refflat from gtf
             refflat              = null
             starfusion_url       = "https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz"
diff --git a/conf/modules.config b/conf/modules.config
index 9277358..5f15d4e 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -475,7 +475,7 @@ process {
         ]
     }
 
-    withName: ARRIBA {
+    withName: ARRIBA_ARRIBA {
         ext.args = {
             "-s ${meta.single_end || meta.strandedness == "forward" ? "yes" : meta.strandedness == "reverse" ? "reverse" : "no" }"
         }
diff --git a/modules.json b/modules.json
index 13b9239..5a728db 100644
--- a/modules.json
+++ b/modules.json
@@ -26,6 +26,11 @@
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
                         "installed_by": ["modules"]
                     },
+                    "arriba/arriba": {
+                        "branch": "master",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
+                        "installed_by": ["modules"]
+                    },
                     "cat/cat": {
                         "branch": "master",
                         "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
@@ -48,12 +53,12 @@
                     },
                     "gatk4/bedtointervallist": {
                         "branch": "master",
-                        "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "gatk4/createsequencedictionary": {
                         "branch": "master",
-                        "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "gunzip": {
@@ -134,7 +139,7 @@
                     },
                     "samtools/faidx": {
                         "branch": "master",
-                        "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "samtools/index": {
@@ -149,13 +154,14 @@
                     },
                     "star/align": {
                         "branch": "master",
-                        "git_sha": "57d75dbac06812c59798a48585032f6e50bb1914",
+                        "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
                     "star/genomegenerate": {
                         "branch": "master",
                         "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
-                        "installed_by": ["modules"]
+                        "installed_by": ["modules"],
+                        "patch": "modules/nf-core/star/genomegenerate/star-genomegenerate.diff"
                     },
                     "subread/featurecounts": {
                         "branch": "master",
diff --git a/modules/nf-core/arriba/arriba/environment.yml b/modules/nf-core/arriba/arriba/environment.yml
new file mode 100644
index 0000000..d0883a0
--- /dev/null
+++ b/modules/nf-core/arriba/arriba/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::arriba=2.4.0
diff --git a/modules/nf-core/arriba/arriba/main.nf b/modules/nf-core/arriba/arriba/main.nf
new file mode 100644
index 0000000..761d0bf
--- /dev/null
+++ b/modules/nf-core/arriba/arriba/main.nf
@@ -0,0 +1,75 @@
+// Detects gene fusions with Arriba from a BAM/CRAM/SAM file (passed via -x).
+// Emits the passing fusions table, the discarded fusions table and versions.yml.
+// The blacklist, known_fusions, structural_variants, tags and protein_domains
+// inputs are optional: an empty value (e.g. []) omits the corresponding flag.
+process ARRIBA_ARRIBA {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/arriba:2.4.0--h0033a41_2' :
+        'biocontainers/arriba:2.4.0--h0033a41_2' }"
+
+    input:
+    tuple val(meta), path(bam)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(gtf)
+    tuple val(meta4), path(blacklist)
+    tuple val(meta5), path(known_fusions)
+    tuple val(meta6), path(structural_variants)
+    tuple val(meta7), path(tags)
+    tuple val(meta8), path(protein_domains)
+
+    output:
+    tuple val(meta), path("*.fusions.tsv")          , emit: fusions
+    tuple val(meta), path("*.fusions.discarded.tsv"), emit: fusions_fail
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Turn each optional input into its CLI flag, or an empty string when absent.
+    // Without a blacklist file, `-f blacklist` is passed instead of `-b`
+    // (Arriba's -f option lists filters to disable).
+    def blacklist = blacklist ? "-b $blacklist" : "-f blacklist"
+    def known_fusions = known_fusions ? "-k $known_fusions" : ""
+    def structural_variants = structural_variants ? "-d $structural_variants" : ""
+    def tags = tags ? "-t $tags" : ""
+    def protein_domains = protein_domains ? "-p $protein_domains" : ""
+
+    """
+    arriba \\
+        -x $bam \\
+        -a $fasta \\
+        -g $gtf \\
+        -o ${prefix}.fusions.tsv \\
+        -O ${prefix}.fusions.discarded.tsv \\
+        $blacklist \\
+        $known_fusions \\
+        $structural_variants \\
+        $tags \\
+        $protein_domains \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        arriba: \$(arriba -h | grep 'Version:' 2>&1 |  sed 's/Version:\s//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo stub > ${prefix}.fusions.tsv
+    echo stub > ${prefix}.fusions.discarded.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        arriba: \$(arriba -h | grep 'Version:' 2>&1 |  sed 's/Version:\s//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/arriba/arriba/meta.yml b/modules/nf-core/arriba/arriba/meta.yml
new file mode 100644
index 0000000..f230dda
--- /dev/null
+++ b/modules/nf-core/arriba/arriba/meta.yml
@@ -0,0 +1,123 @@
+name: arriba_arriba
+description: Arriba is a command-line tool for the detection of gene fusions from
+  RNA-Seq data.
+keywords:
+  - fusion
+  - arriba
+  - detection
+  - RNA-Seq
+tools:
+  - arriba:
+      description: Fast and accurate gene fusion detection from RNA-Seq data
+      homepage: https://github.com/suhrig/arriba
+      documentation: https://arriba.readthedocs.io/en/latest/
+      tool_dev_url: https://github.com/suhrig/arriba
+      doi: "10.1101/gr.257246.119"
+      licence: ["MIT"]
+      identifier: biotools:Arriba
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - bam:
+        type: file
+        description: BAM/CRAM/SAM file
+        pattern: "*.{bam,cram,sam}"
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - fasta:
+        type: file
+        description: Assembly FASTA file
+        pattern: "*.{fasta}"
+  - - meta3:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - gtf:
+        type: file
+        description: Annotation GTF file
+        pattern: "*.{gtf}"
+  - - meta4:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - blacklist:
+        type: file
+        description: Blacklist file
+        pattern: "*.{tsv}"
+  - - meta5:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - known_fusions:
+        type: file
+        description: Known fusions file
+        pattern: "*.{tsv}"
+  - - meta6:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - structural_variants:
+        type: file
+        description: Structural variants file
+        pattern: "*.{tsv}"
+  - - meta7:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - tags:
+        type: file
+        description: Tags file
+        pattern: "*.{tsv}"
+  - - meta8:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - protein_domains:
+        type: file
+        description: Protein domains file
+        pattern: "*.{gff3}"
+output:
+  - fusions:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.fusions.tsv":
+          type: file
+          description: File contains fusions which pass all of Arriba's filters.
+          pattern: "*.{fusions.tsv}"
+  - fusions_fail:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.fusions.discarded.tsv":
+          type: file
+          description: File contains fusions that Arriba classified as an artifact or
+            that are also observed in healthy tissue.
+          pattern: "*.{fusions.discarded.tsv}"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@praveenraj2018"
+  - "@rannick"
+maintainers:
+  - "@praveenraj2018"
+  - "@rannick"
diff --git a/modules/nf-core/gatk4/bedtointervallist/environment.yml b/modules/nf-core/gatk4/bedtointervallist/environment.yml
new file mode 100644
index 0000000..55993f4
--- /dev/null
+++ b/modules/nf-core/gatk4/bedtointervallist/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gatk4=4.5.0.0
diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf
index 41fab00..68863d6 100644
--- a/modules/nf-core/gatk4/bedtointervallist/main.nf
+++ b/modules/nf-core/gatk4/bedtointervallist/main.nf
@@ -2,14 +2,14 @@ process GATK4_BEDTOINTERVALLIST {
     tag "$meta.id"
     label 'process_medium'
 
-    conda "bioconda::gatk4=4.3.0.0"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/gatk4:4.3.0.0--py36hdfd78af_0':
-        'quay.io/biocontainers/gatk4:4.3.0.0--py36hdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0':
+        'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }"
 
     input:
     tuple val(meta), path(bed)
-    path  dict
+    tuple val(meta2), path(dict)
 
     output:
     tuple val(meta), path('*.interval_list'), emit: interval_list
@@ -22,14 +22,15 @@ process GATK4_BEDTOINTERVALLIST {
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
 
-    def avail_mem = 3
+    def avail_mem = 3072
     if (!task.memory) {
         log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
     } else {
-        avail_mem = task.memory.giga
+        avail_mem = (task.memory.mega*0.8).intValue()
     }
     """
-    gatk --java-options "-Xmx${avail_mem}g" BedToIntervalList \\
+    gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\
+        BedToIntervalList \\
         --INPUT $bed \\
         --OUTPUT ${prefix}.interval_list \\
         --SEQUENCE_DICTIONARY $dict \\
diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml
index 986f159..25348e1 100644
--- a/modules/nf-core/gatk4/bedtointervallist/meta.yml
+++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml
@@ -2,6 +2,8 @@ name: gatk4_bedtointervallist
 description: Creates an interval list from a bed file and a reference dict
 keywords:
   - bed
+  - bedtointervallist
+  - gatk4
   - interval list
 tools:
   - gatk4:
@@ -13,28 +15,48 @@ tools:
       documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
       doi: 10.1158/1538-7445.AM2017-3590
       licence: ["Apache-2.0"]
+      identifier: ""
 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test']
-  - bed:
-      type: file
-      description: Input bed file
-      pattern: "*.bed"
-  - dict:
-      type: file
-      description: Sequence dictionary
-      pattern: "*.dict"
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test']
+    - bed:
+        type: file
+        description: Input bed file
+        pattern: "*.bed"
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'genome' ]
+    - dict:
+        type: file
+        description: Sequence dictionary
+        pattern: "*.dict"
 output:
   - interval_list:
-      type: file
-      description: gatk interval list file
-      pattern: "*.interval_list"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information, e.g. [ id:'test' ]
+      - "*.interval_list":
+          type: file
+          description: gatk interval list file
+          pattern: "*.interval_list"
+      - _list:
+          type: file
+          description: gatk interval list file
+          pattern: "*.interval_list"
   - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
 authors:
   - "@kevinmenden"
+  - "@ramprasadn"
+maintainers:
+  - "@kevinmenden"
+  - "@ramprasadn"
diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test
new file mode 100644
index 0000000..2289f73
--- /dev/null
+++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test
@@ -0,0 +1,38 @@
+nextflow_process {
+
+    name "Test Process GATK4_BEDTOINTERVALLIST"
+    script "../main.nf"
+    process "GATK4_BEDTOINTERVALLIST"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gatk4"
+    tag "gatk4/bedtointervallist"
+
+    test("test_gatk4_bedtointervallist") {
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test' ], // meta map
+                [file(params.modules_testdata_base_path + 
+                'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ]
+                ]
+                input[1] = [ [ id:'dict' ], // meta map
+                [file(params.modules_testdata_base_path +
+                'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap
new file mode 100644
index 0000000..48c322f
--- /dev/null
+++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap
@@ -0,0 +1,35 @@
+{
+    "test_gatk4_bedtointervallist": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,29a18c36f27584eb5a5f2f5457088b3b"
+                ],
+                "interval_list": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,29a18c36f27584eb5a5f2f5457088b3b"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-03-19T14:20:12.168775"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml
new file mode 100644
index 0000000..b4d54f1
--- /dev/null
+++ b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml
@@ -0,0 +1,2 @@
+gatk4/bedtointervallist:
+  - "modules/nf-core/gatk4/bedtointervallist/**"
diff --git a/modules/nf-core/gatk4/createsequencedictionary/environment.yml b/modules/nf-core/gatk4/createsequencedictionary/environment.yml
new file mode 100644
index 0000000..55993f4
--- /dev/null
+++ b/modules/nf-core/gatk4/createsequencedictionary/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gatk4=4.5.0.0
diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf
index bc324ad..c7f1d75 100644
--- a/modules/nf-core/gatk4/createsequencedictionary/main.nf
+++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf
@@ -2,17 +2,17 @@ process GATK4_CREATESEQUENCEDICTIONARY {
     tag "$fasta"
     label 'process_medium'
 
-    conda "bioconda::gatk4=4.3.0.0"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/gatk4:4.3.0.0--py36hdfd78af_0':
-        'quay.io/biocontainers/gatk4:4.3.0.0--py36hdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0':
+        'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }"
 
     input:
-    path fasta
+    tuple val(meta), path(fasta)
 
     output:
-    path "*.dict"       , emit: dict
-    path "versions.yml" , emit: versions
+    tuple val(meta), path('*.dict')  , emit: dict
+    path "versions.yml"              , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -20,14 +20,15 @@ process GATK4_CREATESEQUENCEDICTIONARY {
     script:
     def args = task.ext.args ?: ''
 
-    def avail_mem = 6
+    def avail_mem = 6144
     if (!task.memory) {
         log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.'
     } else {
-        avail_mem = task.memory.giga
+        avail_mem = (task.memory.mega*0.8).intValue()
     }
     """
-    gatk --java-options "-Xmx${avail_mem}g" CreateSequenceDictionary \\
+    gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\
+        CreateSequenceDictionary \\
         --REFERENCE $fasta \\
         --URI $fasta \\
         --TMP_DIR . \\
diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml
index bd24788..7b5156b 100644
--- a/modules/nf-core/gatk4/createsequencedictionary/meta.yml
+++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml
@@ -1,32 +1,49 @@
 name: gatk4_createsequencedictionary
 description: Creates a sequence dictionary for a reference sequence
 keywords:
+  - createsequencedictionary
   - dictionary
   - fasta
+  - gatk4
 tools:
   - gatk:
-    description: |
-      Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
-      with a primary focus on variant discovery and genotyping. Its powerful processing engine
-      and high-performance computing features make it capable of taking on projects of any size.
-    homepage: https://gatk.broadinstitute.org/hc/en-us
-    documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
-    doi: 10.1158/1538-7445.AM2017-3590
-    licence: ["Apache-2.0"]
-
+      description: |
+        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+        with a primary focus on variant discovery and genotyping. Its powerful processing engine
+        and high-performance computing features make it capable of taking on projects of any size.
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
+      doi: 10.1158/1538-7445.AM2017-3590
+      licence: ["Apache-2.0"]
+      identifier: ""
 input:
-  - fasta:
-    type: file
-    description: Input fasta file
-    pattern: "*.{fasta,fa}"
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'genome' ]
+    - fasta:
+        type: file
+        description: Input fasta file
+        pattern: "*.{fasta,fa}"
 output:
   - dict:
-    type: file
-    description: gatk dictionary file
-    pattern: "*.{dict}"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing reference information, e.g. [ id:'genome' ]
+      - "*.dict":
+          type: file
+          description: gatk dictionary file
+          pattern: "*.{dict}"
   - versions:
-    type: file
-    description: File containing software versions
-    pattern: "versions.yml"
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
 authors:
   - "@maxulysse"
+  - "@ramprasadn"
+maintainers:
+  - "@maxulysse"
+  - "@ramprasadn"
diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test
new file mode 100644
index 0000000..a8a9c6d
--- /dev/null
+++ b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test
@@ -0,0 +1,56 @@
+nextflow_process {
+
+    name "Test Process GATK4_CREATESEQUENCEDICTIONARY"
+    script "../main.nf"
+    process "GATK4_CREATESEQUENCEDICTIONARY"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gatk4"
+    tag "gatk4/createsequencedictionary"
+
+    test("sarscov2 - fasta") {
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                    ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - fasta - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                    ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap
new file mode 100644
index 0000000..16735f9
--- /dev/null
+++ b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+    "sarscov2 - fasta - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "genome.dict:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6"
+                ],
+                "dict": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "genome.dict:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.0"
+        },
+        "timestamp": "2024-05-16T10:16:16.34453"
+    },
+    "sarscov2 - fasta": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "genome.dict:md5,7362679f176e0f52add03c08f457f646"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6"
+                ],
+                "dict": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "genome.dict:md5,7362679f176e0f52add03c08f457f646"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.0"
+        },
+        "timestamp": "2024-05-16T13:58:25.822068"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml b/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml
new file mode 100644
index 0000000..035c5e4
--- /dev/null
+++ b/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml
@@ -0,0 +1,2 @@
+gatk4/createsequencedictionary:
+  - "modules/nf-core/gatk4/createsequencedictionary/**"
diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml
new file mode 100644
index 0000000..2bcd47e
--- /dev/null
+++ b/modules/nf-core/samtools/faidx/environment.yml
@@ -0,0 +1,7 @@
+channels:
+  - conda-forge
+  - bioconda
+
+dependencies:
+  - bioconda::htslib=1.21
+  - bioconda::samtools=1.21
diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf
index 4dd0e5b..28c0a81 100644
--- a/modules/nf-core/samtools/faidx/main.nf
+++ b/modules/nf-core/samtools/faidx/main.nf
@@ -2,18 +2,20 @@ process SAMTOOLS_FAIDX {
     tag "$fasta"
     label 'process_single'
 
-    conda "bioconda::samtools=1.17"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
-        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+        'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' :
+        'biocontainers/samtools:1.21--h50ea8bc_0' }"
 
     input:
     tuple val(meta), path(fasta)
+    tuple val(meta2), path(fai)
 
     output:
-    tuple val(meta), path ("*.fai"), emit: fai
-    tuple val(meta), path ("*.gzi"), emit: gzi, optional: true
-    path "versions.yml"            , emit: versions
+    tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true
+    tuple val(meta), path ("*.fai")        , emit: fai, optional: true
+    tuple val(meta), path ("*.gzi")        , emit: gzi, optional: true
+    path "versions.yml"                    , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -23,8 +25,8 @@ process SAMTOOLS_FAIDX {
     """
     samtools \\
         faidx \\
-        $args \\
-        $fasta
+        $fasta \\
+        $args
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -33,8 +35,12 @@ process SAMTOOLS_FAIDX {
     """
 
     stub:
+    def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll()
+    def fastacmd = match[0] ? "touch ${match[0][1]}" : ''
     """
+    ${fastacmd}
     touch ${fasta}.fai
+
     cat <<-END_VERSIONS > versions.yml
 
     "${task.process}":
diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml
index fe2fe9a..6721b2c 100644
--- a/modules/nf-core/samtools/faidx/meta.yml
+++ b/modules/nf-core/samtools/faidx/meta.yml
@@ -3,6 +3,7 @@ description: Index FASTA file
 keywords:
   - index
   - fasta
+  - faidx
 tools:
   - samtools:
       description: |
@@ -13,35 +14,67 @@ tools:
       documentation: http://www.htslib.org/doc/samtools.html
       doi: 10.1093/bioinformatics/btp352
       licence: ["MIT"]
+      identifier: biotools:samtools
 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - fasta:
-      type: file
-      description: FASTA file
-      pattern: "*.{fa,fasta}"
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - fasta:
+        type: file
+        description: FASTA file
+        pattern: "*.{fa,fasta}"
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - fai:
+        type: file
+        description: FASTA index file
+        pattern: "*.{fai}"
 output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
+  - fa:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.{fa,fasta}":
+          type: file
+          description: FASTA file
+          pattern: "*.{fa,fasta}"
   - fai:
-      type: file
-      description: FASTA index file
-      pattern: "*.{fai}"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.fai":
+          type: file
+          description: FASTA index file
+          pattern: "*.{fai}"
   - gzi:
-      type: file
-      description: Optional gzip index file for compressed inputs
-      pattern: "*.gzi"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.gzi":
+          type: file
+          description: Optional gzip index file for compressed inputs
+          pattern: "*.gzi"
   - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
 authors:
   - "@drpatelh"
   - "@ewels"
   - "@phue"
+maintainers:
+  - "@drpatelh"
+  - "@ewels"
+  - "@phue"
diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test
new file mode 100644
index 0000000..17244ef
--- /dev/null
+++ b/modules/nf-core/samtools/faidx/tests/main.nf.test
@@ -0,0 +1,122 @@
+nextflow_process {
+
+    name "Test Process SAMTOOLS_FAIDX"
+    script "../main.nf"
+    process "SAMTOOLS_FAIDX"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "samtools"
+    tag "samtools/faidx"
+
+    test("test_samtools_faidx") {
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
+
+                input[1] = [[],[]]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("test_samtools_faidx_bgzip") {
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)]
+
+                input[1] = [[],[]]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("test_samtools_faidx_fasta") {
+
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
+
+                input[1] = [ [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("test_samtools_faidx_stub_fasta") {
+
+        config "./nextflow2.config"
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
+
+                input[1] = [ [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("test_samtools_faidx_stub_fai") {
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
+
+                input[1] = [[],[]]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap
new file mode 100644
index 0000000..1bbb3ec
--- /dev/null
+++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap
@@ -0,0 +1,249 @@
+{
+    "test_samtools_faidx": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+                    ]
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f"
+                ],
+                "fa": [
+                    
+                ],
+                "fai": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+                    ]
+                ],
+                "gzi": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-16T07:57:47.450887871"
+    },
+    "test_samtools_faidx_bgzip": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474"
+                    ]
+                ],
+                "3": [
+                    "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f"
+                ],
+                "fa": [
+                    
+                ],
+                "fai": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+                    ]
+                ],
+                "gzi": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-16T07:58:04.804905659"
+    },
+    "test_samtools_faidx_fasta": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f"
+                ],
+                "fa": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36"
+                    ]
+                ],
+                "fai": [
+                    
+                ],
+                "gzi": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-16T07:58:23.831268154"
+    },
+    "test_samtools_faidx_stub_fasta": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f"
+                ],
+                "fa": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+                    ]
+                ],
+                "fai": [
+                    
+                ],
+                "gzi": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-16T07:58:35.600243706"
+    },
+    "test_samtools_faidx_stub_fai": {
+        "content": [
+            {
+                "0": [
+                    
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+                    ]
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f"
+                ],
+                "fa": [
+                    
+                ],
+                "fai": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+                    ]
+                ],
+                "gzi": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-16T07:58:54.705460167"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config
new file mode 100644
index 0000000..f76a3ba
--- /dev/null
+++ b/modules/nf-core/samtools/faidx/tests/nextflow.config
@@ -0,0 +1,7 @@
+process {
+
+    withName: SAMTOOLS_FAIDX {
+        ext.args = 'MT192765.1 -o extract.fa'
+    }
+
+}
diff --git a/modules/nf-core/samtools/faidx/tests/nextflow2.config b/modules/nf-core/samtools/faidx/tests/nextflow2.config
new file mode 100644
index 0000000..33ebbd5
--- /dev/null
+++ b/modules/nf-core/samtools/faidx/tests/nextflow2.config
@@ -0,0 +1,6 @@
+process {
+
+    withName: SAMTOOLS_FAIDX {
+        ext.args = '-o extract.fa'
+    }
+}
diff --git a/modules/nf-core/samtools/faidx/tests/tags.yml b/modules/nf-core/samtools/faidx/tests/tags.yml
new file mode 100644
index 0000000..e4a8394
--- /dev/null
+++ b/modules/nf-core/samtools/faidx/tests/tags.yml
@@ -0,0 +1,2 @@
+samtools/faidx:
+  - modules/nf-core/samtools/faidx/**
diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml
new file mode 100644
index 0000000..1debc4c
--- /dev/null
+++ b/modules/nf-core/star/align/environment.yml
@@ -0,0 +1,9 @@
+channels:
+  - conda-forge
+  - bioconda
+
+dependencies:
+  - bioconda::htslib=1.18
+  - bioconda::samtools=1.18
+  - bioconda::star=2.7.10a
+  - conda-forge::gawk=5.1.0
diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf
index 8cb8e9a..ae67e00 100644
--- a/modules/nf-core/star/align/main.nf
+++ b/modules/nf-core/star/align/main.nf
@@ -2,15 +2,15 @@ process STAR_ALIGN {
     tag "$meta.id"
     label 'process_high'
 
-    conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' :
-        'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }"
+        'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' :
+        'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }"
 
     input:
     tuple val(meta), path(reads, stageAs: "input*/*")
-    path index
-    path gtf
+    tuple val(meta2), path(index)
+    tuple val(meta3), path(gtf)
     val star_ignore_sjdbgtf
     val seq_platform
     val seq_center
@@ -81,6 +81,8 @@ process STAR_ALIGN {
     stub:
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
+    echo "" | gzip > ${prefix}.unmapped_1.fastq.gz
+    echo "" | gzip > ${prefix}.unmapped_2.fastq.gz
     touch ${prefix}Xd.out.bam
     touch ${prefix}.Log.final.out
     touch ${prefix}.Log.out
@@ -89,8 +91,6 @@ process STAR_ALIGN {
     touch ${prefix}.toTranscriptome.out.bam
     touch ${prefix}.Aligned.unsort.out.bam
     touch ${prefix}.Aligned.sortedByCoord.out.bam
-    touch ${prefix}.unmapped_1.fastq.gz
-    touch ${prefix}.unmapped_2.fastq.gz
     touch ${prefix}.tab
     touch ${prefix}.SJ.out.tab
     touch ${prefix}.ReadsPerGene.out.tab
diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml
index bce16d3..d30556b 100644
--- a/modules/nf-core/star/align/meta.yml
+++ b/modules/nf-core/star/align/meta.yml
@@ -14,76 +14,194 @@ tools:
       manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf
       doi: 10.1093/bioinformatics/bts635
       licence: ["MIT"]
+      identifier: biotools:star
 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - reads:
-      type: file
-      description: |
-        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
-        respectively.
-  - index:
-      type: directory
-      description: STAR genome index
-      pattern: "star"
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - reads:
+        type: file
+        description: |
+          List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+          respectively.
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - index:
+        type: directory
+        description: STAR genome index
+        pattern: "star"
+  - - meta3:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. [ id:'test' ]
+    - gtf:
+        type: file
+        description: Annotation GTF file
+        pattern: "*.{gtf}"
+  - - star_ignore_sjdbgtf:
+        type: boolean
+        description: Ignore annotation GTF file
+  - - seq_platform:
+        type: string
+        description: Sequencing platform
+  - - seq_center:
+        type: string
+        description: Sequencing center
 output:
-  - bam:
-      type: file
-      description: Output BAM file containing read alignments
-      pattern: "*.{bam}"
   - log_final:
-      type: file
-      description: STAR final log file
-      pattern: "*Log.final.out"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information, e.g. [ id:'test', single_end:false ]
+      - "*Log.final.out":
+          type: file
+          description: STAR final log file
+          pattern: "*Log.final.out"
   - log_out:
-      type: file
-      description: STAR lot out file
-      pattern: "*Log.out"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information, e.g. [ id:'test', single_end:false ]
+      - "*Log.out":
+          type: file
+          description: STAR log out file
+          pattern: "*Log.out"
   - log_progress:
-      type: file
-      description: STAR log progress file
-      pattern: "*Log.progress.out"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information, e.g. [ id:'test', single_end:false ]
+      - "*Log.progress.out":
+          type: file
+          description: STAR log progress file
+          pattern: "*Log.progress.out"
   - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+  - bam:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information, e.g. [ id:'test', single_end:false ]
+      - "*d.out.bam":
+          type: file
+          description: Output BAM file containing read alignments
+          pattern: "*.{bam}"
   - bam_sorted:
-      type: file
-      description: Sorted BAM file of read alignments (optional)
-      pattern: "*sortedByCoord.out.bam"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information, e.g. [ id:'test', single_end:false ]
+      - "*sortedByCoord.out.bam":
+          type: file
+          description: Sorted BAM file of read alignments (optional)
+          pattern: "*sortedByCoord.out.bam"
   - bam_transcript:
-      type: file
-      description: Output BAM file of transcriptome alignment (optional)
-      pattern: "*toTranscriptome.out.bam"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information, e.g. [ id:'test', single_end:false ]
+      - "*toTranscriptome.out.bam":
+          type: file
+          description: Output BAM file of transcriptome alignment (optional)
+          pattern: "*toTranscriptome.out.bam"
   - bam_unsorted:
-      type: file
-      description: Unsorted BAM file of read alignments (optional)
-      pattern: "*Aligned.unsort.out.bam"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information, e.g. [ id:'test', single_end:false ]
+      - "*Aligned.unsort.out.bam":
+          type: file
+          description: Unsorted BAM file of read alignments (optional)
+          pattern: "*Aligned.unsort.out.bam"
   - fastq:
-      type: file
-      description: Unmapped FastQ files (optional)
-      pattern: "*fastq.gz"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information, e.g. [ id:'test', single_end:false ]
+      - "*fastq.gz":
+          type: file
+          description: Unmapped FastQ files (optional)
+          pattern: "*fastq.gz"
   - tab:
-      type: file
-      description: STAR output tab file(s) (optional)
-      pattern: "*.tab"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information, e.g. [ id:'test', single_end:false ]
+      - "*.tab":
+          type: file
+          description: STAR output tab file(s) (optional)
+          pattern: "*.tab"
+  - spl_junc_tab:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.SJ.out.tab":
+          type: file
+          description: STAR output splice junction tab file
+          pattern: "*.SJ.out.tab"
+  - read_per_gene_tab:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.ReadsPerGene.out.tab":
+          type: file
+          description: STAR output read per gene tab file
+          pattern: "*.ReadsPerGene.out.tab"
   - junction:
-      type: file
-      description: STAR chimeric junction output file (optional)
-      pattern: "*.out.junction"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information, e.g. [ id:'test', single_end:false ]
+      - "*.out.junction":
+          type: file
+          description: STAR chimeric junction output file (optional)
+          pattern: "*.out.junction"
+  - sam:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.out.sam":
+          type: file
+          description: STAR output SAM file
+          pattern: "*.out.sam"
   - wig:
-      type: file
-      description: STAR output wiggle format file(s) (optional)
-      pattern: "*.wig"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information, e.g. [ id:'test', single_end:false ]
+      - "*.wig":
+          type: file
+          description: STAR output wiggle format file(s) (optional)
+          pattern: "*.wig"
   - bedgraph:
-      type: file
-      description: STAR output bedGraph format file(s) (optional)
-      pattern: "*.bg"
-
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information, e.g. [ id:'test', single_end:false ]
+      - "*.bg":
+          type: file
+          description: STAR output bedGraph format file(s) (optional)
+          pattern: "*.bg"
 authors:
   - "@kevinmenden"
   - "@drpatelh"
   - "@praveenraj2018"
+maintainers:
+  - "@kevinmenden"
+  - "@drpatelh"
+  - "@praveenraj2018"
diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test
new file mode 100644
index 0000000..2d9f72d
--- /dev/null
+++ b/modules/nf-core/star/align/tests/main.nf.test
@@ -0,0 +1,609 @@
+nextflow_process {
+
+    name "Test Process STAR_ALIGN"
+    script "../main.nf"
+    process "STAR_ALIGN"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "star"
+    tag "star/align"
+    tag "star/genomegenerate"
+
+    test("homo_sapiens - single_end") {
+        config "./nextflow.config"
+
+        setup {
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                    ])
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:true ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.log_final[0][1]).name,
+                    file(process.out.log_out[0][1]).name,
+                    file(process.out.log_progress[0][1]).name,
+                    process.out.bam,
+                    process.out.bam_sorted,
+                    process.out.bam_transcript,
+                    process.out.bam_unsorted,
+                    process.out.bedgraph,
+                    process.out.fastq,
+                    process.out.junction,
+                    process.out.read_per_gene_tab,
+                    process.out.sam,
+                    process.out.spl_junc_tab,
+                    process.out.tab,
+                    process.out.wig,
+                    process.out.versions
+                ).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - paired_end") {
+        config "./nextflow.config"
+
+        setup {
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                    ])
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.log_final[0][1]).name,
+                    file(process.out.log_out[0][1]).name,
+                    file(process.out.log_progress[0][1]).name,
+                    process.out.bam,
+                    process.out.bam_sorted,
+                    process.out.bam_transcript,
+                    process.out.bam_unsorted,
+                    process.out.bedgraph,
+                    process.out.fastq,
+                    process.out.junction,
+                    process.out.read_per_gene_tab,
+                    process.out.sam,
+                    process.out.spl_junc_tab,
+                    process.out.tab,
+                    process.out.wig,
+                    process.out.versions
+                ).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - paired_end - arriba") {
+        config "./nextflow.arriba.config"
+
+        setup {
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                    ])
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.log_final[0][1]).name,
+                    file(process.out.log_out[0][1]).name,
+                    file(process.out.log_progress[0][1]).name,
+                    process.out.bam,
+                    process.out.bam_sorted,
+                    process.out.bam_transcript,
+                    process.out.bam_unsorted,
+                    process.out.bedgraph,
+                    process.out.fastq,
+                    process.out.junction,
+                    process.out.read_per_gene_tab,
+                    process.out.sam,
+                    process.out.spl_junc_tab,
+                    process.out.tab,
+                    process.out.wig,
+                    process.out.versions
+                ).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - paired_end - starfusion") { // paired-end case run without the -stub option, using the starfusion config below
+        config "./nextflow.starfusion.config"
+
+        setup { // runs STAR_GENOMEGENERATE on the test FASTA and GTF; its index output is consumed as input[1] in the when block
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                    ])
+                    """
+                }
+            }
+        }
+
+        when { // inputs for the process under test: R1/R2 reads, index from setup, GTF, then three literal values
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+
+        then { // process must succeed; the three log outputs are snapshotted by file name only, the other channels as emitted
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.log_final[0][1]).name,
+                    file(process.out.log_out[0][1]).name,
+                    file(process.out.log_progress[0][1]).name,
+                    process.out.bam,
+                    process.out.bam_sorted,
+                    process.out.bam_transcript,
+                    process.out.bam_unsorted,
+                    process.out.bedgraph,
+                    process.out.fastq,
+                    process.out.junction,
+                    process.out.read_per_gene_tab,
+                    process.out.sam,
+                    process.out.spl_junc_tab,
+                    process.out.tab,
+                    process.out.wig,
+                    process.out.versions
+                ).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - paired_end - multiple") { // paired-end case run without the -stub option, with four FASTQ entries in one sample
+        config "./nextflow.config"
+
+        setup { // runs STAR_GENOMEGENERATE on the test FASTA and GTF; its index output is consumed as input[1] in the when block
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                    ])
+                    """
+                }
+            }
+        }
+
+        when { // input[0] lists the same test_rnaseq_1/test_rnaseq_2 pair twice; the remaining inputs match the other paired-end cases
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+
+        then { // process must succeed; the three log outputs are snapshotted by file name only, the other channels as emitted
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    file(process.out.log_final[0][1]).name,
+                    file(process.out.log_out[0][1]).name,
+                    file(process.out.log_progress[0][1]).name,
+                    process.out.bam,
+                    process.out.bam_sorted,
+                    process.out.bam_transcript,
+                    process.out.bam_unsorted,
+                    process.out.bedgraph,
+                    process.out.fastq,
+                    process.out.junction,
+                    process.out.read_per_gene_tab,
+                    process.out.sam,
+                    process.out.spl_junc_tab,
+                    process.out.tab,
+                    process.out.wig,
+                    process.out.versions
+                ).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - single_end - stub") { // single-end case executed with the -stub option
+        options "-stub"
+        config "./nextflow.config"
+
+        setup { // runs STAR_GENOMEGENERATE on the test FASTA and GTF; its index output is consumed as input[1] in the when block
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                    ])
+                    """
+                }
+            }
+        }
+
+        when { // inputs for the process under test: a single R1 FASTQ with single_end:true, index from setup, GTF, then three literal values
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:true ], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+
+        then { // process must succeed and the entire process.out must match the stored snapshot
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - paired_end - stub") { // paired-end case executed with the -stub option and the default test config
+        options "-stub"
+        config "./nextflow.config"
+
+        setup { // runs STAR_GENOMEGENERATE on the test FASTA and GTF; its index output is consumed as input[1] in the when block
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                    ])
+                    """
+                }
+            }
+        }
+
+        when { // inputs for the process under test: R1/R2 reads, index from setup, GTF, then three literal values
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+
+        then { // process must succeed and the entire process.out must match the stored snapshot
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - paired_end - arriba - stub") { // paired-end case executed with the -stub option and the arriba config
+        options "-stub"
+        config "./nextflow.arriba.config"
+
+        setup { // runs STAR_GENOMEGENERATE on the test FASTA and GTF; its index output is consumed as input[1] in the when block
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                    ])
+                    """
+                }
+            }
+        }
+
+        when { // inputs for the process under test: R1/R2 reads, index from setup, GTF, then three literal values
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+
+        then { // process must succeed and the entire process.out must match the stored snapshot
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - paired_end - starfusion - stub") { // paired-end case executed with the -stub option and the starfusion config
+        options "-stub"
+        config "./nextflow.starfusion.config"
+
+        setup { // runs STAR_GENOMEGENERATE on the test FASTA and GTF; its index output is consumed as input[1] in the when block
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                    ])
+                    """
+                }
+            }
+        }
+
+        when { // inputs for the process under test: R1/R2 reads, index from setup, GTF, then three literal values
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+
+        then { // process must succeed and the entire process.out must match the stored snapshot
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("homo_sapiens - paired_end - multiple - stub") { // paired-end case with four FASTQ entries, executed with the -stub option
+        options "-stub"
+        config "./nextflow.config"
+
+        setup { // runs STAR_GENOMEGENERATE on the test FASTA and GTF; its index output is consumed as input[1] in the when block
+            run("STAR_GENOMEGENERATE") {
+                script "../../../star/genomegenerate/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test_fasta' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+                    ])
+                    input[1] = Channel.of([
+                        [ id:'test_gtf' ],
+                        [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                    ])
+                    """
+                }
+            }
+        }
+
+        when { // input[0] lists the same test_rnaseq_1/test_rnaseq_2 pair twice; the remaining inputs match the other paired-end cases
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = STAR_GENOMEGENERATE.out.index
+                input[2] = Channel.of([
+                    [ id:'test_gtf' ],
+                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+                ])
+                input[3] = false
+                input[4] = 'illumina'
+                input[5] = false
+                """
+            }
+        }
+
+        then { // process must succeed and the entire process.out must match the stored snapshot
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/star/align/tests/main.nf.test.snap b/modules/nf-core/star/align/tests/main.nf.test.snap
new file mode 100644
index 0000000..c814eb5
--- /dev/null
+++ b/modules/nf-core/star/align/tests/main.nf.test.snap
@@ -0,0 +1,1973 @@
+{
+    "homo_sapiens - single_end - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "10": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "11": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "12": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "13": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "14": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "15": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "3": [
+                    "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+                ],
+                "4": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "5": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "6": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "7": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "8": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        [
+                            "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "9": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        [
+                            "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam_sorted": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam_transcript": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "bam_unsorted": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "bedgraph": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "fastq": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        [
+                            "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "junction": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_final": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_progress": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "read_per_gene_tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "sam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "spl_junc_tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        [
+                            "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+                ],
+                "wig": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T15:16:04.712114"
+    },
+    "homo_sapiens - paired_end - arriba - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "10": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "11": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "12": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "13": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "14": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "15": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "3": [
+                    "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+                ],
+                "4": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "5": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "6": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "7": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "8": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "9": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam_sorted": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam_transcript": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "bam_unsorted": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "bedgraph": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "fastq": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "junction": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_final": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_progress": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "read_per_gene_tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "sam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "spl_junc_tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+                ],
+                "wig": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T15:16:28.874293"
+    },
+    "homo_sapiens - single_end": {
+        "content": [
+            "test.Log.final.out",
+            "test.Log.out",
+            "test.Log.progress.out",
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535"
+                ]
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    [
+                        "test.Signal.Unique.str1.out.bg:md5,c56fc1472776fb927eaf62d973da5f9a",
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,e93373cf6f2a2a9506e2efdb260cdd4f"
+                    ]
+                ]
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": true
+                    },
+                    "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c"
+                ]
+            ],
+            [
+                
+            ],
+            [
+                "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T18:02:34.35338"
+    },
+    "homo_sapiens - paired_end": {
+        "content": [
+            "test.Log.final.out",
+            "test.Log.out",
+            "test.Log.progress.out",
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f"
+                ]
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    [
+                        "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a",
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6"
+                    ]
+                ]
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d"
+                ]
+            ],
+            [
+                
+            ],
+            [
+                "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T18:03:16.701923"
+    },
+    "homo_sapiens - paired_end - multiple - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "10": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "11": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "12": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "13": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "14": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "15": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "3": [
+                    "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+                ],
+                "4": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "5": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "6": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "7": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "8": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "9": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam_sorted": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam_transcript": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "bam_unsorted": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "bedgraph": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "fastq": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "junction": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_final": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_progress": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "read_per_gene_tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "sam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "spl_junc_tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+                ],
+                "wig": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T15:16:51.360287"
+    },
+    "homo_sapiens - paired_end - multiple": {
+        "content": [
+            "test.Log.final.out",
+            "test.Log.out",
+            "test.Log.progress.out",
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a"
+                ]
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    [
+                        "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a",
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6"
+                    ]
+                ]
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2"
+                ]
+            ],
+            [
+                
+            ],
+            [
+                "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T13:13:28.987438"
+    },
+    "homo_sapiens - paired_end - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "10": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "11": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "12": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "13": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "14": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "15": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "3": [
+                    "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+                ],
+                "4": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "5": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "6": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "7": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "8": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "9": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam_sorted": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam_transcript": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "bam_unsorted": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "bedgraph": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "fastq": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "junction": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_final": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_progress": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "read_per_gene_tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "sam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "spl_junc_tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+                ],
+                "wig": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T15:16:16.798018"
+    },
+    "homo_sapiens - paired_end - starfusion": {
+        "content": [
+            "test.Log.final.out",
+            "test.Log.out",
+            "test.Log.progress.out",
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Aligned.out.bam:md5,bcad07b838f6762fc01eea52b5cd3f84"
+                ]
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Chimeric.out.junction:md5,c10ef219f4a30e83711b995bc5e40dba"
+                ]
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a"
+                ]
+            ],
+            [
+                
+            ],
+            [
+                "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T13:10:55.371956"
+    },
+    "homo_sapiens - paired_end - arriba": {
+        "content": [
+            "test.Log.final.out",
+            "test.Log.out",
+            "test.Log.progress.out",
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.Aligned.out.bam:md5,c1b1747f5873f2d17762725636e891d5"
+                ]
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c"
+                ]
+            ],
+            [
+                
+            ],
+            [
+                "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T13:05:10.7534"
+    },
+    "homo_sapiens - paired_end - starfusion - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "10": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "11": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "12": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "13": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "14": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "15": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "3": [
+                    "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+                ],
+                "4": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "5": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "6": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "7": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "8": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "9": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam_sorted": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "bam_transcript": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "bam_unsorted": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "bedgraph": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "fastq": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+                            "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                        ]
+                    ]
+                ],
+                "junction": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_final": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_out": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "log_progress": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "read_per_gene_tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "sam": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "spl_junc_tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tab": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f"
+                ],
+                "wig": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-22T15:16:40.64399"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/star/align/tests/nextflow.arriba.config b/modules/nf-core/star/align/tests/nextflow.arriba.config
new file mode 100644
index 0000000..2324b9e
--- /dev/null
+++ b/modules/nf-core/star/align/tests/nextflow.arriba.config
@@ -0,0 +1,14 @@
+process {
+
+    withName: STAR_GENOMEGENERATE {
+        ext.args = '--genomeSAindexNbases 9'
+    }
+
+    withName: STAR_ALIGN {
+        ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50'
+    }
+
+}
+
+// Fix chown issue for the output star folder
+docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)'
diff --git a/modules/nf-core/star/align/tests/nextflow.config b/modules/nf-core/star/align/tests/nextflow.config
new file mode 100644
index 0000000..c4ac580
--- /dev/null
+++ b/modules/nf-core/star/align/tests/nextflow.config
@@ -0,0 +1,14 @@
+process {
+
+    withName: STAR_GENOMEGENERATE {
+        ext.args = '--genomeSAindexNbases 9'
+    }
+
+    withName: STAR_ALIGN {
+        ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --outWigType bedGraph --outWigStrand Unstranded'
+    }
+
+}
+
+// Fix chown issue for the output star folder
+docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)'
diff --git a/modules/nf-core/star/align/tests/nextflow.starfusion.config b/modules/nf-core/star/align/tests/nextflow.starfusion.config
new file mode 100644
index 0000000..467b649
--- /dev/null
+++ b/modules/nf-core/star/align/tests/nextflow.starfusion.config
@@ -0,0 +1,14 @@
+process {
+
+    withName: STAR_GENOMEGENERATE {
+        ext.args = '--genomeSAindexNbases 9'
+    }
+
+    withName: STAR_ALIGN {
+        ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outReadsUnmapped None --twopassMode Basic --outSAMstrandField intronMotif --outSAMunmapped Within --chimSegmentMin 12 --chimJunctionOverhangMin 8 --chimOutJunctionFormat 1 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 --alignSJstitchMismatchNmax 5 -1 5 5 --chimMultimapScoreRange 3 --chimScoreJunctionNonGTAG -4 --chimMultimapNmax 20 --chimNonchimScoreDropMin 10 --peOverlapNbasesMin 12 --peOverlapMMp 0.1 --alignInsertionFlush Right --alignSplicedMateMapLminOverLmate 0 --alignSplicedMateMapLmin 30'
+    }
+
+}
+
+// Fix chown issue for the output star folder
+docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)'
diff --git a/modules/nf-core/star/align/tests/tags.yml b/modules/nf-core/star/align/tests/tags.yml
new file mode 100644
index 0000000..8beace1
--- /dev/null
+++ b/modules/nf-core/star/align/tests/tags.yml
@@ -0,0 +1,2 @@
+star/align:
+  - modules/nf-core/star/align/**
diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf
index b885571..4d4055b 100644
--- a/modules/nf-core/star/genomegenerate/main.nf
+++ b/modules/nf-core/star/genomegenerate/main.nf
@@ -21,7 +21,7 @@ process STAR_GENOMEGENERATE {
     script:
     def args        = task.ext.args ?: ''
     def args_list   = args.tokenize()
-    def memory      = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
+    def memory      = task.memory ? "--limitGenomeGenerateRAM ${(task.memory.toBytes()*task.cpus) - 100000000}" : ''
     def include_gtf = gtf ? "--sjdbGTFfile $gtf" : ''
     if (args_list.contains('--genomeSAindexNbases')) {
         """
diff --git a/modules/nf-core/star/genomegenerate/star-genomegenerate.diff b/modules/nf-core/star/genomegenerate/star-genomegenerate.diff
new file mode 100644
index 0000000..247d39a
--- /dev/null
+++ b/modules/nf-core/star/genomegenerate/star-genomegenerate.diff
@@ -0,0 +1,20 @@
+Changes in module 'nf-core/star/genomegenerate'
+'modules/nf-core/star/genomegenerate/environment.yml' is unchanged
+'modules/nf-core/star/genomegenerate/meta.yml' is unchanged
+Changes in 'star/genomegenerate/main.nf':
+--- modules/nf-core/star/genomegenerate/main.nf
++++ modules/nf-core/star/genomegenerate/main.nf
+@@ -21,7 +21,7 @@
+     script:
+     def args        = task.ext.args ?: ''
+     def args_list   = args.tokenize()
+-    def memory      = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
++    def memory      = task.memory ? "--limitGenomeGenerateRAM ${(task.memory.toBytes()*task.cpus) - 100000000}" : ''
+     def include_gtf = gtf ? "--sjdbGTFfile $gtf" : ''
+     if (args_list.contains('--genomeSAindexNbases')) {
+         """
+
+'modules/nf-core/star/genomegenerate/tests/tags.yml' is unchanged
+'modules/nf-core/star/genomegenerate/tests/main.nf.test' is unchanged
+'modules/nf-core/star/genomegenerate/tests/main.nf.test.snap' is unchanged
+************************************************************
diff --git a/subworkflows/local/align_reads.nf b/subworkflows/local/align_reads.nf
index 8f50d57..8e5c410 100644
--- a/subworkflows/local/align_reads.nf
+++ b/subworkflows/local/align_reads.nf
@@ -19,7 +19,7 @@ workflow ALIGN_READS {
     STAR_ALIGN(
         reads,
         star_index,
-        gtf.map{it[1]},
+        gtf,
         false,
         [],
         []
diff --git a/subworkflows/local/fillout.nf b/subworkflows/local/fillout.nf
index 9453446..3a3703f 100755
--- a/subworkflows/local/fillout.nf
+++ b/subworkflows/local/fillout.nf
@@ -32,7 +32,7 @@ workflow FILLOUT {
             .map{ meta, bam, bai, variants ->
                 [ meta, bam, bai, variants, "${variants.getBaseName()}.gbcms.maf"]
             },
-        fasta,
+        fasta.map{it[1]}.first(),
         fai
 
     )
diff --git a/subworkflows/local/fusion.nf b/subworkflows/local/fusion.nf
index 6f8578c..fb8525e 100644
--- a/subworkflows/local/fusion.nf
+++ b/subworkflows/local/fusion.nf
@@ -1,5 +1,5 @@
 include { STAR_ALIGN as STAR_FOR_ARRIBA     } from '../../modules/nf-core/star/align/main'
-include { ARRIBA                            } from '../../modules/nf-core/arriba/main'
+include { ARRIBA_ARRIBA                     } from '../../modules/nf-core/arriba/arriba/main'
 include { STAR_ALIGN as STAR_FOR_STARFUSION } from '../../modules/nf-core/star/align/main'
 include { STARFUSION                        } from '../../modules/local/starfusion/detect/main'
 include { FUSIONCATCHER_DETECT              } from '../../modules/local/fusioncatcher/detect/main'
@@ -19,6 +19,7 @@ workflow FUSION {
     reads
     reads_untrimmed
     star_index
+    fasta
     gtf
     starfusion_ref
     fusioncatcher_ref
@@ -33,7 +34,7 @@ workflow FUSION {
 
     main:
     ch_versions = Channel.empty()
-    fasta = params.fasta
+    //fasta = params.fasta
     //gene_bed = params.metafusion_gene_bed
     //gene_info = params.metafusion_gene_info
     //blocklist = params.metafusion_blocklist
@@ -41,30 +42,30 @@ workflow FUSION {
     STAR_FOR_ARRIBA(
         reads,
         star_index,
-        gtf.map{it[1]},
+        gtf,
         false,
         [],
         []
     )
     ch_versions = ch_versions.mix(STAR_FOR_ARRIBA.out.versions.first())
 
-    ARRIBA(
+    ARRIBA_ARRIBA(
         STAR_FOR_ARRIBA.out.bam,
         fasta,
-        gtf.map{it[1]},
-        arriba_blacklist,
-        arriba_known_fusions,
-        [],
-        [],
-        arriba_protein_domains
+        gtf,
+        arriba_blacklist.map{[[:],it]}, // wrap each file as [meta, file] with an empty meta map
+        arriba_known_fusions.map{[[:],it]},
+        [[:],[]],
+        [[:],[]],
+        arriba_protein_domains.map{[[:],it]}
     )
-    ch_versions = ch_versions.mix(ARRIBA.out.versions.first())
+    ch_versions = ch_versions.mix(ARRIBA_ARRIBA.out.versions.first())
 
     STAR_FOR_STARFUSION(
         reads,
         // use the star index in the starfusion reference to ensure compatibility
-        starfusion_ref.map{ file( it + "/ref_genome.fa.star.idx")},
-	starfusion_ref.map{ file( it + "/ref_annot.gtf")},
+        starfusion_ref.map{ [[id:params.genome],file( it + "/ref_genome.fa.star.idx")] },
+	starfusion_ref.map{ [[id:params.genome],file( it + "/ref_annot.gtf")] },
         false,
         [],
         []
@@ -88,7 +89,7 @@ workflow FUSION {
     fc_fusions = ["GRCh37","hg19","smallGRCh37"].contains(params.genome) ? FUSIONCATCHER_DETECT.out.fusions_alt : FUSIONCATCHER_DETECT.out.fusions
 
 
-    ARRIBA_TO_CFF(ARRIBA.out.fusions
+    ARRIBA_TO_CFF(ARRIBA_ARRIBA.out.fusions
             .map{ meta, file ->[ meta, "arriba", file ] })
     FUSIONCATCHER_TO_CFF(fc_fusions
                     .map{ meta, file -> [ meta, "fusioncatcher", file ] } )
@@ -113,7 +114,7 @@ workflow FUSION {
         MERGE_CFF.out.file_out,
         gene_bed.map{ it[1] }.first(),
         gene_info.map{ it[1] }.first(),
-        fasta,
+        fasta.map{ it[1] }.first(),
         blocklist
     )
 
diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf
index 938652a..d5a02dc 100644
--- a/subworkflows/local/prepare_references.nf
+++ b/subworkflows/local/prepare_references.nf
@@ -6,6 +6,7 @@ include { SAMTOOLS_FAIDX                 } from '../../modules/nf-core/samtools/
 include { GATK4_BEDTOINTERVALLIST        } from '../../modules/nf-core/gatk4/bedtointervallist/main'
 include { PREPARE_RRNA                   } from '../../modules/local/prepare_rrna/main'
 include {
+    GUNZIP as GUNZIP_FASTA ;
     GUNZIP as GUNZIP_GTF ;
     GUNZIP as GUNZIP_METAFUSIONGENEBED ;
     GUNZIP as GUNZIP_METAFUSIONBLOCKLIST
@@ -24,6 +25,13 @@ workflow PREPARE_REFERENCES {
     main:
     ch_versions = Channel.empty()
 
+    if (params.fasta.endsWith(".gz")){
+        GUNZIP_FASTA([[id:params.genome],params.fasta])
+        fasta = GUNZIP_FASTA.out.gunzip.first()
+    } else {
+        fasta = Channel.of([[id:params.genome],params.fasta])
+    }
+
     if (params.gtf.endsWith(".gz")){
         GUNZIP_GTF([[id:params.genome],params.gtf])
         gtf = GUNZIP_GTF.out.gunzip.first()
@@ -39,7 +47,7 @@ workflow PREPARE_REFERENCES {
     }
 
     STAR_GENOMEGENERATE(
-        [[id:params.genome],params.fasta],
+        fasta,
         gtf
     )
     ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
@@ -58,7 +66,7 @@ workflow PREPARE_REFERENCES {
     }
     PREPARE_RRNA([],refflat)
 
-    GATK4_CREATESEQUENCEDICTIONARY(params.fasta)
+    GATK4_CREATESEQUENCEDICTIONARY(fasta)
     ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions)
 
     GATK4_BEDTOINTERVALLIST(
@@ -67,7 +75,7 @@ workflow PREPARE_REFERENCES {
     )
     ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST.out.versions)
 
-    SAMTOOLS_FAIDX ([[:],params.fasta])
+    SAMTOOLS_FAIDX(fasta,[[:],[]])
 
     if (params.starfusion_url) {
         UNTAR_STARFUSION([[id:params.starfusion_url.tokenize("/")[-1].replaceFirst(/\.tar\.gz$/, "")],params.starfusion_url])
@@ -113,6 +121,7 @@ workflow PREPARE_REFERENCES {
     star_index         = star_index
     // Convert queue channel to value channel so it never gets poison pilled
     refflat            = refflat
+    fasta              = fasta
     fasta_dict         = GATK4_CREATESEQUENCEDICTIONARY.out.dict
     fasta_fai          = SAMTOOLS_FAIDX.out.fai
     rrna_bed           = PREPARE_RRNA.out.rRNA_bed
diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf
index b9e3bc5..58b4e04 100644
--- a/subworkflows/local/qc.nf
+++ b/subworkflows/local/qc.nf
@@ -17,12 +17,12 @@ workflow QC {
     refflat
     rrna_intervals
     rseqc_bed
+    fasta
     fai
     dict
     baits
 
     main:
-    fasta = params.fasta
     ch_versions = Channel.empty()
 
     BAM_RSEQC(
@@ -35,7 +35,7 @@ workflow QC {
     PICARD_COLLECTRNASEQMETRICS(
         bam,
         refflat,
-        fasta,
+        fasta.map{it[1]}.first(),
         rrna_intervals
     )
     ch_versions = ch_versions.mix(PICARD_COLLECTRNASEQMETRICS.out.versions.first())
@@ -51,9 +51,9 @@ workflow QC {
             }.map{ meta, bam, bai, bait, bait_file, target_file ->
                 [meta, bam, bai, bait_file, target_file]
             },
-        [[:],fasta],
+	fasta,
         fai,
-        dict.map{ dict -> [[:],dict]}
+        dict
     )
 
     multiqc_files = multiqc_files
diff --git a/workflows/forte.nf b/workflows/forte.nf
index ea56c0a..4ccd0d9 100644
--- a/workflows/forte.nf
+++ b/workflows/forte.nf
@@ -129,6 +129,7 @@ workflow FORTE {
         PREPROCESS_READS.out.reads_trimmed,
         PREPROCESS_READS.out.reads_untrimmed,
         PREPARE_REFERENCES.out.star_index,
+	PREPARE_REFERENCES.out.fasta,
         PREPARE_REFERENCES.out.gtf,
         PREPARE_REFERENCES.out.starfusion_ref,
         PREPARE_REFERENCES.out.fusioncatcher_ref,
@@ -152,7 +153,7 @@ workflow FORTE {
         ALIGN_READS.out.bam,
         ALIGN_READS.out.bai,
         MAF_INPUT_CHECK.out.mafs,
-        params.fasta,
+        PREPARE_REFERENCES.out.fasta,
         PREPARE_REFERENCES.out.fasta_fai.map{ it[1] }.first()
     )
     ch_versions = ch_versions.mix(FILLOUT.out.ch_versions)
@@ -168,6 +169,7 @@ workflow FORTE {
         PREPARE_REFERENCES.out.refflat,
         PREPARE_REFERENCES.out.rrna_interval_list,
         PREPARE_REFERENCES.out.rseqc_bed,
+	PREPARE_REFERENCES.out.fasta,
         PREPARE_REFERENCES.out.fasta_fai,
         PREPARE_REFERENCES.out.fasta_dict,
         BAIT_INPUTS.out.baits
@@ -189,6 +191,7 @@ workflow FORTE {
         PREPARE_REFERENCES.out.refflat,
         PREPARE_REFERENCES.out.rrna_interval_list,
         PREPARE_REFERENCES.out.rseqc_bed,
+	PREPARE_REFERENCES.out.fasta,
         PREPARE_REFERENCES.out.fasta_fai,
         PREPARE_REFERENCES.out.fasta_dict,
         BAIT_INPUTS.out.baits

From e206da4aa740416f4810ad4ba2b6561ac9fc5587 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Wed, 25 Sep 2024 17:36:44 -0400
Subject: [PATCH 05/28] fix indentation

---
 conf/igenomes.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/igenomes.config b/conf/igenomes.config
index a653ef7..e39e26d 100644
--- a/conf/igenomes.config
+++ b/conf/igenomes.config
@@ -36,7 +36,7 @@ params {
         'GRCh38' {
             ensembl_version      = 88
             //fasta                = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38Decoy/Sequence/WholeGenomeFasta/genome.fa"
-	    fasta                = "https://ftp.ensembl.org/pub/release-88/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
+            fasta                = "https://ftp.ensembl.org/pub/release-88/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
             gtf                  = "https://ftp.ensembl.org/pub/release-88/gtf/homo_sapiens/Homo_sapiens.GRCh38.88.gtf.gz"
             //forte will generate refflat from gtf
             refflat              = null

From fa7f511494aa66438b29542d8d5faa0c9a4b3d06 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Wed, 25 Sep 2024 21:39:47 -0400
Subject: [PATCH 06/28] fix agfusion download command

---
 modules/local/agfusion/download/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/agfusion/download/main.nf b/modules/local/agfusion/download/main.nf
index 094ca39..44b3b2b 100644
--- a/modules/local/agfusion/download/main.nf
+++ b/modules/local/agfusion/download/main.nf
@@ -31,7 +31,7 @@ process AGFUSION_DOWNLOAD {
 
         pyensembl install --species ${pyensembl_species} --release ${ensembl_release}
 
-        agfusion download -g ${agfusion_genome}
+        agfusion download -s ${pyensembl_species} --release ${ensembl_release}
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":

From 20379c791379c8d9b146c6a182118771283ed45d Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Wed, 25 Sep 2024 21:41:05 -0400
Subject: [PATCH 07/28] clean up view operator

---
 subworkflows/local/fusion.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/fusion.nf b/subworkflows/local/fusion.nf
index fb8525e..28e8546 100644
--- a/subworkflows/local/fusion.nf
+++ b/subworkflows/local/fusion.nf
@@ -53,7 +53,7 @@ workflow FUSION {
         STAR_FOR_ARRIBA.out.bam,
         fasta,
         gtf,
-	arriba_blacklist.map{[[:],it]}.view(),
+	arriba_blacklist.map{[[:],it]},
 	arriba_known_fusions.map{[[:],it]},
         [[:],[]],
         [[:],[]],

From 39744f71f0813a5826b0ad8d8c88ca42a10aa52f Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Thu, 26 Sep 2024 12:13:41 -0400
Subject: [PATCH 08/28] fix fillout pytest

---
 subworkflows/local/fillout.nf | 2 +-
 workflows/forte.nf            | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/subworkflows/local/fillout.nf b/subworkflows/local/fillout.nf
index 3a3703f..9453446 100755
--- a/subworkflows/local/fillout.nf
+++ b/subworkflows/local/fillout.nf
@@ -32,7 +32,7 @@ workflow FILLOUT {
             .map{ meta, bam, bai, variants ->
                 [ meta, bam, bai, variants, "${variants.getBaseName()}.gbcms.maf"]
             },
-        fasta.map{it[1]}.first(),
+        fasta,
         fai
 
     )
diff --git a/workflows/forte.nf b/workflows/forte.nf
index 4ccd0d9..3ce6178 100644
--- a/workflows/forte.nf
+++ b/workflows/forte.nf
@@ -138,9 +138,9 @@ workflow FORTE {
         PREPARE_REFERENCES.out.metafusion_gene_bed,
         PREPARE_REFERENCES.out.metafusion_gene_info,
         PREPARE_REFERENCES.out.metafusion_blocklist,
-        workflow.profile.toString().split(",").contains("test") ? [] : PREPARE_REFERENCES.out.arriba_blacklist,
-        workflow.profile.toString().split(",").contains("test") ? [] : PREPARE_REFERENCES.out.arriba_known_fusions,
-        workflow.profile.toString().split(",").contains("test") ? [] : PREPARE_REFERENCES.out.arriba_protein_domains
+        workflow.profile.toString().split(",").contains("test") ? Channel.of([]) : PREPARE_REFERENCES.out.arriba_blacklist,
+        workflow.profile.toString().split(",").contains("test") ? Channel.of([]) : PREPARE_REFERENCES.out.arriba_known_fusions,
+        workflow.profile.toString().split(",").contains("test") ? Channel.of([]) : PREPARE_REFERENCES.out.arriba_protein_domains
     )
     ch_versions = ch_versions.mix(FUSION.out.ch_versions)
 
@@ -153,7 +153,7 @@ workflow FORTE {
         ALIGN_READS.out.bam,
         ALIGN_READS.out.bai,
         MAF_INPUT_CHECK.out.mafs,
-        PREPARE_REFERENCES.out.fasta,
+        PREPARE_REFERENCES.out.fasta.map{ it[1] }.first(),
         PREPARE_REFERENCES.out.fasta_fai.map{ it[1] }.first()
     )
     ch_versions = ch_versions.mix(FILLOUT.out.ch_versions)

From 01b2981ac91558280463edfaaf5ee85df3f964d8 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Thu, 26 Sep 2024 14:08:54 -0400
Subject: [PATCH 09/28] pin pfam reference to a fixed release (Pfam37.0)

---
 modules/local/agfusion/download/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/agfusion/download/main.nf b/modules/local/agfusion/download/main.nf
index 44b3b2b..ef4b80a 100644
--- a/modules/local/agfusion/download/main.nf
+++ b/modules/local/agfusion/download/main.nf
@@ -44,7 +44,7 @@ process AGFUSION_DOWNLOAD {
 
         pyensembl install --species ${pyensembl_species} --release ${ensembl_release}
 
-        curl http://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/database_files/pfamA.txt.gz > pfamA.txt.gz
+        curl http://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam37.0/database_files/pfamA.txt.gz > pfamA.txt.gz
         gunzip pfamA.txt.gz
         agfusion build --dir . --species ${agfusion_genome} --release ${ensembl_release} --pfam pfamA.txt
         rm pfamA.txt

From 732558981fab5a4b55cc54010ed4f69063921586 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Thu, 26 Sep 2024 16:23:52 -0400
Subject: [PATCH 10/28] convert reference channels to value channels to avoid poison pill

---
 subworkflows/local/prepare_references.nf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf
index d5a02dc..925571b 100644
--- a/subworkflows/local/prepare_references.nf
+++ b/subworkflows/local/prepare_references.nf
@@ -29,21 +29,21 @@ workflow PREPARE_REFERENCES {
         GUNZIP_FASTA([[id:params.genome],params.fasta])
         fasta = GUNZIP_FASTA.out.gunzip.first()
     } else {
-        fasta = Channel.of([[id:params.genome],params.fasta])
+        fasta = Channel.of([[id:params.genome],params.fasta]).first()
     }
 
     if (params.gtf.endsWith(".gz")){
         GUNZIP_GTF([[id:params.genome],params.gtf])
         gtf = GUNZIP_GTF.out.gunzip.first()
     } else {
-        gtf = Channel.of([[id:params.genome],params.gtf])
+        gtf = Channel.of([[id:params.genome],params.gtf]).first()
     }
 
     if (params.metafusion_blocklist.endsWith(".gz")){
         GUNZIP_METAFUSIONBLOCKLIST([[:],params.metafusion_blocklist])
         metafusion_blocklist = GUNZIP_METAFUSIONBLOCKLIST.out.gunzip.map{ it[1] }.first()
     } else {
-        metafusion_blocklist = params.metafusion_blocklist
+        metafusion_blocklist = Channel.of(params.metafusion_blocklist).first()
     }
 
     STAR_GENOMEGENERATE(
@@ -51,7 +51,7 @@ workflow PREPARE_REFERENCES {
         gtf
     )
     ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
-    star_index = STAR_GENOMEGENERATE.out.index
+    star_index = STAR_GENOMEGENERATE.out.index.first()
 
     UCSC_GTFTOGENEPRED(gtf)
     ch_versions = ch_versions.mix(UCSC_GTFTOGENEPRED.out.versions)

From c2041194528fce10d2d95eb8d5a3c08af0b0af07 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Thu, 26 Sep 2024 16:51:49 -0400
Subject: [PATCH 11/28] update md5sum in test_profile test

---
 tests/small_test/test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/small_test/test.yml b/tests/small_test/test.yml
index 50ff7f1..508e2ff 100755
--- a/tests/small_test/test.yml
+++ b/tests/small_test/test.yml
@@ -4,11 +4,11 @@
     - test_profile
   files:
     - path: output/analysis/SAMPLE_PAIRED_END/STAR/SAMPLE_PAIRED_END.Aligned.sortedByCoord.out.bam
-      md5sum: 781acdb8d313482de17a2933e18bb97a
+      md5sum: e46db0148604c6937a2cc7535d934292
     - path: output/analysis/SAMPLE_PAIRED_END_UMI/STAR/SAMPLE_PAIRED_END_UMI.Aligned.sortedByCoord.out.bam
     - path: output/analysis/SAMPLE_SINGLE_END/STAR/SAMPLE_SINGLE_END.Aligned.sortedByCoord.out.bam
     - path: output/analysis/SAMPLE_SINGLE_END/arriba/SAMPLE_SINGLE_END.fusions.discarded.tsv
-      md5sum: da3e17e01697fe9990fd545e1e26b822
+      md5sum: 9daf6f31ee9a90b6b263bf5ae28dbe96
     - path: output/analysis/SAMPLE_SINGLE_END/arriba/SAMPLE_SINGLE_END.fusions.tsv
       md5sum: 7c3383f7eb6d79b84b0bd30a7ef02d70
     - path: output/pipeline_info/software_versions.yml

From 7c5dc285876fe78398542e47e372b60a6c6ba4b0 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Thu, 26 Sep 2024 17:40:26 -0400
Subject: [PATCH 12/28] update ensembl version to 112

---
 conf/igenomes.config | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/conf/igenomes.config b/conf/igenomes.config
index e39e26d..83ecb5d 100644
--- a/conf/igenomes.config
+++ b/conf/igenomes.config
@@ -34,14 +34,14 @@ params {
             ensembl_version = 75
         }
         'GRCh38' {
-            ensembl_version      = 88
+            ensembl_version      = 112
             //fasta                = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38Decoy/Sequence/WholeGenomeFasta/genome.fa"
-            fasta                = "https://ftp.ensembl.org/pub/release-88/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
-            gtf                  = "https://ftp.ensembl.org/pub/release-88/gtf/homo_sapiens/Homo_sapiens.GRCh38.88.gtf.gz"
+            fasta                = "https://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
+            gtf                  = "https://ftp.ensembl.org/pub/release-112/gtf/homo_sapiens/Homo_sapiens.GRCh38.112.gtf.gz"
             //forte will generate refflat from gtf
             refflat              = null
             starfusion_url       = "https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz"
-            cdna                 = "https://ftp.ensembl.org/pub/release-88/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"
+            cdna                 = "https://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"
             metafusion_blocklist = "https://raw.githubusercontent.com/anoronh4/forte-references/main/GRCh38/blocklist_breakpoints.hg38.bedpe.gz"
         }
         'smallGRCh37' {

From d8e423ff52a89ee0dffe7a370de4cb0cd99cc06b Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Thu, 26 Sep 2024 17:48:28 -0400
Subject: [PATCH 13/28] update CHANGELOG.md

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a59f682..95508cf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - [#118](https://github.com/mskcc/forte/pull/118) - change the way the plug-n-play starfusion reference is downloaded.
 
+- [#128](https://github.com/mskcc/forte/pull/128) - full support for GRCh38 added
+
 ### `Fixed`
 
 - [#119](https://github.com/mskcc/forte/pull/119) - change script error behavior in METAFUSION_RUN process

From 4966057c714ce963aee2a645e613d278e9a709db Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Tue, 1 Oct 2024 11:41:47 -0400
Subject: [PATCH 14/28] change ensembl version to 111

---
 conf/igenomes.config | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/conf/igenomes.config b/conf/igenomes.config
index 83ecb5d..89826d2 100644
--- a/conf/igenomes.config
+++ b/conf/igenomes.config
@@ -34,14 +34,14 @@ params {
             ensembl_version = 75
         }
         'GRCh38' {
-            ensembl_version      = 112
+            ensembl_version      = 111
             //fasta                = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38Decoy/Sequence/WholeGenomeFasta/genome.fa"
-            fasta                = "https://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
-            gtf                  = "https://ftp.ensembl.org/pub/release-112/gtf/homo_sapiens/Homo_sapiens.GRCh38.112.gtf.gz"
+            fasta                = "https://ftp.ensembl.org/pub/release-111/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
+            gtf                  = "https://ftp.ensembl.org/pub/release-111/gtf/homo_sapiens/Homo_sapiens.GRCh38.111.gtf.gz"
             //forte will generate refflat from gtf
             refflat              = null
             starfusion_url       = "https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz"
-            cdna                 = "https://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"
+            cdna                 = "https://ftp.ensembl.org/pub/release-111/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"
             metafusion_blocklist = "https://raw.githubusercontent.com/anoronh4/forte-references/main/GRCh38/blocklist_breakpoints.hg38.bedpe.gz"
         }
         'smallGRCh37' {

From 3bbaecfda0ace815dbb4969ded60a2b18327aab7 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <9613506+anoronh4@users.noreply.github.com>
Date: Tue, 1 Oct 2024 15:12:08 -0400
Subject: [PATCH 15/28] update AGFusion to v1.4.3@mskcc.1

---
 modules/local/agfusion/batch/main.nf        |  4 +--
 modules/local/agfusion/container/Dockerfile | 27 ++++++++++++++++-----
 modules/local/agfusion/download/main.nf     |  8 +++---
 3 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/modules/local/agfusion/batch/main.nf b/modules/local/agfusion/batch/main.nf
index e8de04d..398a21b 100644
--- a/modules/local/agfusion/batch/main.nf
+++ b/modules/local/agfusion/batch/main.nf
@@ -5,8 +5,8 @@ process AGFUSION_BATCH {
     // Note: 2.7X indices incompatible with AWS iGenomes.
     conda 'bioconda::agfusion=1.252'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'docker://cmopipeline/agfusion:0.0.6' :
-        'docker.io/cmopipeline/agfusion:0.0.6' }"
+        'docker://cmopipeline/agfusion:0.0.7' :
+        'docker.io/cmopipeline/agfusion:0.0.7' }"
 
     input:
     tuple val(meta), path(fusions)
diff --git a/modules/local/agfusion/container/Dockerfile b/modules/local/agfusion/container/Dockerfile
index eaca5d5..a455f44 100755
--- a/modules/local/agfusion/container/Dockerfile
+++ b/modules/local/agfusion/container/Dockerfile
@@ -1,14 +1,30 @@
-FROM ubuntu:bionic-20230530
+FROM ubuntu:jammy-20240911.1
 
 LABEL maintainer="Anne Marie Noronha (noronhaa@mskcc.org)" \
-    version.image="0.0.6"
+    version.image="0.0.7"
 
 # INSTALL DEPENDENCIES
 
 ENV DEBIAN_FRONTEND=noninteractive
 
 RUN apt-get update -y
-RUN apt-get install -y build-essential python3 python3-pip python3-matplotlib python3-pandas python3-future python3-biopython curl less vim libnss-sss git zip
+RUN apt-get install -y \
+    build-essential \
+    python3 \
+    python3-pip \
+    python3-matplotlib \
+    python3-pandas \
+    python3-future \
+    python3-biopython \
+    python3-dev \
+    default-libmysqlclient-dev \
+    pkg-config \
+    curl \
+    less \
+    vim \
+    libnss-sss \
+    git \
+    zip
 RUN pip3 install --upgrade pip
 RUN pip3 install pyensembl
 
@@ -18,9 +34,8 @@ RUN pip3 install mysqlclient
 
 # INSTALL AGFUSION & DATABASE FILES
 WORKDIR /usr/local/bin
-RUN git clone https://github.com/mskcc/AGFusion.git --branch v1.4.1-fork1 --single-branch
+RUN git clone https://github.com/mskcc/AGFusion.git --branch v1.4.3@mskcc.1 --single-branch
 WORKDIR /usr/local/bin/AGFusion
+RUN pip3 install -r requirements.txt
 RUN pip3 install .
 
-# downgrade pyensembl for compatibility
-RUN pip3 install gtfparse==1.2.1 --upgrade
diff --git a/modules/local/agfusion/download/main.nf b/modules/local/agfusion/download/main.nf
index 094ca39..513384a 100644
--- a/modules/local/agfusion/download/main.nf
+++ b/modules/local/agfusion/download/main.nf
@@ -4,8 +4,8 @@ process AGFUSION_DOWNLOAD {
     // Note: 2.7X indices incompatible with AWS iGenomes.
     conda 'bioconda::agfusion=1.252'
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'docker://cmopipeline/agfusion:0.0.6' :
-        'docker.io/cmopipeline/agfusion:0.0.6' }"
+        'docker://cmopipeline/agfusion:0.0.7' :
+        'docker.io/cmopipeline/agfusion:0.0.7' }"
 
     input:
     val(ensembl_release)
@@ -25,13 +25,13 @@ process AGFUSION_DOWNLOAD {
         ['GRCh38','hg38'].contains(genome) ? 'hg38' :
         ['GRCm38','mm10'].contains(genome) ? 'mm10' : ''
     def pyensembl_species = ['GRCm38','mm10'].contains(genome) ? 'mus_musculus' : 'homo_sapiens'
-    if (ensembl_release < 93) {
+    if (ensembl_release < 112) {
         """
         export PYENSEMBL_CACHE_DIR=\$PWD/pyensembl_cache
 
         pyensembl install --species ${pyensembl_species} --release ${ensembl_release}
 
-        agfusion download -g ${agfusion_genome}
+        agfusion download -s ${pyensembl_species} -r ${ensembl_release}
 
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":

From cd06afcff6e1528ee4bdcf49267171f324a425da Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Wed, 2 Oct 2024 13:07:01 -0400
Subject: [PATCH 16/28] add cpus in AGAT_SPADDINTRONS resources

---
 conf/modules.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/conf/modules.config b/conf/modules.config
index 5f15d4e..8d912d2 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -210,6 +210,7 @@ process {
         ]
     }
     withName: 'AGAT_SPADDINTRONS' {
+	cpus = { 4 * task.attempt }
         storeDir = { "${params.reference_base}/${params.genome}/metafusion/introns" }
         publishDir = [
             enabled: false,

From 3bfeb91eea29c90b443a3c54611d1078955e7b31 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Wed, 2 Oct 2024 13:09:11 -0400
Subject: [PATCH 17/28] fix indentation

---
 conf/modules.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index 8d912d2..33b1603 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -210,7 +210,7 @@ process {
         ]
     }
     withName: 'AGAT_SPADDINTRONS' {
-	cpus = { 4 * task.attempt }
+        cpus = { 4 * task.attempt }
         storeDir = { "${params.reference_base}/${params.genome}/metafusion/introns" }
         publishDir = [
             enabled: false,

From 9147321e10c9e0c16e358869fba41932375dc8ef Mon Sep 17 00:00:00 2001
From: pintoa1-mskcc <pintoa1@mskcc.org>
Date: Wed, 2 Oct 2024 13:11:38 -0400
Subject: [PATCH 18/28] Set gene_id as gene_name for lncRNAs, remove NF
 transcripts from gene bed

---
 bin/final_generate_v75_gene_bed.R                     |  7 +++++--
 bin/make_gene_info_for_forte.R                        |  2 +-
 modules/local/metafusion/genebed/main.nf              |  4 ++--
 .../resources/usr/bin/final_generate_v75_gene_bed.R   | 11 +++++++----
 .../resources/usr/bin/make_gene_info_for_forte.R      |  2 +-
 5 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/bin/final_generate_v75_gene_bed.R b/bin/final_generate_v75_gene_bed.R
index ffcb064..156cfbc 100755
--- a/bin/final_generate_v75_gene_bed.R
+++ b/bin/final_generate_v75_gene_bed.R
@@ -3,7 +3,7 @@
 # __author__      = "Alexandria Dymun"
 # __email__       = "pintoa1@mskcc.org"
 # __contributor__ = "Anne Marie Noronha (noronhaa@mskcc.org)"
-# __version__     = "0.0.1"
+# __version__     = "0.0.2"
 # __status__      = "Dev"
 
 
@@ -35,6 +35,8 @@ if (length(args)!=2) {
 
 gtf <- rtracklayer::import(args[1])
 gtf_df <- as.data.frame(gtf)
+# remove features of incomplete transcripts: any tag containing "NF" (e.g. mRNA_start_NF / mRNA_end_NF, start/end could not be confirmed)
+gtf_df <- gtf_df[!grepl("NF",gtf_df$tag),]
 
 file.to_write <- args[2]
 
@@ -43,7 +45,8 @@ gtf_df <- gtf_df %>%
         chr = seqnames
     ) %>%
     select(c(chr, start, end, transcript_id, type, strand, gene_name, gene_id)) %>%
-    filter(type %in% c("exon","intron","UTR","CDS","cds","utr"))
+    filter(type %in% c("exon","intron","UTR","CDS","cds","utr")) %>% 
+    mutate(gene_name = ifelse(is.na(gene_name),gene_id,gene_name))
 
 
 #START CLOCK
diff --git a/bin/make_gene_info_for_forte.R b/bin/make_gene_info_for_forte.R
index 2ab3dfd..08ac644 100755
--- a/bin/make_gene_info_for_forte.R
+++ b/bin/make_gene_info_for_forte.R
@@ -106,7 +106,7 @@ gene_info <- rbind(gene_info,add_these_excess_gene_ids)
 gene_info <- merge(gene_info,do.call(rbind,unique_id_to_names[versioned_gtf])[,c("gene_id","gene_id_with_version")],by = "gene_id",all.x = T, all.y = F)
 
 gene_info$Synonyms <- ifelse(is.na(gene_info$gene_id_with_version),gene_info$gene_id,paste0(gene_info$gene_id,"|",gene_info$gene_id_with_version))
-gene_info$Symbol <- gene_info$gene_name
+gene_info$Symbol <- ifelse(is.na(gene_info$gene_name), gene_info$gene_id, gene_info$gene_name)
 
 gene_info <- gene_info[,c("Symbol","Synonyms")]
 
diff --git a/modules/local/metafusion/genebed/main.nf b/modules/local/metafusion/genebed/main.nf
index 2314c40..1936846 100644
--- a/modules/local/metafusion/genebed/main.nf
+++ b/modules/local/metafusion/genebed/main.nf
@@ -28,7 +28,7 @@ process METAFUSION_GENEBED {
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         R: \$(R --version | head -n1)
-        final_generate_v75_gene_bed.R: 0.0.1
+        final_generate_v75_gene_bed.R: 0.0.2
     END_VERSIONS
     """
 
@@ -41,7 +41,7 @@ process METAFUSION_GENEBED {
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         R: \$(R --version | head -n1)
-        final_generate_v75_gene_bed.R: 0.0.1
+        final_generate_v75_gene_bed.R: 0.0.2
     END_VERSIONS
     """
 }
diff --git a/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R b/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R
index 46a5d15..156cfbc 100755
--- a/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R
+++ b/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R
@@ -1,14 +1,14 @@
-
 #!/usr/local/bin/Rscript
+
 # __author__      = "Alexandria Dymun"
 # __email__       = "pintoa1@mskcc.org"
 # __contributor__ = "Anne Marie Noronha (noronhaa@mskcc.org)"
-# __version__     = "0.0.1"
+# __version__     = "0.0.2"
 # __status__      = "Dev"
 
 
 suppressPackageStartupMessages({
-    library(plyr)
+#    library(plyr)
     library(dplyr)
     library(data.table)
     library(stringr)
@@ -35,6 +35,8 @@ if (length(args)!=2) {
 
 gtf <- rtracklayer::import(args[1])
 gtf_df <- as.data.frame(gtf)
+# remove features of incomplete transcripts: any tag containing "NF" (e.g. mRNA_start_NF / mRNA_end_NF, start/end could not be confirmed)
+gtf_df <- gtf_df[!grepl("NF",gtf_df$tag),]
 
 file.to_write <- args[2]
 
@@ -43,7 +45,8 @@ gtf_df <- gtf_df %>%
         chr = seqnames
     ) %>%
     select(c(chr, start, end, transcript_id, type, strand, gene_name, gene_id)) %>%
-    filter(type %in% c("exon","intron","UTR","CDS","cds","utr"))
+    filter(type %in% c("exon","intron","UTR","CDS","cds","utr")) %>% 
+    mutate(gene_name = ifelse(is.na(gene_name),gene_id,gene_name))
 
 
 #START CLOCK
diff --git a/modules/local/metafusion/geneinfo/resources/usr/bin/make_gene_info_for_forte.R b/modules/local/metafusion/geneinfo/resources/usr/bin/make_gene_info_for_forte.R
index 2ab3dfd..08ac644 100755
--- a/modules/local/metafusion/geneinfo/resources/usr/bin/make_gene_info_for_forte.R
+++ b/modules/local/metafusion/geneinfo/resources/usr/bin/make_gene_info_for_forte.R
@@ -106,7 +106,7 @@ gene_info <- rbind(gene_info,add_these_excess_gene_ids)
 gene_info <- merge(gene_info,do.call(rbind,unique_id_to_names[versioned_gtf])[,c("gene_id","gene_id_with_version")],by = "gene_id",all.x = T, all.y = F)
 
 gene_info$Synonyms <- ifelse(is.na(gene_info$gene_id_with_version),gene_info$gene_id,paste0(gene_info$gene_id,"|",gene_info$gene_id_with_version))
-gene_info$Symbol <- gene_info$gene_name
+gene_info$Symbol <- ifelse(is.na(gene_info$gene_name), gene_info$gene_id, gene_info$gene_name)
 
 gene_info <- gene_info[,c("Symbol","Synonyms")]
 

From ad507432d783d61b98d92c0ddf8c2bb259a76ad5 Mon Sep 17 00:00:00 2001
From: pintoa1-mskcc <pintoa1@mskcc.org>
Date: Wed, 2 Oct 2024 14:10:34 -0400
Subject: [PATCH 19/28] fix linting error, ensure no scientific notation in
 gene bed

---
 bin/final_generate_v75_gene_bed.R                              | 3 ++-
 .../genebed/resources/usr/bin/final_generate_v75_gene_bed.R    | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/bin/final_generate_v75_gene_bed.R b/bin/final_generate_v75_gene_bed.R
index 156cfbc..f79c712 100755
--- a/bin/final_generate_v75_gene_bed.R
+++ b/bin/final_generate_v75_gene_bed.R
@@ -12,6 +12,7 @@ suppressPackageStartupMessages({
     library(dplyr)
     library(data.table)
     library(stringr)
+    options(scipen = 999)
 })
 
 usage <- function() {
@@ -45,7 +46,7 @@ gtf_df <- gtf_df %>%
         chr = seqnames
     ) %>%
     select(c(chr, start, end, transcript_id, type, strand, gene_name, gene_id)) %>%
-    filter(type %in% c("exon","intron","UTR","CDS","cds","utr")) %>% 
+    filter(type %in% c("exon","intron","UTR","CDS","cds","utr")) %>%
     mutate(gene_name = ifelse(is.na(gene_name),gene_id,gene_name))
 
 
diff --git a/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R b/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R
index 156cfbc..f79c712 100755
--- a/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R
+++ b/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R
@@ -12,6 +12,7 @@ suppressPackageStartupMessages({
     library(dplyr)
     library(data.table)
     library(stringr)
+    options(scipen = 999)
 })
 
 usage <- function() {
@@ -45,7 +46,7 @@ gtf_df <- gtf_df %>%
         chr = seqnames
     ) %>%
     select(c(chr, start, end, transcript_id, type, strand, gene_name, gene_id)) %>%
-    filter(type %in% c("exon","intron","UTR","CDS","cds","utr")) %>% 
+    filter(type %in% c("exon","intron","UTR","CDS","cds","utr")) %>%
     mutate(gene_name = ifelse(is.na(gene_name),gene_id,gene_name))
 
 

From 95e737e24d10ed753aedba2defce835e38185644 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Wed, 9 Oct 2024 21:08:03 -0400
Subject: [PATCH 20/28] remove deprecated arriba installation

---
 modules.json                    |  5 ---
 modules/nf-core/arriba/main.nf  | 66 -----------------------------
 modules/nf-core/arriba/meta.yml | 74 ---------------------------------
 3 files changed, 145 deletions(-)
 delete mode 100644 modules/nf-core/arriba/main.nf
 delete mode 100644 modules/nf-core/arriba/meta.yml

diff --git a/modules.json b/modules.json
index 5a728db..c89decd 100644
--- a/modules.json
+++ b/modules.json
@@ -21,11 +21,6 @@
                         "git_sha": "6898156da3604a6bdf26c36036053a970050fea0",
                         "installed_by": ["modules"]
                     },
-                    "arriba": {
-                        "branch": "master",
-                        "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
-                    },
                     "arriba/arriba": {
                         "branch": "master",
                         "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
diff --git a/modules/nf-core/arriba/main.nf b/modules/nf-core/arriba/main.nf
deleted file mode 100644
index e4b48be..0000000
--- a/modules/nf-core/arriba/main.nf
+++ /dev/null
@@ -1,66 +0,0 @@
-process ARRIBA {
-    tag "$meta.id"
-    label 'process_medium'
-
-    conda "bioconda::arriba=2.3.0"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/arriba:2.3.0--haa8aa89_0' :
-        'quay.io/biocontainers/arriba:2.3.0--haa8aa89_0' }"
-
-    input:
-    tuple val(meta), path(bam)
-    path fasta
-    path gtf
-    path blacklist
-    path known_fusions
-    path structural_variants
-    path tags
-    path protein_domains
-
-    output:
-    tuple val(meta), path("*.fusions.tsv")          , emit: fusions
-    tuple val(meta), path("*.fusions.discarded.tsv"), emit: fusions_fail
-    path "versions.yml"                             , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def blacklist = blacklist ? "-b $blacklist" : "-f blacklist"
-    def known_fusions = known_fusions ? "-k $known_fusions" : ""
-    def structural_variants = structural_variants ? "-d $structual_variants" : ""
-    def tags = tags ? "-t $tags" : ""
-    def protein_domains = protein_domains ? "-p $protein_domains" : ""
-
-    """
-    arriba \\
-        -x $bam \\
-        -a $fasta \\
-        -g $gtf \\
-        -o ${prefix}.fusions.tsv \\
-        -O ${prefix}.fusions.discarded.tsv \\
-        $blacklist \\
-        $known_fusions \\
-        $structural_variants \\
-        $tags \\
-        $protein_domains \\
-        $args
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        arriba: \$(arriba -h | grep 'Version:' 2>&1 |  sed 's/Version:\s//')
-    END_VERSIONS
-    """
-
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    echo stub > ${prefix}.fusions.tsv
-    echo stub > ${prefix}.fusions.discarded.tsv
-
-    echo "${task.process}:" > versions.yml
-    echo ' arriba: 2.2.1' >> versions.yml
-    """
-}
diff --git a/modules/nf-core/arriba/meta.yml b/modules/nf-core/arriba/meta.yml
deleted file mode 100644
index 119dd91..0000000
--- a/modules/nf-core/arriba/meta.yml
+++ /dev/null
@@ -1,74 +0,0 @@
-name: arriba
-description: Arriba is a command-line tool for the detection of gene fusions from RNA-Seq data.
-keywords:
-  - fusion
-  - arriba
-tools:
-  - arriba:
-      description: Fast and accurate gene fusion detection from RNA-Seq data
-      homepage: https://github.com/suhrig/arriba
-      documentation: https://arriba.readthedocs.io/en/latest/
-      tool_dev_url: https://github.com/suhrig/arriba
-      doi: "10.1101/gr.257246.119"
-      licence: ["MIT"]
-
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - bam:
-      type: file
-      description: BAM/CRAM/SAM file
-      pattern: "*.{bam,cram,sam}"
-  - fasta:
-      type: file
-      description: Assembly FASTA file
-      pattern: "*.{fasta}"
-  - gtf:
-      type: file
-      description: Annotation GTF file
-      pattern: "*.{gtf}"
-  - blacklist:
-      type: file
-      description: Blacklist file
-      pattern: "*.{tsv}"
-  - known_fusions:
-      type: file
-      description: Known fusions file
-      pattern: "*.{tsv}"
-  - structural_variants:
-      type: file
-      description: Structural variants file
-      pattern: "*.{tsv}"
-  - tags:
-      type: file
-      description: Tags file
-      pattern: "*.{tsv}"
-  - protein_domains:
-      type: file
-      description: Protein domains file
-      pattern: "*.{gff3}"
-
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-  - fusions:
-      type: file
-      description: File contains fusions which pass all of Arriba's filters.
-      pattern: "*.{fusions.tsv}"
-  - fusions_fail:
-      type: file
-      description: File contains fusions that Arriba classified as an artifact or that are also observed in healthy tissue.
-      pattern: "*.{fusions.discarded.tsv}"
-
-authors:
-  - "@praveenraj2018,@rannick"

From 3d97ab86b026669b51e2686a6585769faa869075 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Thu, 10 Oct 2024 10:15:38 -0400
Subject: [PATCH 21/28] update to decoy fasta

---
 conf/igenomes.config                          |  4 +--
 .../local/fastaremoveprefix/environment.yml   |  5 +++
 modules/local/fastaremoveprefix/main.nf       | 32 +++++++++++++++++++
 modules/local/prepare_rrna/main.nf            |  5 +--
 subworkflows/local/prepare_references.nf      |  6 ++++
 5 files changed, 48 insertions(+), 4 deletions(-)
 create mode 100644 modules/local/fastaremoveprefix/environment.yml
 create mode 100644 modules/local/fastaremoveprefix/main.nf

diff --git a/conf/igenomes.config b/conf/igenomes.config
index 89826d2..69c949b 100644
--- a/conf/igenomes.config
+++ b/conf/igenomes.config
@@ -35,8 +35,8 @@ params {
         }
         'GRCh38' {
             ensembl_version      = 111
-            //fasta                = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38Decoy/Sequence/WholeGenomeFasta/genome.fa"
-            fasta                = "https://ftp.ensembl.org/pub/release-111/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
+            fasta                = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38Decoy/Sequence/WholeGenomeFasta/genome.fa"
+            //fasta                = "https://ftp.ensembl.org/pub/release-111/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
             gtf                  = "https://ftp.ensembl.org/pub/release-111/gtf/homo_sapiens/Homo_sapiens.GRCh38.111.gtf.gz"
             //forte will generate refflat from gtf
             refflat              = null
diff --git a/modules/local/fastaremoveprefix/environment.yml b/modules/local/fastaremoveprefix/environment.yml
new file mode 100644
index 0000000..315f6dc
--- /dev/null
+++ b/modules/local/fastaremoveprefix/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::gawk=5.3.0
diff --git a/modules/local/fastaremoveprefix/main.nf b/modules/local/fastaremoveprefix/main.nf
new file mode 100644
index 0000000..71a790f
--- /dev/null
+++ b/modules/local/fastaremoveprefix/main.nf
@@ -0,0 +1,32 @@
+process FASTAREMOVEPREFIX { // normalises FASTA header names via sed; sequence lines pass through unchanged
+    tag "$fasta"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gawk:5.3.0' :
+        'biocontainers/gawk:5.3.0' }"
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    input:
+    tuple val(meta), path(fasta, name: 'input/*') // staged as input/<name>; the script writes a same-named file into the task root
+
+    output:
+    tuple val(meta), path("*.{fa,fasta}"), emit: fasta // output keeps the input file name, so only *.fa / *.fasta inputs match this glob
+    path "versions.yml"                  , emit: versions
+
+    script:
+    def modified_fasta = fasta.fileName.name
+    """
+    cat ${fasta} | sed "s/^>chr/>/g" > ${modified_fasta}
+
+    cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+    END_VERSIONS
+    """
+
+
+}
diff --git a/modules/local/prepare_rrna/main.nf b/modules/local/prepare_rrna/main.nf
index 37ec774..8d5ab0a 100644
--- a/modules/local/prepare_rrna/main.nf
+++ b/modules/local/prepare_rrna/main.nf
@@ -14,6 +14,7 @@ process PREPARE_RRNA {
     path "rna.bed", emit: rRNA_bed
 
     script:
+    def extra_filter_chr = params.genome == "GRCh38" ? "|^GL000220|^KI270733" : ""
     if (gtf) {
         """
         (${"${gtf}".endsWith(".gz") ? "z" : ""}grep "rRNA" ${gtf} || true) | \\
@@ -23,7 +24,7 @@ process PREPARE_RRNA {
                 /transcript_id "([^"]+)"/ or die "no transcript_id on \$.";
                 print join "\t", (@F[0,1,2,3], \$1)
             ' | \\
-            (grep -vP "^HG|^HSCHR" || true) | \\
+            (grep -vP "^HG|^HSCHR${extra_filter_chr}" || true) | \\
             sort -k1V -k2n -k3n \\
             > rna.bed
 
@@ -32,7 +33,7 @@ process PREPARE_RRNA {
         """
         (${"${refflat}".endsWith(".gz") ? "z" : ""}grep -P "^RNA5|^RNA1|^RNA2" ${refflat} || true) | \\
             awk -F"\\t" -v OFS="\\t" '{ print \$3,\$5,\$6,\$4,\$2 }' | \\
-            (grep -vP "^HG|^HSCHR" || true) | \\
+            (grep -vP "^HG|^HSCHR${extra_filter_chr}" || true) | \\
             sort -k1V -k2n -k3n \\
             > rna.bed
         """
diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf
index 925571b..698f376 100644
--- a/subworkflows/local/prepare_references.nf
+++ b/subworkflows/local/prepare_references.nf
@@ -19,6 +19,7 @@ include { AGFUSION_DOWNLOAD              } from '../../modules/local/agfusion/do
 include { AGAT_SPADDINTRONS              } from '../../modules/nf-core/agat/spaddintrons/main'
 include { METAFUSION_GENEBED             } from '../../modules/local/metafusion/genebed/main'
 include { METAFUSION_GENEINFO            } from '../../modules/local/metafusion/geneinfo/main'
+include { FASTAREMOVEPREFIX              } from '../../modules/local/fastaremoveprefix/main'
 
 workflow PREPARE_REFERENCES {
 
@@ -32,6 +33,11 @@ workflow PREPARE_REFERENCES {
         fasta = Channel.of([[id:params.genome],params.fasta]).first()
     }
 
+    if (params.genome == "GRCh38") {
+        FASTAREMOVEPREFIX(fasta) // drops the leading "chr" from FASTA header names
+        fasta = FASTAREMOVEPREFIX.out.fasta
+    }
+
     if (params.gtf.endsWith(".gz")){
         GUNZIP_GTF([[id:params.genome],params.gtf])
         gtf = GUNZIP_GTF.out.gunzip.first()

From 7f8c7c05ee22a5d4a8642ca2106ecf227d4c8431 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Thu, 10 Oct 2024 11:21:43 -0400
Subject: [PATCH 22/28] add storeDir directives for gunzip* and
 FASTAREMOVEPREFIX

---
 conf/modules.config | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/conf/modules.config b/conf/modules.config
index 33b1603..400d3c8 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -27,6 +27,14 @@ process {
         ]
     }
 
+    withName: '.*:PREPARE_REFERENCES:GUNZIP.*' { // one store dir per GUNZIP* process, named after the lowercased last ':'-separated part of its name
+        storeDir = { "${params.reference_base}/${params.genome}/${task.process.tokenize(':')[-1].toLowerCase()}" }
+    }
+
+    withName: 'FASTAREMOVEPREFIX' { // prefix-stripped FASTA is stored under <reference_base>/<genome>/fasta
+        storeDir = { "${params.reference_base}/${params.genome}/fasta" }
+    }
+
     withName: 'MSKCC_FORTE:FORTE:MULTIQC' {
         publishDir = [
             path: { "${report.folder}/report" },

From 709317a0abd709d03bb35203af5a4ec79e3db029 Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Thu, 10 Oct 2024 16:45:28 -0400
Subject: [PATCH 23/28] change chromosome M to MT in fasta

---
 modules/local/fastaremoveprefix/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/fastaremoveprefix/main.nf b/modules/local/fastaremoveprefix/main.nf
index 71a790f..c7ebf26 100644
--- a/modules/local/fastaremoveprefix/main.nf
+++ b/modules/local/fastaremoveprefix/main.nf
@@ -20,7 +20,7 @@ process FASTAREMOVEPREFIX {
     script:
     def modified_fasta = fasta.fileName.name
     """
-    cat ${fasta} | sed "s/^>chr/>/g" > ${modified_fasta}
+    cat ${fasta} | sed "s/^>chr/>/g" | sed "s/^>M />MT /g" > ${modified_fasta}
 
     cat <<-END_VERSIONS > versions.yml
         "${task.process}":

From a70828dbb9e348a308f881b9c7aae1efbf3e875b Mon Sep 17 00:00:00 2001
From: Anne Marie Noronha <anoronh4@users.noreply.github.com>
Date: Fri, 11 Oct 2024 15:34:12 -0400
Subject: [PATCH 24/28] add idt_v2 baits for GRCh38 to reference config

---
 conf/igenomes.config | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/conf/igenomes.config b/conf/igenomes.config
index 69c949b..cfb81b7 100644
--- a/conf/igenomes.config
+++ b/conf/igenomes.config
@@ -43,6 +43,12 @@ params {
             starfusion_url       = "https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz"
             cdna                 = "https://ftp.ensembl.org/pub/release-111/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"
             metafusion_blocklist = "https://raw.githubusercontent.com/anoronh4/forte-references/main/GRCh38/blocklist_breakpoints.hg38.bedpe.gz"
+            baits {
+                'idt_v2' {
+                    targets = "/juno/work/ccs/cmopipeline/forte/GRCh38_probes/xgen-exome-hyb-panel-v2-targets-hg38.bed"
+                    baits   = "/juno/work/ccs/cmopipeline/forte/GRCh38_probes/xgen-exome-hyb-panel-v2-probes-hg38.bed"
+                }
+            }
         }
         'smallGRCh37' {
             fasta          = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta"

From 612a80a4898f7117cfa95ec4ca7713cf73df5780 Mon Sep 17 00:00:00 2001
From: Alexandria Pinto <pintoa1@mskcc.org>
Date: Thu, 7 Nov 2024 15:52:33 -0500
Subject: [PATCH 25/28] edit genebed generation to function with v111
 nomenclature

---
 bin/final_generate_v111_gene_bed.R            | 122 ++++++++++++++++++
 modules/local/metafusion/genebed/main.nf      |  69 ++++++----
 .../usr/bin/final_generate_v111_gene_bed.R    | 122 ++++++++++++++++++
 subworkflows/local/prepare_references.nf      |   3 +-
 4 files changed, 291 insertions(+), 25 deletions(-)
 create mode 100755 bin/final_generate_v111_gene_bed.R
 create mode 100755 modules/local/metafusion/genebed/resources/usr/bin/final_generate_v111_gene_bed.R

diff --git a/bin/final_generate_v111_gene_bed.R b/bin/final_generate_v111_gene_bed.R
new file mode 100755
index 0000000..28983da
--- /dev/null
+++ b/bin/final_generate_v111_gene_bed.R
@@ -0,0 +1,122 @@
+#!/usr/local/bin/Rscript
+
+# __author__      = "Alexandria Dymun"
+# __email__       = "pintoa1@mskcc.org"
+# __contributor__ = "Anne Marie Noronha (noronhaa@mskcc.org)"
+# __version__     = "0.0.1"
+# __status__      = "Dev"
+
+
+suppressPackageStartupMessages({
+    library(plyr)
+    library(dplyr)
+    library(data.table)
+    library(stringr)
+    options(scipen = 999)
+})
+
+usage <- function() {
+    message("Usage:")
+    message("final_generate_v111_gene_bed.R <in.gff> <out.bed>")
+}
+
+args = commandArgs(TRUE)
+
+if (length(args)!=2) {
+    usage()
+    quit()
+}
+
+gtf <- rtracklayer::import(args[1])
+gtf_df <- as.data.frame(gtf)
+#remove incomplete transcripts mRNA_end_NF and mRNA_start_NF (not finished)
+gtf_df <- gtf_df[!grepl("NF",gtf_df$tag),]
+
+file.to_write <- args[2]
+
+### convert start to 0-based to match metafusion expectations of gff format
+gtf_df <- gtf_df %>%
+    rename(
+        chr = seqnames
+    ) %>%
+    select(c(chr, start, end, transcript_id, type, strand, gene_name, gene_id)) %>%
+    filter(type %in% c("exon","intron","five_prime_utr","three_prime_utr","CDS")) %>%
+    mutate(gene_name = ifelse(is.na(gene_name),gene_id,gene_name)) %>% mutate(start = start-1)
+
+
+#START CLOCK
+ptm <- proc.time()
+print(ptm)
+
+# Index each transcript feature, incrementing when an intron is passed
+## metafusion expects exon count 0 to (N(exons)-1)
+## Forward strand: Exon 0 == Exon 1
+### Reverse strand: Exon 0 == LAST EXON IN TRANSCRIPT
+
+print(dim(gtf_df))
+print(length(unique(gtf_df$transcript_id)))
+
+modify_transcript <- function(transcript){
+
+    # Remove exons if coding gene, since "exon" and "CDS" are duplicates of one another
+    if ("CDS" %in% transcript$type){
+        transcript <- transcript[!transcript$type == "exon",]
+        }
+    # Order features by increasing bp
+    transcript <- transcript[order(transcript$start, decreasing = FALSE),]
+    # Index features
+    idx <- 0
+    for (i in 1:nrow(transcript)){
+        transcript$idx[i]<- idx
+        if (transcript$type[i] == "intron"){
+            idx <- idx + 1
+        }
+    }
+    # REFORMAT TRANSCRIPT
+    #Change strand info (+ --> f, - --> r)
+    if (unique(transcript$strand) == "+"){
+        transcript$strand <- 'f'
+    } else if  (unique(transcript$strand) == "-"){
+        transcript$strand <- 'r'
+    } else {
+        errorCondition("Strand info for this transcript is inconsistent")
+    }
+    #Add "chr" prefix to chromosomes
+    transcript$chr <- sapply("chr", paste0,  transcript$chr)
+    #Change CDS --> cds ### IF A TRANSCRIPT LACKS "CDS" THIS LINE WILL DO NOTHING, Changing exon values to UTRs later
+    transcript <- transcript %>% mutate(type = as.character(type))
+    transcript <- transcript %>% mutate(type=ifelse(type == "CDS","cds",type))
+    transcript$type[transcript$type == "five_prime_utr"] <- "utr5"
+    transcript$type[transcript$type == "three_prime_utr"] <- "utr3"
+    
+
+    #### Any exon that remains after the cds change, is likely and untranslated region. change below
+    # Basically, subfeatures which are "exon" need to be changed (i.e. exon --> utr3/utr5)
+    #Forward strand
+    transcript$type[transcript$strand == "f" &  transcript$type == "exon" ] <- "utr5"
+    #Reverse strand
+    transcript$type[transcript$strand == "r" &  transcript$type == "exon"]<- "utr3"
+    expected_types <- c("cds","intron","utr3","utr5")
+    transcript <- transcript[transcript$type %in% c(expected_types),]
+    return(transcript)
+}
+
+if(file.exists(file.to_write) ) {file.remove(file.to_write)}
+
+gtf_df_modified <- gtf_df %>%
+    group_by(transcript_id,.drop = FALSE) %>%
+    group_modify(~ modify_transcript(.x)) %>%
+    select(c(chr, start, end, transcript_id, type, idx, strand, gene_name, gene_id )) %>%
+    arrange(chr,start,end)
+
+time <- proc.time() - ptm
+print(time)
+
+write.table(
+    gtf_df_modified,
+    file.to_write,
+    sep="\t",
+    quote=F,
+    row.names=F,
+    col.names=F
+)
diff --git a/modules/local/metafusion/genebed/main.nf b/modules/local/metafusion/genebed/main.nf
index 27a1a7f..3d11e7c 100644
--- a/modules/local/metafusion/genebed/main.nf
+++ b/modules/local/metafusion/genebed/main.nf
@@ -9,10 +9,9 @@ process METAFUSION_GENEBED {
 
     input:
     tuple val(meta), path(gff)
-    val ensembl_version
 
     output:
-    tuple val(meta), path("*.metafusion.gene.bed"), emit: metafusion_gene_bed
+    tuple val(meta), path("${meta.id}.metafusion.gene.bed"), emit: metafusion_gene_bed
     path "versions.yml"                           , emit: versions
 
     when:
@@ -21,28 +20,52 @@ process METAFUSION_GENEBED {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    final_generate_v75_gene_bed.R \\
-        $gff \\
-        ${ensembl_version}.metafusion.gene.bed
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        R: \$(R --version | head -n1)
-        final_generate_v75_gene_bed.R: 0.0.2
-    END_VERSIONS
-    """
+    if( prefix == 'GRCh37' )
+        """
+        final_generate_v75_gene_bed.R \\
+            $gff \\
+            ${prefix}.metafusion.gene.bed
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            R: \$(R --version | head -n1)
+            final_generate_v75_gene_bed.R: 0.0.2
+        END_VERSIONS
+        """
+    else if( prefix == 'GRCh38' )
+        """
+        final_generate_v111_gene_bed.R \\
+            $gff \\
+            ${prefix}.metafusion.gene.bed
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            R: \$(R --version | head -n1)
+            final_generate_v111_gene_bed.R: 0.0.1
+        END_VERSIONS
+        """
 
     stub:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    touch ${prefix}.metafusion.gene.bed
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        R: \$(R --version | head -n1)
-        final_generate_v75_gene_bed.R: 0.0.2
-    END_VERSIONS
-    """
-}
+    if( prefix == 'GRCh37' )
+        """
+        touch ${prefix}.metafusion.gene.bed
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            R: \$(R --version | head -n1)
+            final_generate_v75_gene_bed.R: 0.0.2
+        END_VERSIONS
+        """
+    else if( prefix == 'GRCh38' )
+            """
+        touch ${prefix}.metafusion.gene.bed
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            R: \$(R --version | head -n1)
+            final_generate_v111_gene_bed.R: 0.0.1
+        END_VERSIONS
+        """
+}
\ No newline at end of file
diff --git a/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v111_gene_bed.R b/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v111_gene_bed.R
new file mode 100755
index 0000000..afbbf10
--- /dev/null
+++ b/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v111_gene_bed.R
@@ -0,0 +1,122 @@
+#!/usr/local/bin/Rscript
+
+# __author__      = "Alexandria Dymun"
+# __email__       = "pintoa1@mskcc.org"
+# __contributor__ = "Anne Marie Noronha (noronhaa@mskcc.org)"
+# __version__     = "0.0.1"
+# __status__      = "Dev"
+
+
+suppressPackageStartupMessages({
+    library(plyr)
+    library(dplyr)
+    library(data.table)
+    library(stringr)
+    options(scipen = 999)
+})
+
+usage <- function() {
+    message("Usage:")
+    message("final_generate_v111_gene_bed.R <in.gff> <out.bed>")
+}
+
+args = commandArgs(TRUE)
+
+if (length(args)!=2) {
+    usage()
+    quit()
+}
+
+gtf <- rtracklayer::import(args[1])
+gtf_df <- as.data.frame(gtf)
+#remove incomplete transcripts mRNA_end_NF and mRNA_start_NF (not finished)
+gtf_df <- gtf_df[!grepl("NF",gtf_df$tag),]
+
+file.to_write <- args[2]
+
+### convert start to 0-based to match metafusion expectations of gff format
+gtf_df <- gtf_df %>%
+    rename(
+        chr = seqnames
+    ) %>%
+    select(c(chr, start, end, transcript_id, type, strand, gene_name, gene_id)) %>%
+    filter(type %in% c("exon","intron","five_prime_utr","three_prime_utr","CDS")) %>%
+    mutate(gene_name = ifelse(is.na(gene_name),gene_id,gene_name)) %>% mutate(start = start-1)
+
+
+#START CLOCK
+ptm <- proc.time()
+print(ptm)
+
+# Index each transcript feature, incrementing when an intron is passed
+## metafusion expects exon count 0 to (N(exons)-1)
+## Forward strand: Exon 0 == Exon 1
+### Reverse strand: Exon 0 == LAST EXON IN TRANSCRIPT
+
+print(dim(gtf_df))
+print(length(unique(gtf_df$transcript_id)))
+
+modify_transcript <- function(transcript){
+
+    # Remove exons if coding gene, since "exon" and "CDS" are duplicates of one another
+    if ("CDS" %in% transcript$type){
+        transcript <- transcript[!transcript$type == "exon",]
+        }
+    # Order features by increasing bp
+    transcript <- transcript[order(transcript$start, decreasing = FALSE),]
+    # Index features
+    idx <- 0
+    for (i in 1:nrow(transcript)){
+        transcript$idx[i]<- idx
+        if (transcript$type[i] == "intron"){
+            idx <- idx + 1
+        }
+    }
+    # REFORMAT TRANSCRIPT
+    #Change strand info (+ --> f, - --> r)
+    if (unique(transcript$strand) == "+"){
+        transcript$strand <- 'f'
+    } else if  (unique(transcript$strand) == "-"){
+        transcript$strand <- 'r'
+    } else {
+        errorCondition("Strand info for this transcript is inconsistent")
+    }
+    #Add "chr" prefix to chromosomes
+    transcript$chr <- sapply("chr", paste0,  transcript$chr)
+    #Change CDS --> cds ### IF A TRANSCRIPT LACKS "CDS" THIS LINE WILL DO NOTHING, Changing exon values to UTRs later
+    transcript <- transcript %>% mutate(type = as.character(type))
+    transcript <- transcript %>% mutate(type=ifelse(type == "CDS","cds",type))
+    transcript$type[transcript$type == "five_prime_utr"] <- "utr5"
+    transcript$type[transcript$type == "three_prime_utr"] <- "utr3"
+    }
+
+    #### Any exon that remains after the cds change, is likely and untranslated region. change below
+    # Basically, subfeatures which are "exon" need to be changed (i.e. exon --> utr3/utr5)
+    #Forward strand
+    transcript$type[transcript$strand == "f" &  transcript$type == "exon" ] <- "utr5"
+    #Reverse strand
+    transcript$type[transcript$strand == "r" &  transcript$type == "exon"]<- "utr3"
+    expected_types <- c("cds","intron","utr3","utr5")
+    transcript <- transcript[transcript$type %in% c(expected_types),]
+    return(transcript)
+}
+
+if(file.exists(file.to_write) ) {file.remove(file.to_write)}
+
+gtf_df_modified <- gtf_df %>%
+    group_by(transcript_id,.drop = FALSE) %>%
+    group_modify(~ modify_transcript(.x)) %>%
+    select(c(chr, start, end, transcript_id, type, idx, strand, gene_name, gene_id )) %>%
+    arrange(chr,start,end)
+
+time <- proc.time() - ptm
+print(time)
+
+write.table(
+    gtf_df_modified,
+    file.to_write,
+    sep="\t",
+    quote=F,
+    row.names=F,
+    col.names=F
+)
diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf
index ef91f9d..698f376 100644
--- a/subworkflows/local/prepare_references.nf
+++ b/subworkflows/local/prepare_references.nf
@@ -107,8 +107,7 @@ workflow PREPARE_REFERENCES {
     )
 
     METAFUSION_GENEBED(
-        AGAT_SPADDINTRONS.out.gff,
-        params.ensembl_version
+        AGAT_SPADDINTRONS.out.gff
     )
 
     METAFUSION_GENEINFO(

From 1e0d3f172e656225d73c6accc557c5bf534583e4 Mon Sep 17 00:00:00 2001
From: Alexandria Pinto <pintoa1@mskcc.org>
Date: Thu, 7 Nov 2024 15:57:24 -0500
Subject: [PATCH 26/28] linting errors

---
 bin/final_generate_v111_gene_bed.R                           | 2 --
 modules/local/metafusion/genebed/main.nf                     | 5 ++++-
 .../genebed/resources/usr/bin/final_generate_v111_gene_bed.R | 2 --
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/bin/final_generate_v111_gene_bed.R b/bin/final_generate_v111_gene_bed.R
index 28983da..d114ccb 100755
--- a/bin/final_generate_v111_gene_bed.R
+++ b/bin/final_generate_v111_gene_bed.R
@@ -88,8 +88,6 @@ modify_transcript <- function(transcript){
     transcript <- transcript %>% mutate(type=ifelse(type == "CDS","cds",type))
     transcript$type[transcript$type == "five_prime_utr"] <- "utr5"
     transcript$type[transcript$type == "three_prime_utr"] <- "utr3"
-    
-
     #### Any exon that remains after the cds change, is likely and untranslated region. change below
     # Basically, subfeatures which are "exon" need to be changed (i.e. exon --> utr3/utr5)
     #Forward strand
diff --git a/modules/local/metafusion/genebed/main.nf b/modules/local/metafusion/genebed/main.nf
index 3d11e7c..25a3c1f 100644
--- a/modules/local/metafusion/genebed/main.nf
+++ b/modules/local/metafusion/genebed/main.nf
@@ -32,6 +32,7 @@ process METAFUSION_GENEBED {
             final_generate_v75_gene_bed.R: 0.0.2
         END_VERSIONS
         """
+
     else if( prefix == 'GRCh38' )
         """
         final_generate_v111_gene_bed.R \\
@@ -58,8 +59,9 @@ process METAFUSION_GENEBED {
             final_generate_v75_gene_bed.R: 0.0.2
         END_VERSIONS
         """
+
     else if( prefix == 'GRCh38' )
-            """
+        """
         touch ${prefix}.metafusion.gene.bed
 
         cat <<-END_VERSIONS > versions.yml
@@ -68,4 +70,5 @@ process METAFUSION_GENEBED {
             final_generate_v111_gene_bed.R: 0.0.1
         END_VERSIONS
         """
+
 }
\ No newline at end of file
diff --git a/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v111_gene_bed.R b/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v111_gene_bed.R
index afbbf10..d114ccb 100755
--- a/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v111_gene_bed.R
+++ b/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v111_gene_bed.R
@@ -88,8 +88,6 @@ modify_transcript <- function(transcript){
     transcript <- transcript %>% mutate(type=ifelse(type == "CDS","cds",type))
     transcript$type[transcript$type == "five_prime_utr"] <- "utr5"
     transcript$type[transcript$type == "three_prime_utr"] <- "utr3"
-    }
-
     #### Any exon that remains after the cds change, is likely and untranslated region. change below
     # Basically, subfeatures which are "exon" need to be changed (i.e. exon --> utr3/utr5)
     #Forward strand

From e10530535c8ec1febd1bb3bbc0879306ea08c0be Mon Sep 17 00:00:00 2001
From: Alexandria Pinto <pintoa1@mskcc.org>
Date: Thu, 7 Nov 2024 15:58:37 -0500
Subject: [PATCH 27/28] line endings

---
 modules/local/metafusion/genebed/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/metafusion/genebed/main.nf b/modules/local/metafusion/genebed/main.nf
index 25a3c1f..9cd2769 100644
--- a/modules/local/metafusion/genebed/main.nf
+++ b/modules/local/metafusion/genebed/main.nf
@@ -71,4 +71,4 @@ process METAFUSION_GENEBED {
         END_VERSIONS
         """
 
-}
\ No newline at end of file
+}

From fb2f514e2712ded692dd09716c556c76e8b41270 Mon Sep 17 00:00:00 2001
From: Alexandria Pinto <pintoa1@mskcc.org>
Date: Wed, 13 Nov 2024 13:15:44 -0500
Subject: [PATCH 28/28] Modify generate gene bed to one script

---
 bin/final_generate_v111_gene_bed.R            | 120 ------------------
 ...ate_v75_gene_bed.R => generate_gene_bed.R} |  13 +-
 modules/local/metafusion/genebed/main.nf      |  63 +++------
 .../usr/bin/final_generate_v111_gene_bed.R    | 120 ------------------
 ...ate_v75_gene_bed.R => generate_gene_bed.R} |  14 +-
 5 files changed, 27 insertions(+), 303 deletions(-)
 delete mode 100755 bin/final_generate_v111_gene_bed.R
 rename bin/{final_generate_v75_gene_bed.R => generate_gene_bed.R} (88%)
 delete mode 100755 modules/local/metafusion/genebed/resources/usr/bin/final_generate_v111_gene_bed.R
 rename modules/local/metafusion/genebed/resources/usr/bin/{final_generate_v75_gene_bed.R => generate_gene_bed.R} (88%)

diff --git a/bin/final_generate_v111_gene_bed.R b/bin/final_generate_v111_gene_bed.R
deleted file mode 100755
index d114ccb..0000000
--- a/bin/final_generate_v111_gene_bed.R
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/usr/local/bin/Rscript
-
-# __author__      = "Alexandria Dymun"
-# __email__       = "pintoa1@mskcc.org"
-# __contributor__ = "Anne Marie Noronha (noronhaa@mskcc.org)"
-# __version__     = "0.0.1"
-# __status__      = "Dev"
-
-
-suppressPackageStartupMessages({
-    library(plyr)
-    library(dplyr)
-    library(data.table)
-    library(stringr)
-    options(scipen = 999)
-})
-
-usage <- function() {
-    message("Usage:")
-    message("final_generate_v111_gene_bed.R <in.gff> <out.bed>")
-}
-
-args = commandArgs(TRUE)
-
-if (length(args)!=2) {
-    usage()
-    quit()
-}
-
-gtf <- rtracklayer::import(args[1])
-gtf_df <- as.data.frame(gtf)
-#remove incomplete transcripts mRNA_end_NF and mRNA_start_NF (not finished)
-gtf_df <- gtf_df[!grepl("NF",gtf_df$tag),]
-
-file.to_write <- args[2]
-
-### convert start to 0-based to match metafusion expectations of gff format
-gtf_df <- gtf_df %>%
-    rename(
-        chr = seqnames
-    ) %>%
-    select(c(chr, start, end, transcript_id, type, strand, gene_name, gene_id)) %>%
-    filter(type %in% c("exon","intron","five_prime_utr","three_prime_utr","CDS")) %>%
-    mutate(gene_name = ifelse(is.na(gene_name),gene_id,gene_name)) %>% mutate(start = start-1)
-
-
-#START CLOCK
-ptm <- proc.time()
-print(ptm)
-
-# Index each transcript feature, incrementing when an intron is passed
-## metafusion expects exon count 0 to (N(exons)-1)
-## Forward strand: Exon 0 == Exon 1
-### Reverse strand: Exon 0 == LAST EXON IN TRANSCRIPT
-
-print(dim(gtf_df))
-print(length(unique(gtf_df$transcript_id)))
-
-modify_transcript <- function(transcript){
-
-    # Remove exons if coding gene, since "exon" and "CDS" are duplicates of one another
-    if ("CDS" %in% transcript$type){
-        transcript <- transcript[!transcript$type == "exon",]
-        }
-    # Order features by increasing bp
-    transcript <- transcript[order(transcript$start, decreasing = FALSE),]
-    # Index features
-    idx <- 0
-    for (i in 1:nrow(transcript)){
-        transcript$idx[i]<- idx
-        if (transcript$type[i] == "intron"){
-            idx <- idx + 1
-        }
-    }
-    # REFORMAT TRANSCRIPT
-    #Change strand info (+ --> f, - --> r)
-    if (unique(transcript$strand) == "+"){
-        transcript$strand <- 'f'
-    } else if  (unique(transcript$strand) == "-"){
-        transcript$strand <- 'r'
-    } else {
-        errorCondition("Strand info for this transcript is inconsistent")
-    }
-    #Add "chr" prefix to chromosomes
-    transcript$chr <- sapply("chr", paste0,  transcript$chr)
-    #Change CDS --> cds ### IF A TRANSCRIPT LACKS "CDS" THIS LINE WILL DO NOTHING, Changing exon values to UTRs later
-    transcript <- transcript %>% mutate(type = as.character(type))
-    transcript <- transcript %>% mutate(type=ifelse(type == "CDS","cds",type))
-    transcript$type[transcript$type == "five_prime_utr"] <- "utr5"
-    transcript$type[transcript$type == "three_prime_utr"] <- "utr3"
-    #### Any exon that remains after the cds change, is likely and untranslated region. change below
-    # Basically, subfeatures which are "exon" need to be changed (i.e. exon --> utr3/utr5)
-    #Forward strand
-    transcript$type[transcript$strand == "f" &  transcript$type == "exon" ] <- "utr5"
-    #Reverse strand
-    transcript$type[transcript$strand == "r" &  transcript$type == "exon"]<- "utr3"
-    expected_types <- c("cds","intron","utr3","utr5")
-    transcript <- transcript[transcript$type %in% c(expected_types),]
-    return(transcript)
-}
-
-if(file.exists(file.to_write) ) {file.remove(file.to_write)}
-
-gtf_df_modified <- gtf_df %>%
-    group_by(transcript_id,.drop = FALSE) %>%
-    group_modify(~ modify_transcript(.x)) %>%
-    select(c(chr, start, end, transcript_id, type, idx, strand, gene_name, gene_id )) %>%
-    arrange(chr,start,end)
-
-time <- proc.time() - ptm
-print(time)
-
-write.table(
-    gtf_df_modified,
-    file.to_write,
-    sep="\t",
-    quote=F,
-    row.names=F,
-    col.names=F
-)
diff --git a/bin/final_generate_v75_gene_bed.R b/bin/generate_gene_bed.R
similarity index 88%
rename from bin/final_generate_v75_gene_bed.R
rename to bin/generate_gene_bed.R
index a25b3ef..2a15149 100755
--- a/bin/final_generate_v75_gene_bed.R
+++ b/bin/generate_gene_bed.R
@@ -17,7 +17,7 @@ suppressPackageStartupMessages({
 
 usage <- function() {
     message("Usage:")
-    message("final_generate_v75_gene_bed.R <in.gff> <out.bed>")
+    message("generate_gene_bed.R <in.gff> <out.bed>")
 }
 
 args = commandArgs(TRUE)
@@ -27,13 +27,6 @@ if (length(args)!=2) {
     quit()
 }
 
-# Utilized gtf from igenomes for FORTE This corresponds to GRCh37 ensembl 75
-# Add introns to gtf, convert to gff3
-# bsub -R "rusage[mem=64]" -o add_introns_agat_%J.out singularity exec -B /juno/ \\
-# -B /tmp -B /scratch/ docker://quay.io/biocontainers/agat:0.8.0--pl5262hdfd78af_0  \\
-# /bin/bash -c "agat_sp_add_introns.pl -g /juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf\\
-# -o genes.INTRONS.gff3"
-
 gtf <- rtracklayer::import(args[1])
 gtf_df <- as.data.frame(gtf)
 #remove incomplete transcripts mRNA_end_NF and mRNA_start_NF (not finished)
@@ -47,7 +40,7 @@ gtf_df <- gtf_df %>%
         chr = seqnames
     ) %>%
     select(c(chr, start, end, transcript_id, type, strand, gene_name, gene_id)) %>%
-    filter(type %in% c("exon","intron","UTR","CDS","cds","utr")) %>%
+    filter(type %in% c("exon","intron","UTR","CDS","cds","utr","five_prime_utr","three_prime_utr")) %>%
     mutate(gene_name = ifelse(is.na(gene_name),gene_id,gene_name)) %>% mutate(start = start-1)
 
 
@@ -110,6 +103,8 @@ modify_transcript <- function(transcript){
             transcript$type[transcript$start >= stop_coding & transcript$type == "UTR"] <- "utr5"
         }
     }
+    transcript$type[transcript$type == "five_prime_utr"] <- "utr5"
+    transcript$type[transcript$type == "three_prime_utr"] <- "utr3"
     #### Any exon that remains after teh cds change, is likely and untranslated region. change below
 
     # Basically, subfeatures which are "exon" need to be changed (i.e. exon --> utr3/utr5)
diff --git a/modules/local/metafusion/genebed/main.nf b/modules/local/metafusion/genebed/main.nf
index 9cd2769..1fb97b5 100644
--- a/modules/local/metafusion/genebed/main.nf
+++ b/modules/local/metafusion/genebed/main.nf
@@ -20,55 +20,30 @@ process METAFUSION_GENEBED {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    if( prefix == 'GRCh37' )
-        """
-        final_generate_v75_gene_bed.R \\
-            $gff \\
-            ${prefix}.metafusion.gene.bed
-
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            R: \$(R --version | head -n1)
-            final_generate_v75_gene_bed.R: 0.0.2
-        END_VERSIONS
-        """
-
-    else if( prefix == 'GRCh38' )
-        """
-        final_generate_v111_gene_bed.R \\
-            $gff \\
-            ${prefix}.metafusion.gene.bed
-
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            R: \$(R --version | head -n1)
-            final_generate_v111_gene_bed.R: 0.0.1
-        END_VERSIONS
-        """
+    """
+    generate_gene_bed.R \\
+        $gff \\
+        ${prefix}.metafusion.gene.bed
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        R: \$(R --version | head -n1)
+        generate_gene_bed.R: 0.0.2
+    END_VERSIONS
+    """
 
     stub:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    if( prefix == 'GRCh37' )
-        """
-        touch ${prefix}.metafusion.gene.bed
-
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            R: \$(R --version | head -n1)
-            final_generate_v75_gene_bed.R: 0.0.2
-        END_VERSIONS
-        """
 
-    else if( prefix == 'GRCh38' )
-        """
-        touch ${prefix}.metafusion.gene.bed
+    """
+    touch ${prefix}.metafusion.gene.bed
 
-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            R: \$(R --version | head -n1)
-            final_generate_v111_gene_bed.R: 0.0.1
-        END_VERSIONS
-        """
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        R: \$(R --version | head -n1)
+        generate_gene_bed.R: 0.0.2
+    END_VERSIONS
+    """
 
 }
diff --git a/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v111_gene_bed.R b/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v111_gene_bed.R
deleted file mode 100755
index d114ccb..0000000
--- a/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v111_gene_bed.R
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/usr/local/bin/Rscript
-
-# __author__      = "Alexandria Dymun"
-# __email__       = "pintoa1@mskcc.org"
-# __contributor__ = "Anne Marie Noronha (noronhaa@mskcc.org)"
-# __version__     = "0.0.1"
-# __status__      = "Dev"
-
-
-suppressPackageStartupMessages({
-    library(plyr)
-    library(dplyr)
-    library(data.table)
-    library(stringr)
-    options(scipen = 999)
-})
-
-usage <- function() {
-    message("Usage:")
-    message("final_generate_v111_gene_bed.R <in.gff> <out.bed>")
-}
-
-args = commandArgs(TRUE)
-
-if (length(args)!=2) {
-    usage()
-    quit()
-}
-
-gtf <- rtracklayer::import(args[1])
-gtf_df <- as.data.frame(gtf)
-#remove incomplete transcripts mRNA_end_NF and mRNA_start_NF (not finished)
-gtf_df <- gtf_df[!grepl("NF",gtf_df$tag),]
-
-file.to_write <- args[2]
-
-### convert start to 0-based to match metafusion expectations of gff format
-gtf_df <- gtf_df %>%
-    rename(
-        chr = seqnames
-    ) %>%
-    select(c(chr, start, end, transcript_id, type, strand, gene_name, gene_id)) %>%
-    filter(type %in% c("exon","intron","five_prime_utr","three_prime_utr","CDS")) %>%
-    mutate(gene_name = ifelse(is.na(gene_name),gene_id,gene_name)) %>% mutate(start = start-1)
-
-
-#START CLOCK
-ptm <- proc.time()
-print(ptm)
-
-# Index each transcript feature, incrementing when an intron is passed
-## metafusion expects exon count 0 to (N(exons)-1)
-## Forward strand: Exon 0 == Exon 1
-### Reverse strand: Exon 0 == LAST EXON IN TRANSCRIPT
-
-print(dim(gtf_df))
-print(length(unique(gtf_df$transcript_id)))
-
-modify_transcript <- function(transcript){
-
-    # Remove exons if coding gene, since "exon" and "CDS" are duplicates of one another
-    if ("CDS" %in% transcript$type){
-        transcript <- transcript[!transcript$type == "exon",]
-        }
-    # Order features by increasing bp
-    transcript <- transcript[order(transcript$start, decreasing = FALSE),]
-    # Index features
-    idx <- 0
-    for (i in 1:nrow(transcript)){
-        transcript$idx[i]<- idx
-        if (transcript$type[i] == "intron"){
-            idx <- idx + 1
-        }
-    }
-    # REFORMAT TRANSCRIPT
-    #Change strand info (+ --> f, - --> r)
-    if (unique(transcript$strand) == "+"){
-        transcript$strand <- 'f'
-    } else if  (unique(transcript$strand) == "-"){
-        transcript$strand <- 'r'
-    } else {
-        errorCondition("Strand info for this transcript is inconsistent")
-    }
-    #Add "chr" prefix to chromosomes
-    transcript$chr <- sapply("chr", paste0,  transcript$chr)
-    #Change CDS --> cds ### IF A TRANSCRIPT LACKS "CDS" THIS LINE WILL DO NOTHING, Changing exon values to UTRs later
-    transcript <- transcript %>% mutate(type = as.character(type))
-    transcript <- transcript %>% mutate(type=ifelse(type == "CDS","cds",type))
-    transcript$type[transcript$type == "five_prime_utr"] <- "utr5"
-    transcript$type[transcript$type == "three_prime_utr"] <- "utr3"
-    #### Any exon that remains after the cds change, is likely and untranslated region. change below
-    # Basically, subfeatures which are "exon" need to be changed (i.e. exon --> utr3/utr5)
-    #Forward strand
-    transcript$type[transcript$strand == "f" &  transcript$type == "exon" ] <- "utr5"
-    #Reverse strand
-    transcript$type[transcript$strand == "r" &  transcript$type == "exon"]<- "utr3"
-    expected_types <- c("cds","intron","utr3","utr5")
-    transcript <- transcript[transcript$type %in% c(expected_types),]
-    return(transcript)
-}
-
-if(file.exists(file.to_write) ) {file.remove(file.to_write)}
-
-gtf_df_modified <- gtf_df %>%
-    group_by(transcript_id,.drop = FALSE) %>%
-    group_modify(~ modify_transcript(.x)) %>%
-    select(c(chr, start, end, transcript_id, type, idx, strand, gene_name, gene_id )) %>%
-    arrange(chr,start,end)
-
-time <- proc.time() - ptm
-print(time)
-
-write.table(
-    gtf_df_modified,
-    file.to_write,
-    sep="\t",
-    quote=F,
-    row.names=F,
-    col.names=F
-)
diff --git a/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R b/modules/local/metafusion/genebed/resources/usr/bin/generate_gene_bed.R
similarity index 88%
rename from modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R
rename to modules/local/metafusion/genebed/resources/usr/bin/generate_gene_bed.R
index 1fb3d76..2a15149 100755
--- a/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R
+++ b/modules/local/metafusion/genebed/resources/usr/bin/generate_gene_bed.R
@@ -17,7 +17,7 @@ suppressPackageStartupMessages({
 
 usage <- function() {
     message("Usage:")
-    message("final_generate_v75_gene_bed.R <in.gff> <out.bed>")
+    message("generate_gene_bed.R <in.gff> <out.bed>")
 }
 
 args = commandArgs(TRUE)
@@ -27,13 +27,6 @@ if (length(args)!=2) {
     quit()
 }
 
-# Utilized gtf from igenomes for FORTE This corresponds to GRCh37 ensembl 75
-# Add introns to gtf, convert to gff3
-# bsub -R "rusage[mem=64]" -o add_introns_agat_%J.out singularity exec -B /juno/ \\
-# -B /tmp -B /scratch/ docker://quay.io/biocontainers/agat:0.8.0--pl5262hdfd78af_0  \\
-# /bin/bash -c "agat_sp_add_introns.pl -g /juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf\\
-# -o genes.INTRONS.gff3"
-
 gtf <- rtracklayer::import(args[1])
 gtf_df <- as.data.frame(gtf)
 #remove incomplete transcripts mRNA_end_NF and mRNA_start_NF (not finished)
@@ -47,11 +40,10 @@ gtf_df <- gtf_df %>%
         chr = seqnames
     ) %>%
     select(c(chr, start, end, transcript_id, type, strand, gene_name, gene_id)) %>%
-    filter(type %in% c("exon","intron","UTR","CDS","cds","utr")) %>%
+    filter(type %in% c("exon","intron","UTR","CDS","cds","utr","five_prime_utr","three_prime_utr")) %>%
     mutate(gene_name = ifelse(is.na(gene_name),gene_id,gene_name)) %>% mutate(start = start-1)
 
 
-
 #START CLOCK
 ptm <- proc.time()
 print(ptm)
@@ -111,6 +103,8 @@ modify_transcript <- function(transcript){
             transcript$type[transcript$start >= stop_coding & transcript$type == "UTR"] <- "utr5"
         }
     }
+    transcript$type[transcript$type == "five_prime_utr"] <- "utr5"
+    transcript$type[transcript$type == "three_prime_utr"] <- "utr3"
     #### Any exon that remains after teh cds change, is likely and untranslated region. change below
 
     # Basically, subfeatures which are "exon" need to be changed (i.e. exon --> utr3/utr5)