diff --git a/CHANGELOG.md b/CHANGELOG.md index 23e1095..0e48469 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#118](https://github.com/mskcc/forte/pull/118) - change the way the plug-n-play starfusion reference is downloaded. +- [#128](https://github.com/mskcc/forte/pull/128) - full support for GRCh38 added + ### `Fixed` - [#119](https://github.com/mskcc/forte/pull/119) - change script error behavior in METAFUSION_RUN process diff --git a/bin/final_generate_v75_gene_bed.R b/bin/generate_gene_bed.R similarity index 87% rename from bin/final_generate_v75_gene_bed.R rename to bin/generate_gene_bed.R index ea98ac6..2a15149 100755 --- a/bin/final_generate_v75_gene_bed.R +++ b/bin/generate_gene_bed.R @@ -3,7 +3,7 @@ # __author__ = "Alexandria Dymun" # __email__ = "pintoa1@mskcc.org" # __contributor__ = "Anne Marie Noronha (noronhaa@mskcc.org)" -# __version__ = "0.0.1" +# __version__ = "0.0.2" # __status__ = "Dev" @@ -12,11 +12,12 @@ suppressPackageStartupMessages({ library(dplyr) library(data.table) library(stringr) + options(scipen = 999) }) usage <- function() { message("Usage:") - message("final_generate_v75_gene_bed.R ") + message("generate_gene_bed.R ") } args = commandArgs(TRUE) @@ -26,15 +27,10 @@ if (length(args)!=2) { quit() } -# Utilized gtf from igenomes for FORTE This corresponds to GRCh37 ensembl 75 -# Add introns to gtf, convert to gff3 -# bsub -R "rusage[mem=64]" -o add_introns_agat_%J.out singularity exec -B /juno/ \\ -# -B /tmp -B /scratch/ docker://quay.io/biocontainers/agat:0.8.0--pl5262hdfd78af_0 \\ -# /bin/bash -c "agat_sp_add_introns.pl -g /juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf\\ -# -o genes.INTRONS.gff3" - gtf <- rtracklayer::import(args[1]) gtf_df <- as.data.frame(gtf) +#remove incomplete transcripts mRNA_end_NF and mRNA_start_NF (not finished) +gtf_df <- gtf_df[!grepl("NF",gtf_df$tag),] file.to_write <- args[2] @@ -44,7 +40,8 @@ gtf_df <- gtf_df %>% chr = seqnames ) %>% select(c(chr, start, end, transcript_id, type, strand, gene_name, gene_id)) %>% - filter(type %in% c("exon","intron","UTR","CDS","cds","utr")) %>% mutate(start = start-1) + filter(type %in% c("exon","intron","UTR","CDS","cds","utr","five_prime_utr","three_prime_utr")) %>% + mutate(gene_name = ifelse(is.na(gene_name),gene_id,gene_name)) %>% mutate(start = start-1) #START CLOCK @@ -106,6 +103,8 @@ modify_transcript <- function(transcript){ transcript$type[transcript$start >= stop_coding & transcript$type == "UTR"] <- "utr5" } } + transcript$type[transcript$type == "five_prime_utr"] <- "utr5" + transcript$type[transcript$type == "three_prime_utr"] <- "utr3" #### Any exon that remains after teh cds change, is likely and untranslated region. change below # Basically, subfeatures which are "exon" need to be changed (i.e. exon --> utr3/utr5) diff --git a/bin/make_gene_info_for_forte.R b/bin/make_gene_info_for_forte.R index 2ab3dfd..08ac644 100755 --- a/bin/make_gene_info_for_forte.R +++ b/bin/make_gene_info_for_forte.R @@ -106,7 +106,7 @@ gene_info <- rbind(gene_info,add_these_excess_gene_ids) gene_info <- merge(gene_info,do.call(rbind,unique_id_to_names[versioned_gtf])[,c("gene_id","gene_id_with_version")],by = "gene_id",all.x = T, all.y = F) gene_info$Synonyms <- ifelse(is.na(gene_info$gene_id_with_version),gene_info$gene_id,paste0(gene_info$gene_id,"|",gene_info$gene_id_with_version)) -gene_info$Symbol <- gene_info$gene_name +gene_info$Symbol <- ifelse(is.na(gene_info$gene_name), gene_info$gene_id, gene_info$gene_name) gene_info <- gene_info[,c("Symbol","Synonyms")] diff --git a/conf/igenomes.config b/conf/igenomes.config index c618acc..cfb81b7 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -34,11 +34,21 @@ params { ensembl_version = 75 } 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - refflat = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes.gencode/refFlat.txt.gz" - starfusion_url = "https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz" - cdna = "https://ftp.ensembl.org/pub/release-86/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz" + ensembl_version = 111 + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38Decoy/Sequence/WholeGenomeFasta/genome.fa" + //fasta = "https://ftp.ensembl.org/pub/release-111/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz" + gtf = "https://ftp.ensembl.org/pub/release-111/gtf/homo_sapiens/Homo_sapiens.GRCh38.111.gtf.gz" + //forte will generate refflat from gtf + refflat = null + starfusion_url = "https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz" + cdna = "https://ftp.ensembl.org/pub/release-111/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz" + metafusion_blocklist = "https://raw.githubusercontent.com/anoronh4/forte-references/main/GRCh38/blocklist_breakpoints.hg38.bedpe.gz" + baits { + 'idt_v2' { + targets = "/juno/work/ccs/cmopipeline/forte/GRCh38_probes/xgen-exome-hyb-panel-v2-targets-hg38.bed" + baits = "/juno/work/ccs/cmopipeline/forte/GRCh38_probes/xgen-exome-hyb-panel-v2-probes-hg38.bed" + } + } } 'smallGRCh37' { fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" @@ -48,7 +58,6 @@ params { cdna = "http://ftp.ensemblgenomes.org/pub/viruses/fasta/sars_cov_2/cdna/Sars_cov_2.ASM985889v3.cdna.all.fa.gz" metafusion_blocklist = "https://raw.githubusercontent.com/anoronh4/forte-references/main/GRCh37_test/blocklist_breakpoints.bedpe" ensembl_version = 75 - } /* 'hg38' { diff --git a/conf/modules.config b/conf/modules.config index 9277358..400d3c8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -27,6 +27,14 @@ process { ] } + withName: '.*:PREPARE_REFERENCES:GUNZIP.*' { + storeDir = { "${params.reference_base}/${params.genome}/${task.process.tokenize(':')[-1].toLowerCase()}" } + } + + withName: 'FASTAREMOVEPREFIX' { + storeDir = { "${params.reference_base}/${params.genome}/fasta" } + } + withName: 'MSKCC_FORTE:FORTE:MULTIQC' { publishDir = [ path: { "${report.folder}/report" }, @@ -210,6 +218,7 @@ process { ] } withName: 'AGAT_SPADDINTRONS' { + cpus = { 4 * task.attempt } storeDir = { "${params.reference_base}/${params.genome}/metafusion/introns" } publishDir = [ enabled: false, @@ -475,7 +484,7 @@ process { ] } - withName: ARRIBA { + withName: ARRIBA_ARRIBA { ext.args = { "-s ${meta.single_end || meta.strandedness == "forward" ? "yes" : meta.strandedness == "reverse" ? "reverse" : "no" }" } diff --git a/modules.json b/modules.json index 2be0262..c89decd 100644 --- a/modules.json +++ b/modules.json @@ -21,9 +21,9 @@ "git_sha": "6898156da3604a6bdf26c36036053a970050fea0", "installed_by": ["modules"] }, - "arriba": { + "arriba/arriba": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "cat/cat": { @@ -48,12 +48,12 @@ }, "gatk4/bedtointervallist": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gatk4/createsequencedictionary": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gunzip": { @@ -134,7 +134,7 @@ }, "samtools/faidx": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "samtools/index": { @@ -149,13 +149,14 @@ }, "star/align": { "branch": "master", - "git_sha": "57d75dbac06812c59798a48585032f6e50bb1914", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "star/genomegenerate": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"], + "patch": "modules/nf-core/star/genomegenerate/star-genomegenerate.diff" }, "subread/featurecounts": { "branch": "master", diff --git a/modules/local/agfusion/batch/main.nf b/modules/local/agfusion/batch/main.nf index e8de04d..398a21b 100644 --- a/modules/local/agfusion/batch/main.nf +++ b/modules/local/agfusion/batch/main.nf @@ -5,8 +5,8 @@ process AGFUSION_BATCH { // Note: 2.7X indices incompatible with AWS iGenomes. conda 'bioconda::agfusion=1.252' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://cmopipeline/agfusion:0.0.6' : - 'docker.io/cmopipeline/agfusion:0.0.6' }" + 'docker://cmopipeline/agfusion:0.0.7' : + 'docker.io/cmopipeline/agfusion:0.0.7' }" input: tuple val(meta), path(fusions) diff --git a/modules/local/agfusion/container/Dockerfile b/modules/local/agfusion/container/Dockerfile index eaca5d5..a455f44 100755 --- a/modules/local/agfusion/container/Dockerfile +++ b/modules/local/agfusion/container/Dockerfile @@ -1,14 +1,30 @@ -FROM ubuntu:bionic-20230530 +FROM ubuntu:jammy-20240911.1 LABEL maintainer="Anne Marie Noronha (noronhaa@mskcc.org)" \ - version.image="0.0.6" + version.image="0.0.7" # INSTALL DEPENDENCIES ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update -y -RUN apt-get install -y build-essential python3 python3-pip python3-matplotlib python3-pandas python3-future python3-biopython curl less vim libnss-sss git zip +RUN apt-get install -y \ + build-essential \ + python3 \ + python3-pip \ + python3-matplotlib \ + python3-pandas \ + python3-future \ + python3-biopython \ + python3-dev \ + default-libmysqlclient-dev \ + pkg-config \ + curl \ + less \ + vim \ + libnss-sss \ + git \ + zip RUN pip3 install --upgrade pip RUN pip3 install pyensembl @@ -18,9 +34,8 @@ RUN pip3 install mysqlclient # INSTALL AGFUSION & DATABASE FILES WORKDIR /usr/local/bin -RUN git clone https://github.com/mskcc/AGFusion.git --branch v1.4.1-fork1 --single-branch +RUN git clone https://github.com/mskcc/AGFusion.git --branch v1.4.3@mskcc.1 --single-branch WORKDIR /usr/local/bin/AGFusion +RUN pip3 install -r requirements.txt RUN pip3 install . -# downgrade pyensembl for compatibility -RUN pip3 install gtfparse==1.2.1 --upgrade diff --git a/modules/local/agfusion/download/main.nf b/modules/local/agfusion/download/main.nf index 094ca39..633d63f 100644 --- a/modules/local/agfusion/download/main.nf +++ b/modules/local/agfusion/download/main.nf @@ -4,8 +4,8 @@ process AGFUSION_DOWNLOAD { // Note: 2.7X indices incompatible with AWS iGenomes. conda 'bioconda::agfusion=1.252' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://cmopipeline/agfusion:0.0.6' : - 'docker.io/cmopipeline/agfusion:0.0.6' }" + 'docker://cmopipeline/agfusion:0.0.7' : + 'docker.io/cmopipeline/agfusion:0.0.7' }" input: val(ensembl_release) @@ -25,13 +25,13 @@ process AGFUSION_DOWNLOAD { ['GRCh38','hg38'].contains(genome) ? 'hg38' : ['GRCm38','mm10'].contains(genome) ? 'mm10' : '' def pyensembl_species = ['GRCm38','mm10'].contains(genome) ? 'mus_musculus' : 'homo_sapiens' - if (ensembl_release < 93) { + if (ensembl_release < 112) { """ export PYENSEMBL_CACHE_DIR=\$PWD/pyensembl_cache pyensembl install --species ${pyensembl_species} --release ${ensembl_release} - agfusion download -g ${agfusion_genome} + agfusion download -s ${pyensembl_species} -r ${ensembl_release} cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -44,7 +44,7 @@ process AGFUSION_DOWNLOAD { pyensembl install --species ${pyensembl_species} --release ${ensembl_release} - curl http://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/database_files/pfamA.txt.gz > pfamA.txt.gz + curl http://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam37.0/database_files/pfamA.txt.gz > pfamA.txt.gz gunzip pfamA.txt.gz agfusion build --dir . --species ${agfusion_genome} --release ${ensembl_release} --pfam pfamA.txt rm pfamA.txt diff --git a/modules/local/fastaremoveprefix/environment.yml b/modules/local/fastaremoveprefix/environment.yml new file mode 100644 index 0000000..315f6dc --- /dev/null +++ b/modules/local/fastaremoveprefix/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::gawk=5.3.0 diff --git a/modules/local/fastaremoveprefix/main.nf b/modules/local/fastaremoveprefix/main.nf new file mode 100644 index 0000000..c7ebf26 --- /dev/null +++ b/modules/local/fastaremoveprefix/main.nf @@ -0,0 +1,32 @@ +process FASTAREMOVEPREFIX { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' : + 'biocontainers/gawk:5.3.0' }" + + when: + task.ext.when == null || task.ext.when + + input: + tuple val(meta), path(fasta, name: 'input/*') + + output: + tuple val(meta), path("*.{fa,fasta}"), emit: fasta + path "versions.yml" , emit: versions + + script: + def modified_fasta = fasta.fileName.name + """ + cat ${fasta} | sed "s/^>chr/>/g" | sed "s/^>M />MT /g" > ${modified_fasta} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ + + +} diff --git a/modules/local/metafusion/genebed/main.nf b/modules/local/metafusion/genebed/main.nf index 34cba9c..1fb97b5 100644 --- a/modules/local/metafusion/genebed/main.nf +++ b/modules/local/metafusion/genebed/main.nf @@ -9,10 +9,9 @@ process METAFUSION_GENEBED { input: tuple val(meta), path(gff) - val ensembl_version output: - tuple val(meta), path("*.metafusion.gene.bed"), emit: metafusion_gene_bed + tuple val(meta), path("${meta.id}.metafusion.gene.bed"), emit: metafusion_gene_bed path "versions.yml" , emit: versions when: @@ -22,27 +21,29 @@ process METAFUSION_GENEBED { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - final_generate_v75_gene_bed.R \\ + generate_gene_bed.R \\ $gff \\ - ${ensembl_version}.metafusion.gene.bed + ${prefix}.metafusion.gene.bed cat <<-END_VERSIONS > versions.yml "${task.process}": R: \$(R --version | head -n1) - final_generate_v75_gene_bed.R: 0.0.1 + generate_gene_bed.R: 0.0.2 END_VERSIONS """ stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + """ touch ${prefix}.metafusion.gene.bed cat <<-END_VERSIONS > versions.yml "${task.process}": R: \$(R --version | head -n1) - final_generate_v75_gene_bed.R: 0.0.1 + generate_gene_bed.R: 0.0.2 END_VERSIONS """ + } diff --git a/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R b/modules/local/metafusion/genebed/resources/usr/bin/generate_gene_bed.R similarity index 86% rename from modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R rename to modules/local/metafusion/genebed/resources/usr/bin/generate_gene_bed.R index 6a5b59c..2a15149 100755 --- a/modules/local/metafusion/genebed/resources/usr/bin/final_generate_v75_gene_bed.R +++ b/modules/local/metafusion/genebed/resources/usr/bin/generate_gene_bed.R @@ -1,22 +1,23 @@ - #!/usr/local/bin/Rscript + # __author__ = "Alexandria Dymun" # __email__ = "pintoa1@mskcc.org" # __contributor__ = "Anne Marie Noronha (noronhaa@mskcc.org)" -# __version__ = "0.0.1" +# __version__ = "0.0.2" # __status__ = "Dev" suppressPackageStartupMessages({ - library(plyr) +# library(plyr) library(dplyr) library(data.table) library(stringr) + options(scipen = 999) }) usage <- function() { message("Usage:") - message("final_generate_v75_gene_bed.R ") + message("generate_gene_bed.R ") } args = commandArgs(TRUE) @@ -26,15 +27,10 @@ if (length(args)!=2) { quit() } -# Utilized gtf from igenomes for FORTE This corresponds to GRCh37 ensembl 75 -# Add introns to gtf, convert to gff3 -# bsub -R "rusage[mem=64]" -o add_introns_agat_%J.out singularity exec -B /juno/ \\ -# -B /tmp -B /scratch/ docker://quay.io/biocontainers/agat:0.8.0--pl5262hdfd78af_0 \\ -# /bin/bash -c "agat_sp_add_introns.pl -g /juno/work/taylorlab/cmopipeline/mskcc-igenomes/igenomes/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf\\ -# -o genes.INTRONS.gff3" - gtf <- rtracklayer::import(args[1]) gtf_df <- as.data.frame(gtf) +#remove incomplete transcripts mRNA_end_NF and mRNA_start_NF (not finished) +gtf_df <- gtf_df[!grepl("NF",gtf_df$tag),] file.to_write <- args[2] @@ -44,7 +40,8 @@ gtf_df <- gtf_df %>% chr = seqnames ) %>% select(c(chr, start, end, transcript_id, type, strand, gene_name, gene_id)) %>% - filter(type %in% c("exon","intron","UTR","CDS","cds","utr")) %>% mutate(start = start-1) + filter(type %in% c("exon","intron","UTR","CDS","cds","utr","five_prime_utr","three_prime_utr")) %>% + mutate(gene_name = ifelse(is.na(gene_name),gene_id,gene_name)) %>% mutate(start = start-1) #START CLOCK @@ -106,6 +103,8 @@ modify_transcript <- function(transcript){ transcript$type[transcript$start >= stop_coding & transcript$type == "UTR"] <- "utr5" } } + transcript$type[transcript$type == "five_prime_utr"] <- "utr5" + transcript$type[transcript$type == "three_prime_utr"] <- "utr3" #### Any exon that remains after teh cds change, is likely and untranslated region. change below # Basically, subfeatures which are "exon" need to be changed (i.e. exon --> utr3/utr5) diff --git a/modules/local/metafusion/geneinfo/resources/usr/bin/make_gene_info_for_forte.R b/modules/local/metafusion/geneinfo/resources/usr/bin/make_gene_info_for_forte.R index 2ab3dfd..08ac644 100755 --- a/modules/local/metafusion/geneinfo/resources/usr/bin/make_gene_info_for_forte.R +++ b/modules/local/metafusion/geneinfo/resources/usr/bin/make_gene_info_for_forte.R @@ -106,7 +106,7 @@ gene_info <- rbind(gene_info,add_these_excess_gene_ids) gene_info <- merge(gene_info,do.call(rbind,unique_id_to_names[versioned_gtf])[,c("gene_id","gene_id_with_version")],by = "gene_id",all.x = T, all.y = F) gene_info$Synonyms <- ifelse(is.na(gene_info$gene_id_with_version),gene_info$gene_id,paste0(gene_info$gene_id,"|",gene_info$gene_id_with_version)) -gene_info$Symbol <- gene_info$gene_name +gene_info$Symbol <- ifelse(is.na(gene_info$gene_name), gene_info$gene_id, gene_info$gene_name) gene_info <- gene_info[,c("Symbol","Synonyms")] diff --git a/modules/local/prepare_rrna/main.nf b/modules/local/prepare_rrna/main.nf index 37ec774..8d5ab0a 100644 --- a/modules/local/prepare_rrna/main.nf +++ b/modules/local/prepare_rrna/main.nf @@ -14,6 +14,7 @@ process PREPARE_RRNA { path "rna.bed", emit: rRNA_bed script: + def extra_filter_chr = params.genome == "GRCh38" ? "|^GL000220|^KI270733" : "" if (gtf) { """ (${"${gtf}".endsWith(".gz") ? "z" : ""}grep "rRNA" ${gtf} || true) | \\ @@ -23,7 +24,7 @@ process PREPARE_RRNA { /transcript_id "([^"]+)"/ or die "no transcript_id on \$."; print join "\t", (@F[0,1,2,3], \$1) ' | \\ - (grep -vP "^HG|^HSCHR" || true) | \\ + (grep -vP "^HG|^HSCHR${extra_filter_chr}" || true) | \\ sort -k1V -k2n -k3n \\ > rna.bed @@ -32,7 +33,7 @@ process PREPARE_RRNA { """ (${"${refflat}".endsWith(".gz") ? "z" : ""}grep -P "^RNA5|^RNA1|^RNA2" ${refflat} || true) | \\ awk -F"\\t" -v OFS="\\t" '{ print \$3,\$5,\$6,\$4,\$2 }' | \\ - (grep -vP "^HG|^HSCHR" || true) | \\ + (grep -vP "^HG|^HSCHR${extra_filter_chr}" || true) | \\ sort -k1V -k2n -k3n \\ > rna.bed """ diff --git a/modules/nf-core/arriba/arriba/environment.yml b/modules/nf-core/arriba/arriba/environment.yml new file mode 100644 index 0000000..d0883a0 --- /dev/null +++ b/modules/nf-core/arriba/arriba/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::arriba=2.4.0 diff --git a/modules/nf-core/arriba/main.nf b/modules/nf-core/arriba/arriba/main.nf similarity index 70% rename from modules/nf-core/arriba/main.nf rename to modules/nf-core/arriba/arriba/main.nf index e4b48be..761d0bf 100644 --- a/modules/nf-core/arriba/main.nf +++ b/modules/nf-core/arriba/arriba/main.nf @@ -1,21 +1,21 @@ -process ARRIBA { +process ARRIBA_ARRIBA { tag "$meta.id" label 'process_medium' - conda "bioconda::arriba=2.3.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/arriba:2.3.0--haa8aa89_0' : - 'quay.io/biocontainers/arriba:2.3.0--haa8aa89_0' }" + 'https://depot.galaxyproject.org/singularity/arriba:2.4.0--h0033a41_2' : + 'biocontainers/arriba:2.4.0--h0033a41_2' }" input: tuple val(meta), path(bam) - path fasta - path gtf - path blacklist - path known_fusions - path structural_variants - path tags - path protein_domains + tuple val(meta2), path(fasta) + tuple val(meta3), path(gtf) + tuple val(meta4), path(blacklist) + tuple val(meta5), path(known_fusions) + tuple val(meta6), path(structural_variants) + tuple val(meta7), path(tags) + tuple val(meta8), path(protein_domains) output: tuple val(meta), path("*.fusions.tsv") , emit: fusions @@ -60,7 +60,9 @@ process ARRIBA { echo stub > ${prefix}.fusions.tsv echo stub > ${prefix}.fusions.discarded.tsv - echo "${task.process}:" > versions.yml - echo ' arriba: 2.2.1' >> versions.yml + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS """ } diff --git a/modules/nf-core/arriba/arriba/meta.yml b/modules/nf-core/arriba/arriba/meta.yml new file mode 100644 index 0000000..f230dda --- /dev/null +++ b/modules/nf-core/arriba/arriba/meta.yml @@ -0,0 +1,123 @@ +name: arriba_arriba +description: Arriba is a command-line tool for the detection of gene fusions from + RNA-Seq data. +keywords: + - fusion + - arriba + - detection + - RNA-Seq +tools: + - arriba: + description: Fast and accurate gene fusion detection from RNA-Seq data + homepage: https://github.com/suhrig/arriba + documentation: https://arriba.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/suhrig/arriba + doi: "10.1101/gr.257246.119" + licence: ["MIT"] + identifier: biotools:Arriba +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Assembly FASTA file + pattern: "*.{fasta}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - blacklist: + type: file + description: Blacklist file + pattern: "*.{tsv}" + - - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - known_fusions: + type: file + description: Known fusions file + pattern: "*.{tsv}" + - - meta6: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - structural_variants: + type: file + description: Structural variants file + pattern: "*.{tsv}" + - - meta7: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - tags: + type: file + description: Tags file + pattern: "*.{tsv}" + - - meta8: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - protein_domains: + type: file + description: Protein domains file + pattern: "*.{gff3}" +output: + - fusions: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fusions.tsv": + type: file + description: File contains fusions which pass all of Arriba's filters. + pattern: "*.{fusions.tsv}" + - fusions_fail: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fusions.discarded.tsv": + type: file + description: File contains fusions that Arriba classified as an artifact or + that are also observed in healthy tissue. + pattern: "*.{fusions.discarded.tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@praveenraj2018" + - "@rannick" +maintainers: + - "@praveenraj2018" + - "@rannick" diff --git a/modules/nf-core/arriba/meta.yml b/modules/nf-core/arriba/meta.yml deleted file mode 100644 index 119dd91..0000000 --- a/modules/nf-core/arriba/meta.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: arriba -description: Arriba is a command-line tool for the detection of gene fusions from RNA-Seq data. -keywords: - - fusion - - arriba -tools: - - arriba: - description: Fast and accurate gene fusion detection from RNA-Seq data - homepage: https://github.com/suhrig/arriba - documentation: https://arriba.readthedocs.io/en/latest/ - tool_dev_url: https://github.com/suhrig/arriba - doi: "10.1101/gr.257246.119" - licence: ["MIT"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - fasta: - type: file - description: Assembly FASTA file - pattern: "*.{fasta}" - - gtf: - type: file - description: Annotation GTF file - pattern: "*.{gtf}" - - blacklist: - type: file - description: Blacklist file - pattern: "*.{tsv}" - - known_fusions: - type: file - description: Known fusions file - pattern: "*.{tsv}" - - structural_variants: - type: file - description: Structural variants file - pattern: "*.{tsv}" - - tags: - type: file - description: Tags file - pattern: "*.{tsv}" - - protein_domains: - type: file - description: Protein domains file - pattern: "*.{gff3}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - fusions: - type: file - description: File contains fusions which pass all of Arriba's filters. - pattern: "*.{fusions.tsv}" - - fusions_fail: - type: file - description: File contains fusions that Arriba classified as an artifact or that are also observed in healthy tissue. - pattern: "*.{fusions.discarded.tsv}" - -authors: - - "@praveenraj2018,@rannick" diff --git a/modules/nf-core/gatk4/bedtointervallist/environment.yml b/modules/nf-core/gatk4/bedtointervallist/environment.yml new file mode 100644 index 0000000..55993f4 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf index 41fab00..68863d6 100644 --- a/modules/nf-core/gatk4/bedtointervallist/main.nf +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -2,14 +2,14 @@ process GATK4_BEDTOINTERVALLIST { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.3.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.3.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.3.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(bed) - path dict + tuple val(meta2), path(dict) output: tuple val(meta), path('*.interval_list'), emit: interval_list @@ -22,14 +22,15 @@ process GATK4_BEDTOINTERVALLIST { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" BedToIntervalList \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BedToIntervalList \\ --INPUT $bed \\ --OUTPUT ${prefix}.interval_list \\ --SEQUENCE_DICTIONARY $dict \\ diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml index 986f159..25348e1 100644 --- a/modules/nf-core/gatk4/bedtointervallist/meta.yml +++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml @@ -2,6 +2,8 @@ name: gatk4_bedtointervallist description: Creates an interval list from a bed file and a reference dict keywords: - bed + - bedtointervallist + - gatk4 - interval list tools: - gatk4: @@ -13,28 +15,48 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - bed: - type: file - description: Input bed file - pattern: "*.bed" - - dict: - type: file - description: Sequence dictionary - pattern: "*.dict" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - bed: + type: file + description: Input bed file + pattern: "*.bed" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - dict: + type: file + description: Sequence dictionary + pattern: "*.dict" output: - interval_list: - type: file - description: gatk interval list file - pattern: "*.interval_list" + - meta: + type: file + description: gatk interval list file + pattern: "*.interval_list" + - "*.interval_list": + type: file + description: gatk interval list file + pattern: "*.interval_list" + - _list: + type: file + description: gatk interval list file + pattern: "*.interval_list" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@kevinmenden" + - "@ramprasadn" +maintainers: + - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test new file mode 100644 index 0000000..2289f73 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process GATK4_BEDTOINTERVALLIST" + script "../main.nf" + process "GATK4_BEDTOINTERVALLIST" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/bedtointervallist" + + test("test_gatk4_bedtointervallist") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + input[1] = [ [ id:'dict' ], // meta map + [file(params.modules_testdata_base_path + + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap new file mode 100644 index 0000000..48c322f --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test_gatk4_bedtointervallist": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c" + ] + ], + "1": [ + "versions.yml:md5,29a18c36f27584eb5a5f2f5457088b3b" + ], + "interval_list": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c" + ] + ], + "versions": [ + "versions.yml:md5,29a18c36f27584eb5a5f2f5457088b3b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T14:20:12.168775" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml new file mode 100644 index 0000000..b4d54f1 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/bedtointervallist: + - "modules/nf-core/gatk4/bedtointervallist/**" diff --git a/modules/nf-core/gatk4/createsequencedictionary/environment.yml b/modules/nf-core/gatk4/createsequencedictionary/environment.yml new file mode 100644 index 0000000..55993f4 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf index bc324ad..c7f1d75 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/main.nf +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -2,17 +2,17 @@ process GATK4_CREATESEQUENCEDICTIONARY { tag "$fasta" label 'process_medium' - conda "bioconda::gatk4=4.3.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.3.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.3.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: - path fasta + tuple val(meta), path(fasta) output: - path "*.dict" , emit: dict - path "versions.yml" , emit: versions + tuple val(meta), path('*.dict') , emit: dict + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,14 +20,15 @@ process GATK4_CREATESEQUENCEDICTIONARY { script: def args = task.ext.args ?: '' - def avail_mem = 6 + def avail_mem = 6144 if (!task.memory) { log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" CreateSequenceDictionary \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSequenceDictionary \\ --REFERENCE $fasta \\ --URI $fasta \\ --TMP_DIR . \\ diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml index bd24788..7b5156b 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/meta.yml +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -1,32 +1,49 @@ name: gatk4_createsequencedictionary description: Creates a sequence dictionary for a reference sequence keywords: + - createsequencedictionary - dictionary - fasta + - gatk4 tools: - gatk: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ["Apache-2.0"] - + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" input: - - fasta: - type: file - description: Input fasta file - pattern: "*.{fasta,fa}" + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fasta,fa}" output: - dict: - type: file - description: gatk dictionary file - pattern: "*.{dict}" + - meta: + type: file + description: gatk dictionary file + pattern: "*.{dict}" + - "*.dict": + type: file + description: gatk dictionary file + pattern: "*.{dict}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@maxulysse" + - "@ramprasadn" +maintainers: + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test new file mode 100644 index 0000000..a8a9c6d --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process GATK4_CREATESEQUENCEDICTIONARY" + script "../main.nf" + process "GATK4_CREATESEQUENCEDICTIONARY" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/createsequencedictionary" + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap new file mode 100644 index 0000000..16735f9 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.dict:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + ], + "dict": [ + [ + { + "id": "test" + }, + "genome.dict:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-05-16T10:16:16.34453" + }, + "sarscov2 - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.dict:md5,7362679f176e0f52add03c08f457f646" + ] + ], + "1": [ + "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + ], + "dict": [ + [ + { + "id": "test" + }, + "genome.dict:md5,7362679f176e0f52add03c08f457f646" + ] + ], + "versions": [ + "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-05-16T13:58:25.822068" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml b/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml new file mode 100644 index 0000000..035c5e4 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/createsequencedictionary: + - "modules/nf-core/gatk4/createsequencedictionary/**" diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 0000000..2bcd47e --- /dev/null +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 4dd0e5b..28c0a81 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -2,18 +2,20 @@ process SAMTOOLS_FAIDX { tag "$fasta" label 'process_single' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(fasta) + tuple val(meta2), path(fai) output: - tuple val(meta), path ("*.fai"), emit: fai - tuple val(meta), path ("*.gzi"), emit: gzi, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,8 +25,8 @@ process SAMTOOLS_FAIDX { """ samtools \\ faidx \\ - $args \\ - $fasta + $fasta \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -33,8 +35,12 @@ process SAMTOOLS_FAIDX { """ stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' """ + ${fastacmd} touch ${fasta}.fai + cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index fe2fe9a..6721b2c 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -3,6 +3,7 @@ description: Index FASTA file keywords: - index - fasta + - faidx tools: - samtools: description: | @@ -13,35 +14,67 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: FASTA file - pattern: "*.{fa,fasta}" + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + - fa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{fa,fasta}": + type: file + description: FASTA file + pattern: "*.{fa}" - fai: - type: file - description: FASTA index file - pattern: "*.{fai}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fai": + type: file + description: FASTA index file + pattern: "*.{fai}" - gzi: - type: file - description: Optional gzip index file for compressed inputs - pattern: "*.gzi" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.gzi": + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@ewels" - "@phue" +maintainers: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test new file mode 100644 index 0000000..17244ef --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FAIDX" + script "../main.nf" + process "SAMTOOLS_FAIDX" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/faidx" + + test("test_samtools_faidx") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_bgzip") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fasta") { + + config "./nextflow2.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fai") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap new file mode 100644 index 0000000..1bbb3ec --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -0,0 +1,249 @@ +{ + "test_samtools_faidx": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:57:47.450887871" + }, + "test_samtools_faidx_bgzip": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:04.804905659" + }, + "test_samtools_faidx_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:23.831268154" + }, + "test_samtools_faidx_stub_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:35.600243706" + }, + "test_samtools_faidx_stub_fai": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:58:54.705460167" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config new file mode 100644 index 0000000..f76a3ba --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = 'MT192765.1 -o extract.fa' + } + +} diff --git a/modules/nf-core/samtools/faidx/tests/nextflow2.config b/modules/nf-core/samtools/faidx/tests/nextflow2.config new file mode 100644 index 0000000..33ebbd5 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow2.config @@ -0,0 +1,6 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = '-o extract.fa' + } +} diff --git a/modules/nf-core/samtools/faidx/tests/tags.yml b/modules/nf-core/samtools/faidx/tests/tags.yml new file mode 100644 index 0000000..e4a8394 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/faidx: + - modules/nf-core/samtools/faidx/** diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml new file mode 100644 index 0000000..1debc4c --- /dev/null +++ b/modules/nf-core/star/align/environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.18 + - bioconda::samtools=1.18 + - bioconda::star=2.7.10a + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf index 8cb8e9a..ae67e00 100644 --- a/modules/nf-core/star/align/main.nf +++ b/modules/nf-core/star/align/main.nf @@ -2,15 +2,15 @@ process STAR_ALIGN { tag "$meta.id" label 'process_high' - conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : - 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" input: tuple val(meta), path(reads, stageAs: "input*/*") - path index - path gtf + tuple val(meta2), path(index) + tuple val(meta3), path(gtf) val star_ignore_sjdbgtf val seq_platform val seq_center @@ -81,6 +81,8 @@ process STAR_ALIGN { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ + echo "" | gzip > ${prefix}.unmapped_1.fastq.gz + echo "" | gzip > ${prefix}.unmapped_2.fastq.gz touch ${prefix}Xd.out.bam touch ${prefix}.Log.final.out touch ${prefix}.Log.out @@ -89,8 +91,6 @@ process STAR_ALIGN { touch ${prefix}.toTranscriptome.out.bam touch ${prefix}.Aligned.unsort.out.bam touch ${prefix}.Aligned.sortedByCoord.out.bam - touch ${prefix}.unmapped_1.fastq.gz - touch ${prefix}.unmapped_2.fastq.gz touch ${prefix}.tab touch ${prefix}.SJ.out.tab touch ${prefix}.ReadsPerGene.out.tab diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml index bce16d3..d30556b 100644 --- a/modules/nf-core/star/align/meta.yml +++ b/modules/nf-core/star/align/meta.yml @@ -14,76 +14,194 @@ tools: manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf doi: 10.1093/bioinformatics/bts635 licence: ["MIT"] + identifier: biotools:star input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - - index: - type: directory - description: STAR genome index - pattern: "star" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - index: + type: directory + description: STAR genome index + pattern: "star" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + - - star_ignore_sjdbgtf: + type: boolean + description: Ignore annotation GTF file + - - seq_platform: + type: string + description: Sequencing platform + - - seq_center: + type: string + description: Sequencing center output: - - bam: - type: file - description: Output BAM file containing read alignments - pattern: "*.{bam}" - log_final: - type: file - description: STAR final log file - pattern: "*Log.final.out" + - meta: + type: file + description: STAR final log file + pattern: "*Log.final.out" + - "*Log.final.out": + type: file + description: STAR final log file + pattern: "*Log.final.out" - log_out: - type: file - description: STAR lot out file - pattern: "*Log.out" + - meta: + type: file + description: STAR lot out file + pattern: "*Log.out" + - "*Log.out": + type: file + description: STAR lot out file + pattern: "*Log.out" - log_progress: - type: file - description: STAR log progress file - pattern: "*Log.progress.out" + - meta: + type: file + description: STAR log progress file + pattern: "*Log.progress.out" + - "*Log.progress.out": + type: file + description: STAR log progress file + pattern: "*Log.progress.out" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + - meta: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - "*d.out.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" - bam_sorted: - type: file - description: Sorted BAM file of read alignments (optional) - pattern: "*sortedByCoord.out.bam" + - meta: + type: file + description: Sorted BAM file of read alignments (optional) + pattern: "*sortedByCoord.out.bam" + - "*sortedByCoord.out.bam": + type: file + description: Sorted BAM file of read alignments (optional) + pattern: "*sortedByCoord.out.bam" - bam_transcript: - type: file - description: Output BAM file of transcriptome alignment (optional) - pattern: "*toTranscriptome.out.bam" + - meta: + type: file + description: Output BAM file of transcriptome alignment (optional) + pattern: "*toTranscriptome.out.bam" + - "*toTranscriptome.out.bam": + type: file + description: Output BAM file of transcriptome alignment (optional) + pattern: "*toTranscriptome.out.bam" - bam_unsorted: - type: file - description: Unsorted BAM file of read alignments (optional) - pattern: "*Aligned.unsort.out.bam" + - meta: + type: file + description: Unsorted BAM file of read alignments (optional) + pattern: "*Aligned.unsort.out.bam" + - "*Aligned.unsort.out.bam": + type: file + description: Unsorted BAM file of read alignments (optional) + pattern: "*Aligned.unsort.out.bam" - fastq: - type: file - description: Unmapped FastQ files (optional) - pattern: "*fastq.gz" + - meta: + type: file + description: Unmapped FastQ files (optional) + pattern: "*fastq.gz" + - "*fastq.gz": + type: file + description: Unmapped FastQ files (optional) + pattern: "*fastq.gz" - tab: - type: file - description: STAR output tab file(s) (optional) - pattern: "*.tab" + - meta: + type: file + description: STAR output tab file(s) (optional) + pattern: "*.tab" + - "*.tab": + type: file + description: STAR output tab file(s) (optional) + pattern: "*.tab" + - spl_junc_tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.SJ.out.tab": + type: file + description: STAR output splice junction tab file + pattern: "*.SJ.out.tab" + - read_per_gene_tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.ReadsPerGene.out.tab": + type: file + description: STAR output read per gene tab file + pattern: "*.ReadsPerGene.out.tab" - junction: - type: file - description: STAR chimeric junction output file (optional) - pattern: "*.out.junction" + - meta: + type: file + description: STAR chimeric junction output file (optional) + pattern: "*.out.junction" + - "*.out.junction": + type: file + description: STAR chimeric junction output file (optional) + pattern: "*.out.junction" + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.out.sam": + type: file + description: STAR output SAM file + pattern: "*.out.sam" - wig: - type: file - description: STAR output wiggle format file(s) (optional) - pattern: "*.wig" + - meta: + type: file + description: STAR output wiggle format file(s) (optional) + pattern: "*.wig" + - "*.wig": + type: file + description: STAR output wiggle format file(s) (optional) + pattern: "*.wig" - bedgraph: - type: file - description: STAR output bedGraph format file(s) (optional) - pattern: "*.bg" - + - meta: + type: file + description: STAR output bedGraph format file(s) (optional) + pattern: "*.bg" + - "*.bg": + type: file + description: STAR output bedGraph format file(s) (optional) + pattern: "*.bg" authors: - "@kevinmenden" - "@drpatelh" - "@praveenraj2018" +maintainers: + - "@kevinmenden" + - "@drpatelh" + - "@praveenraj2018" diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test new file mode 100644 index 0000000..2d9f72d --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test @@ -0,0 +1,609 @@ +nextflow_process { + + name "Test Process STAR_ALIGN" + script "../main.nf" + process "STAR_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/align" + tag "star/genomegenerate" + + test("homo_sapiens - single_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + process.out.bam, + process.out.bam_sorted, + process.out.bam_transcript, + process.out.bam_unsorted, + process.out.bedgraph, + process.out.fastq, + process.out.junction, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + process.out.bam, + process.out.bam_sorted, + process.out.bam_transcript, + process.out.bam_unsorted, + process.out.bedgraph, + process.out.fastq, + process.out.junction, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - arriba") { + config "./nextflow.arriba.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + process.out.bam, + process.out.bam_sorted, + process.out.bam_transcript, + process.out.bam_unsorted, + process.out.bedgraph, + process.out.fastq, + process.out.junction, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - starfusion") { + config "./nextflow.starfusion.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + process.out.bam, + process.out.bam_sorted, + process.out.bam_transcript, + process.out.bam_unsorted, + process.out.bedgraph, + process.out.fastq, + process.out.junction, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - multiple") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + process.out.bam, + process.out.bam_sorted, + process.out.bam_transcript, + process.out.bam_unsorted, + process.out.bedgraph, + process.out.fastq, + process.out.junction, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - single_end - stub") { + options "-stub" + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - stub") { + options "-stub" + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - arriba - stub") { + options "-stub" + config "./nextflow.arriba.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - starfusion - stub") { + options "-stub" + config "./nextflow.starfusion.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - paired_end - multiple - stub") { + options "-stub" + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/star/align/tests/main.nf.test.snap b/modules/nf-core/star/align/tests/main.nf.test.snap new file mode 100644 index 0000000..c814eb5 --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test.snap @@ -0,0 +1,1973 @@ +{ + "homo_sapiens - single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": true + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": true + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": true + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "wig": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T15:16:04.712114" + }, + "homo_sapiens - paired_end - arriba - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T15:16:28.874293" + }, + "homo_sapiens - single_end": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" + ] + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Signal.Unique.str1.out.bg:md5,c56fc1472776fb927eaf62d973da5f9a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,e93373cf6f2a2a9506e2efdb260cdd4f" + ] + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ], + [ + + ], + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T18:02:34.35338" + }, + "homo_sapiens - paired_end": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" + ] + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ], + [ + + ], + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T18:03:16.701923" + }, + "homo_sapiens - paired_end - multiple - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T15:16:51.360287" + }, + "homo_sapiens - paired_end - multiple": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" + ] + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ], + [ + + ], + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T13:13:28.987438" + }, + "homo_sapiens - paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T15:16:16.798018" + }, + "homo_sapiens - paired_end - starfusion": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.out.bam:md5,bcad07b838f6762fc01eea52b5cd3f84" + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,c10ef219f4a30e83711b995bc5e40dba" + ] + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ], + [ + + ], + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T13:10:55.371956" + }, + "homo_sapiens - paired_end - arriba": { + "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.out.bam:md5,c1b1747f5873f2d17762725636e891d5" + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ], + [ + + ], + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T13:05:10.7534" + }, + "homo_sapiens - paired_end - starfusion - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T15:16:40.64399" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/align/tests/nextflow.arriba.config b/modules/nf-core/star/align/tests/nextflow.arriba.config new file mode 100644 index 0000000..2324b9e --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.arriba.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/nextflow.config b/modules/nf-core/star/align/tests/nextflow.config new file mode 100644 index 0000000..c4ac580 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --outWigType bedGraph --outWigStrand Unstranded' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/nextflow.starfusion.config b/modules/nf-core/star/align/tests/nextflow.starfusion.config new file mode 100644 index 0000000..467b649 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.starfusion.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outReadsUnmapped None --twopassMode Basic --outSAMstrandField intronMotif --outSAMunmapped Within --chimSegmentMin 12 --chimJunctionOverhangMin 8 --chimOutJunctionFormat 1 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 --alignSJstitchMismatchNmax 5 -1 5 5 --chimMultimapScoreRange 3 --chimScoreJunctionNonGTAG -4 --chimMultimapNmax 20 --chimNonchimScoreDropMin 10 --peOverlapNbasesMin 12 --peOverlapMMp 0.1 --alignInsertionFlush Right --alignSplicedMateMapLminOverLmate 0 --alignSplicedMateMapLmin 30' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/tags.yml b/modules/nf-core/star/align/tests/tags.yml new file mode 100644 index 0000000..8beace1 --- /dev/null +++ b/modules/nf-core/star/align/tests/tags.yml @@ -0,0 +1,2 @@ +star/align: + - modules/nf-core/star/align/** diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml new file mode 100644 index 0000000..1debc4c --- /dev/null +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.18 + - bioconda::samtools=1.18 + - bioconda::star=2.7.10a + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf index 9146248..4d4055b 100644 --- a/modules/nf-core/star/genomegenerate/main.nf +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -2,26 +2,27 @@ process STAR_GENOMEGENERATE { tag "$fasta" label 'process_high' - conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : - 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" input: - path fasta - path gtf + tuple val(meta), path(fasta) + tuple val(meta2), path(gtf) output: - path "star" , emit: index - path "versions.yml", emit: versions + tuple val(meta), path("star") , emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args_list = args.tokenize() - def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def memory = task.memory ? "--limitGenomeGenerateRAM ${(task.memory.toBytes()*task.cpus) - 100000000}" : '' + def include_gtf = gtf ? "--sjdbGTFfile $gtf" : '' if (args_list.contains('--genomeSAindexNbases')) { """ mkdir star @@ -29,7 +30,7 @@ process STAR_GENOMEGENERATE { --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ - --sjdbGTFfile $gtf \\ + $include_gtf \\ --runThreadN $task.cpus \\ $memory \\ $args @@ -51,7 +52,7 @@ process STAR_GENOMEGENERATE { --runMode genomeGenerate \\ --genomeDir star/ \\ --genomeFastaFiles $fasta \\ - --sjdbGTFfile $gtf \\ + $include_gtf \\ --runThreadN $task.cpus \\ --genomeSAindexNbases \$NUM_BASES \\ $memory \\ @@ -67,30 +68,52 @@ process STAR_GENOMEGENERATE { } stub: - """ - mkdir star - touch star/Genome - touch star/Log.out - touch star/SA - touch star/SAindex - touch star/chrLength.txt - touch star/chrName.txt - touch star/chrNameLength.txt - touch star/chrStart.txt - touch star/exonGeTrInfo.tab - touch star/exonInfo.tab - touch star/geneInfo.tab - touch star/genomeParameters.txt - touch star/sjdbInfo.txt - touch star/sjdbList.fromGTF.out.tab - touch star/sjdbList.out.tab - touch star/transcriptInfo.tab + if (gtf) { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab - cat <<-END_VERSIONS > versions.yml - "${task.process}": - star: \$(STAR --version | sed -e "s/STAR_//g") - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') - END_VERSIONS - """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/genomeParameters.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } } diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml index 8181157..33c1f65 100644 --- a/modules/nf-core/star/genomegenerate/meta.yml +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -14,24 +14,43 @@ tools: manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf doi: 10.1093/bioinformatics/bts635 licence: ["MIT"] + identifier: biotools:star input: - - fasta: - type: file - description: Fasta file of the reference genome - - gtf: - type: file - description: GTF file of the reference genome - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Fasta file of the reference genome + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: GTF file of the reference genome output: - index: - type: directory - description: Folder containing the star index files - pattern: "star" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - star: + type: directory + description: Folder containing the star index files + pattern: "star" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@kevinmenden" - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/star/genomegenerate/star-genomegenerate.diff b/modules/nf-core/star/genomegenerate/star-genomegenerate.diff new file mode 100644 index 0000000..247d39a --- /dev/null +++ b/modules/nf-core/star/genomegenerate/star-genomegenerate.diff @@ -0,0 +1,20 @@ +Changes in module 'nf-core/star/genomegenerate' +'modules/nf-core/star/genomegenerate/environment.yml' is unchanged +'modules/nf-core/star/genomegenerate/meta.yml' is unchanged +Changes in 'star/genomegenerate/main.nf': +--- modules/nf-core/star/genomegenerate/main.nf ++++ modules/nf-core/star/genomegenerate/main.nf +@@ -21,7 +21,7 @@ + script: + def args = task.ext.args ?: '' + def args_list = args.tokenize() +- def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' ++ def memory = task.memory ? "--limitGenomeGenerateRAM ${(task.memory.toBytes()*task.cpus) - 100000000}" : '' + def include_gtf = gtf ? "--sjdbGTFfile $gtf" : '' + if (args_list.contains('--genomeSAindexNbases')) { + """ + +'modules/nf-core/star/genomegenerate/tests/tags.yml' is unchanged +'modules/nf-core/star/genomegenerate/tests/main.nf.test' is unchanged +'modules/nf-core/star/genomegenerate/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test new file mode 100644 index 0000000..4d619c4 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -0,0 +1,114 @@ +nextflow_process { + + name "Test Process STAR_GENOMEGENERATE" + script "../main.nf" + process "STAR_GENOMEGENERATE" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/genomegenerate" + + test("fasta_gtf") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString(), + process.out.versions) + .match() } + ) + } + } + + test("fasta") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString(), + process.out.versions + ).match() } + ) + } + } + + test("fasta_gtf_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fasta_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap new file mode 100644 index 0000000..207f4b4 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -0,0 +1,148 @@ +{ + "fasta_gtf": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]", + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:55:35.478401" + }, + "fasta_gtf_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ], + "index": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:55:57.247585" + }, + "fasta_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ], + "index": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:56:07.01742" + }, + "fasta": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]", + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:55:45.48784" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml new file mode 100644 index 0000000..79f619b --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/tags.yml @@ -0,0 +1,2 @@ +star/genomegenerate: + - modules/nf-core/star/genomegenerate/** diff --git a/subworkflows/local/fusion.nf b/subworkflows/local/fusion.nf index 0539428..28e8546 100644 --- a/subworkflows/local/fusion.nf +++ b/subworkflows/local/fusion.nf @@ -1,5 +1,5 @@ include { STAR_ALIGN as STAR_FOR_ARRIBA } from '../../modules/nf-core/star/align/main' -include { ARRIBA } from '../../modules/nf-core/arriba/main' +include { ARRIBA_ARRIBA } from '../../modules/nf-core/arriba/arriba/main' include { STAR_ALIGN as STAR_FOR_STARFUSION } from '../../modules/nf-core/star/align/main' include { STARFUSION } from '../../modules/local/starfusion/detect/main' include { FUSIONCATCHER_DETECT } from '../../modules/local/fusioncatcher/detect/main' @@ -19,6 +19,7 @@ workflow FUSION { reads reads_untrimmed star_index + fasta gtf starfusion_ref fusioncatcher_ref @@ -33,7 +34,7 @@ workflow FUSION { main: ch_versions = Channel.empty() - fasta = params.fasta + //fasta = params.fasta //gene_bed = params.metafusion_gene_bed //gene_info = params.metafusion_gene_info //blocklist = params.metafusion_blocklist @@ -48,23 +49,23 @@ workflow FUSION { ) ch_versions = ch_versions.mix(STAR_FOR_ARRIBA.out.versions.first()) - ARRIBA( + ARRIBA_ARRIBA( STAR_FOR_ARRIBA.out.bam, fasta, gtf, - arriba_blacklist, - arriba_known_fusions, - [], - [], - arriba_protein_domains + arriba_blacklist.map{[[:],it]}, + arriba_known_fusions.map{[[:],it]}, + [[:],[]], + [[:],[]], + arriba_protein_domains.map{[[:],it]} ) - ch_versions = ch_versions.mix(ARRIBA.out.versions.first()) + ch_versions = ch_versions.mix(ARRIBA_ARRIBA.out.versions.first()) STAR_FOR_STARFUSION( reads, // use the star index in the starfusion reference to ensure compatibility - starfusion_ref.map{ file( it + "/ref_genome.fa.star.idx")}, - starfusion_ref.map{ file( it + "/ref_annot.gtf")}, + starfusion_ref.map{ [[id:params.genome],file( it + "/ref_genome.fa.star.idx")] }, + starfusion_ref.map{ [[id:params.genome],file( it + "/ref_annot.gtf")] }, false, [], [] @@ -88,7 +89,7 @@ workflow FUSION { fc_fusions = ["GRCh37","hg19","smallGRCh37"].contains(params.genome) ? FUSIONCATCHER_DETECT.out.fusions_alt : FUSIONCATCHER_DETECT.out.fusions - ARRIBA_TO_CFF(ARRIBA.out.fusions + ARRIBA_TO_CFF(ARRIBA_ARRIBA.out.fusions .map{ meta, file ->[ meta, "arriba", file ] }) FUSIONCATCHER_TO_CFF(fc_fusions .map{ meta, file -> [ meta, "fusioncatcher", file ] } ) @@ -113,7 +114,7 @@ workflow FUSION { MERGE_CFF.out.file_out, gene_bed.map{ it[1] }.first(), gene_info.map{ it[1] }.first(), - fasta, + fasta.map{ it[1] }.first(), blocklist ) diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 97e4124..698f376 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -6,6 +6,7 @@ include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/ include { GATK4_BEDTOINTERVALLIST } from '../../modules/nf-core/gatk4/bedtointervallist/main' include { PREPARE_RRNA } from '../../modules/local/prepare_rrna/main' include { + GUNZIP as GUNZIP_FASTA ; GUNZIP as GUNZIP_GTF ; GUNZIP as GUNZIP_METAFUSIONGENEBED ; GUNZIP as GUNZIP_METAFUSIONBLOCKLIST @@ -18,31 +19,47 @@ include { AGFUSION_DOWNLOAD } from '../../modules/local/agfusion/do include { AGAT_SPADDINTRONS } from '../../modules/nf-core/agat/spaddintrons/main' include { METAFUSION_GENEBED } from '../../modules/local/metafusion/genebed/main' include { METAFUSION_GENEINFO } from '../../modules/local/metafusion/geneinfo/main' +include { FASTAREMOVEPREFIX } from '../../modules/local/fastaremoveprefix/main' workflow PREPARE_REFERENCES { main: ch_versions = Channel.empty() + if (params.fasta.endsWith(".gz")){ + GUNZIP_FASTA([[id:params.genome],params.fasta]) + fasta = GUNZIP_FASTA.out.gunzip.first() + } else { + fasta = Channel.of([[id:params.genome],params.fasta]).first() + } + + if (params.genome == "GRCh38" ){ + FASTAREMOVEPREFIX(fasta) + fasta = FASTAREMOVEPREFIX.out.fasta + } + if (params.gtf.endsWith(".gz")){ - GUNZIP_GTF([[:],params.gtf]) - gtf = GUNZIP_GTF.out.gunzip.map{ it[1] }.first() + GUNZIP_GTF([[id:params.genome],params.gtf]) + gtf = GUNZIP_GTF.out.gunzip.first() } else { - gtf = params.gtf + gtf = Channel.of([[id:params.genome],params.gtf]).first() } if (params.metafusion_blocklist.endsWith(".gz")){ GUNZIP_METAFUSIONBLOCKLIST([[:],params.metafusion_blocklist]) metafusion_blocklist = GUNZIP_METAFUSIONBLOCKLIST.out.gunzip.map{ it[1] }.first() } else { - metafusion_blocklist = params.metafusion_blocklist + metafusion_blocklist = Channel.of(params.metafusion_blocklist).first() } - STAR_GENOMEGENERATE(params.fasta,gtf) + STAR_GENOMEGENERATE( + fasta, + gtf + ) ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) - star_index = STAR_GENOMEGENERATE.out.index + star_index = STAR_GENOMEGENERATE.out.index.first() - UCSC_GTFTOGENEPRED(Channel.value(gtf).map{[[id:params.genome],it]}) + UCSC_GTFTOGENEPRED(gtf) ch_versions = ch_versions.mix(UCSC_GTFTOGENEPRED.out.versions) UCSC_GENEPREDTOBED(UCSC_GTFTOGENEPRED.out.genepred) @@ -55,7 +72,7 @@ workflow PREPARE_REFERENCES { } PREPARE_RRNA([],refflat) - GATK4_CREATESEQUENCEDICTIONARY(params.fasta) + GATK4_CREATESEQUENCEDICTIONARY(fasta) ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) GATK4_BEDTOINTERVALLIST( @@ -64,7 +81,7 @@ workflow PREPARE_REFERENCES { ) ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST.out.versions) - SAMTOOLS_FAIDX ([[:],params.fasta]) + SAMTOOLS_FAIDX(fasta,[[:],[]]) if (params.starfusion_url) { UNTAR_STARFUSION([[id:params.starfusion_url.tokenize("/")[-1].replaceFirst(/\.tar\.gz$/, "")],params.starfusion_url]) @@ -85,17 +102,16 @@ workflow PREPARE_REFERENCES { ARRIBA_DOWNLOAD() AGAT_SPADDINTRONS( - [[:],gtf], + gtf, [] ) METAFUSION_GENEBED( - AGAT_SPADDINTRONS.out.gff, - params.ensembl_version + AGAT_SPADDINTRONS.out.gff ) METAFUSION_GENEINFO( - [[:],gtf], starfusion_ref, fusioncatcher_ref + gtf,starfusion_ref, fusioncatcher_ref ) AGFUSION_DOWNLOAD( @@ -111,6 +127,7 @@ workflow PREPARE_REFERENCES { star_index = star_index // Convert queue channel to value channel so it never gets poison pilled refflat = refflat + fasta = fasta fasta_dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict fasta_fai = SAMTOOLS_FAIDX.out.fai rrna_bed = PREPARE_RRNA.out.rRNA_bed diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf index b9e3bc5..58b4e04 100644 --- a/subworkflows/local/qc.nf +++ b/subworkflows/local/qc.nf @@ -17,12 +17,12 @@ workflow QC { refflat rrna_intervals rseqc_bed + fasta fai dict baits main: - fasta = params.fasta ch_versions = Channel.empty() BAM_RSEQC( @@ -35,7 +35,7 @@ workflow QC { PICARD_COLLECTRNASEQMETRICS( bam, refflat, - fasta, + fasta.map{it[1]}.first(), rrna_intervals ) ch_versions = ch_versions.mix(PICARD_COLLECTRNASEQMETRICS.out.versions.first()) @@ -51,9 +51,9 @@ workflow QC { }.map{ meta, bam, bai, bait, bait_file, target_file -> [meta, bam, bai, bait_file, target_file] }, - [[:],fasta], + fasta, fai, - dict.map{ dict -> [[:],dict]} + dict ) multiqc_files = multiqc_files diff --git a/subworkflows/local/quantification.nf b/subworkflows/local/quantification.nf index 6a1c543..43321b5 100644 --- a/subworkflows/local/quantification.nf +++ b/subworkflows/local/quantification.nf @@ -19,14 +19,14 @@ workflow QUANTIFICATION { HTSEQ_COUNT( bam.join(bai,by:[0]), - gtf + gtf.map{it[1]} ) ch_versions = ch_versions.mix(HTSEQ_COUNT.out.versions) FEATURECOUNTS_GENE( bam, - gtf + gtf.map{it[1]} ) ch_versions = ch_versions.mix(FEATURECOUNTS_GENE.out.versions) @@ -40,7 +40,7 @@ workflow QUANTIFICATION { COUNT_FEATURES( KALLISTO_QUANT.out.abundance, - gtf + gtf.map{it[1]} ) diff --git a/tests/small_test/test.yml b/tests/small_test/test.yml index 50ff7f1..508e2ff 100755 --- a/tests/small_test/test.yml +++ b/tests/small_test/test.yml @@ -4,11 +4,11 @@ - test_profile files: - path: output/analysis/SAMPLE_PAIRED_END/STAR/SAMPLE_PAIRED_END.Aligned.sortedByCoord.out.bam - md5sum: 781acdb8d313482de17a2933e18bb97a + md5sum: e46db0148604c6937a2cc7535d934292 - path: output/analysis/SAMPLE_PAIRED_END_UMI/STAR/SAMPLE_PAIRED_END_UMI.Aligned.sortedByCoord.out.bam - path: output/analysis/SAMPLE_SINGLE_END/STAR/SAMPLE_SINGLE_END.Aligned.sortedByCoord.out.bam - path: output/analysis/SAMPLE_SINGLE_END/arriba/SAMPLE_SINGLE_END.fusions.discarded.tsv - md5sum: da3e17e01697fe9990fd545e1e26b822 + md5sum: 9daf6f31ee9a90b6b263bf5ae28dbe96 - path: output/analysis/SAMPLE_SINGLE_END/arriba/SAMPLE_SINGLE_END.fusions.tsv md5sum: 7c3383f7eb6d79b84b0bd30a7ef02d70 - path: output/pipeline_info/software_versions.yml diff --git a/workflows/forte.nf b/workflows/forte.nf index ea56c0a..3ce6178 100644 --- a/workflows/forte.nf +++ b/workflows/forte.nf @@ -129,6 +129,7 @@ workflow FORTE { PREPROCESS_READS.out.reads_trimmed, PREPROCESS_READS.out.reads_untrimmed, PREPARE_REFERENCES.out.star_index, + PREPARE_REFERENCES.out.fasta, PREPARE_REFERENCES.out.gtf, PREPARE_REFERENCES.out.starfusion_ref, PREPARE_REFERENCES.out.fusioncatcher_ref, @@ -137,9 +138,9 @@ workflow FORTE { PREPARE_REFERENCES.out.metafusion_gene_bed, PREPARE_REFERENCES.out.metafusion_gene_info, PREPARE_REFERENCES.out.metafusion_blocklist, - workflow.profile.toString().split(",").contains("test") ? [] : PREPARE_REFERENCES.out.arriba_blacklist, - workflow.profile.toString().split(",").contains("test") ? [] : PREPARE_REFERENCES.out.arriba_known_fusions, - workflow.profile.toString().split(",").contains("test") ? [] : PREPARE_REFERENCES.out.arriba_protein_domains + workflow.profile.toString().split(",").contains("test") ? Channel.of([]) : PREPARE_REFERENCES.out.arriba_blacklist, + workflow.profile.toString().split(",").contains("test") ? Channel.of([]) : PREPARE_REFERENCES.out.arriba_known_fusions, + workflow.profile.toString().split(",").contains("test") ? Channel.of([]) : PREPARE_REFERENCES.out.arriba_protein_domains ) ch_versions = ch_versions.mix(FUSION.out.ch_versions) @@ -152,7 +153,7 @@ workflow FORTE { ALIGN_READS.out.bam, ALIGN_READS.out.bai, MAF_INPUT_CHECK.out.mafs, - params.fasta, + PREPARE_REFERENCES.out.fasta.map{ it[1] }.first(), PREPARE_REFERENCES.out.fasta_fai.map{ it[1] }.first() ) ch_versions = ch_versions.mix(FILLOUT.out.ch_versions) @@ -168,6 +169,7 @@ workflow FORTE { PREPARE_REFERENCES.out.refflat, PREPARE_REFERENCES.out.rrna_interval_list, PREPARE_REFERENCES.out.rseqc_bed, + PREPARE_REFERENCES.out.fasta, PREPARE_REFERENCES.out.fasta_fai, PREPARE_REFERENCES.out.fasta_dict, BAIT_INPUTS.out.baits @@ -189,6 +191,7 @@ workflow FORTE { PREPARE_REFERENCES.out.refflat, PREPARE_REFERENCES.out.rrna_interval_list, PREPARE_REFERENCES.out.rseqc_bed, + PREPARE_REFERENCES.out.fasta, PREPARE_REFERENCES.out.fasta_fai, PREPARE_REFERENCES.out.fasta_dict, BAIT_INPUTS.out.baits