From dbb45c91eeb1d52d9e63d93a7b0d5e515bf5c8ec Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 17 Dec 2024 13:42:01 +0100 Subject: [PATCH 01/21] fix test config resources --- conf/test.config | 9 +++++++++ conf/test_build.config | 9 +++++++++ conf/test_cosmic.config | 9 +++++++++ 3 files changed, 27 insertions(+) diff --git a/conf/test.config b/conf/test.config index 3cafa252..d042d923 100644 --- a/conf/test.config +++ b/conf/test.config @@ -17,3 +17,12 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' } + +// Limit and standardize resources for github actions and reproducibility +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} \ No newline at end of file diff --git a/conf/test_build.config b/conf/test_build.config index 616d734f..1e324571 100644 --- a/conf/test_build.config +++ b/conf/test_build.config @@ -22,3 +22,12 @@ params { skip_salmon_index = true } + +// Limit and standardize resources for github actions and reproducibility +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} \ No newline at end of file diff --git a/conf/test_cosmic.config b/conf/test_cosmic.config index cf199da4..7101deb5 100644 --- a/conf/test_cosmic.config +++ b/conf/test_cosmic.config @@ -20,3 +20,12 @@ params { cosmic_username = secrets.COSMIC_USERNAME cosmic_passwd = secrets.COSMIC_PASSWD } + +// Limit and standardize resources for github actions and reproducibility +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} \ No newline at end of file From 4245c543cab4ed66ce60efc737a04455a995e75d Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 17 Dec 2024 13:42:59 +0100 Subject: [PATCH 02/21] add ctatsplicing module --- conf/modules.config | 12 ++ .../ctatsplicing/startocancerintrons/main.nf | 71 +++++++ .../startocancerintrons/tests/main.nf.test | 69 +++++++ 
.../tests/main.nf.test.snap | 191 ++++++++++++++++++ 4 files changed, 343 insertions(+) create mode 100644 modules/local/ctatsplicing/startocancerintrons/main.nf create mode 100644 modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test create mode 100644 modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test.snap diff --git a/conf/modules.config b/conf/modules.config index 264d1c87..9a72b994 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -49,6 +49,18 @@ process { ] } + withName: 'CTATSPLICING_STARTOCANCERINTRONS' { + ext.args = {[ + bam ? "--vis" : "", + "--sample_name ${meta.id}", + ].join(" ")} + publishDir = [ + path: { "${params.outdir}/ctat_splicing" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'ENSEMBL_DOWNLOAD' { publishDir = [ path: { "${params.genomes_base}/ensembl" }, diff --git a/modules/local/ctatsplicing/startocancerintrons/main.nf b/modules/local/ctatsplicing/startocancerintrons/main.nf new file mode 100644 index 00000000..ba7a6110 --- /dev/null +++ b/modules/local/ctatsplicing/startocancerintrons/main.nf @@ -0,0 +1,71 @@ +process CTATSPLICING_STARTOCANCERINTRONS { + tag "$meta.id" + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://data.broadinstitute.org/Trinity/CTAT_SINGULARITY/CTAT-SPLICING/ctat_splicing.v0.0.2.simg' : + 'docker.io/trinityctat/ctat_splicing:0.0.2' }" + + input: + tuple val(meta), path(sj), path(junction), path(bam), path(bai) + tuple val(meta2), path(genome_lib) + + output: + tuple val(meta), path("*.cancer_intron_reads.sorted.bam") , emit: cancer_introns_sorted_bam + tuple val(meta), path("*.cancer_intron_reads.sorted.bam.bai") , emit: cancer_introns_sorted_bai + tuple val(meta), path("*.gene_reads.sorted.sifted.bam") , emit: gene_reads_sorted_bam + tuple val(meta), path("*.gene_reads.sorted.sifted.bam.bai") , emit: gene_reads_sorted_bai + tuple val(meta), path("*.cancer.introns") , emit: cancer_introns + tuple val(meta), path("*.cancer.introns.prelim") , emit: cancer_introns_prelim + tuple val(meta), path("*${prefix}.introns") , emit: introns + tuple val(meta), path("*.introns.for_IGV.bed") , emit: introns_igv_bed, optional: true + tuple val(meta), path("*.ctat-splicing.igv.html") , emit: igv_html, optional: true + tuple val(meta), path("*.igv.tracks") , emit: igv_tracks, optional: true + tuple val(meta), path("*.chckpts") , emit: chckpts + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def bam_arg = bam ? "--bam_file ${bam}" : "" + def VERSION = '0.0.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def create_index = bam && !bai ? 
"samtools index ${bam}" : "" + """ + ${create_index} + /usr/local/src/CTAT-SPLICING/STAR_to_cancer_introns.py \\ + --SJ_tab_file ${sj} \\ + --chimJ_file ${junction} \\ + ${bam_arg} \\ + --output_prefix ${prefix} \\ + --ctat_genome_lib ${genome_lib} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ctat-splicing: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '0.0.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def create_igv_files = args.contains("--vis") ? "touch ${prefix}.introns.for_IGV.bed && touch ${prefix}.ctat-splicing.igv.html && touch ${prefix}.igv.tracks" : "" + """ + ${create_igv_files} + touch ${prefix}.cancer_intron_reads.sorted.bam + touch ${prefix}.cancer_intron_reads.sorted.bam.bai + touch ${prefix}.gene_reads.sorted.sifted.bam + touch ${prefix}.gene_reads.sorted.sifted.bam.bai + touch ${prefix}.cancer.introns + touch ${prefix}.cancer.introns.prelim + touch ${prefix}.introns + touch ${prefix}.chckpts + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ctat-splicing: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test b/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test new file mode 100644 index 00000000..dad961c4 --- /dev/null +++ b/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process CTATSPLICING_STARTOCANCERINTRONS" + script "../main.nf" + process "CTATSPLICING_STARTOCANCERINTRONS" + options "-stub" + + test("test without BAM") { + + when { + params { + outdir = "tests/results" + } + process { + """ + input[0] = [ + [id:"test"], + file("test.SJ.out.tab"), + file("test.Chimeric.out.junctions"), + [], + [] + ] + input[1] = [ + [id:"reference"], + file("ctat_genome_lib") + ] + """ + } + } + + then { + assertAll( 
+ { assert process.success }, + { assert snapshot(process.out.findAll { key, value -> !key.isNumber() }).match() } + ) + } + } + + test("test with BAM") { + + when { + params { + outdir = "tests/results" + } + process { + """ + input[0] = [ + [id:"test"], + file("test.SJ.out.tab"), + file("test.Chimeric.out.junctions"), + file("test.Aligned.sortedByCoord.out.bam"), + [] + ] + input[1] = [ + [id:"reference"], + file("ctat_genome_lib") + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, value -> !key.isNumber() }).match() } + ) + } + } +} diff --git a/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test.snap b/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test.snap new file mode 100644 index 00000000..b0ee3416 --- /dev/null +++ b/modules/local/ctatsplicing/startocancerintrons/tests/main.nf.test.snap @@ -0,0 +1,191 @@ +{ + "test without BAM": { + "content": [ + { + "cancer_introns": [ + [ + { + "id": "test" + }, + "test.cancer.introns:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_prelim": [ + [ + { + "id": "test" + }, + "test.cancer.introns.prelim:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_sorted_bai": [ + [ + { + "id": "test" + }, + "test.cancer_intron_reads.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_sorted_bam": [ + [ + { + "id": "test" + }, + "test.cancer_intron_reads.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "chckpts": [ + [ + { + "id": "test" + }, + "test.chckpts:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gene_reads_sorted_bai": [ + [ + { + "id": "test" + }, + "test.gene_reads.sorted.sifted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gene_reads_sorted_bam": [ + [ + { + "id": "test" + }, + "test.gene_reads.sorted.sifted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "igv_html": [ + + ], + "igv_tracks": [ + + ], + "introns": [ + [ + { + "id": "test" + 
}, + "test.introns:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "introns_igv_bed": [ + + ], + "versions": [ + "versions.yml:md5,fcf861a15f9951342a874b6bc476a37e" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-17T13:35:13.723215847" + }, + "test with BAM": { + "content": [ + { + "cancer_introns": [ + [ + { + "id": "test" + }, + "test.cancer.introns:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_prelim": [ + [ + { + "id": "test" + }, + "test.cancer.introns.prelim:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_sorted_bai": [ + [ + { + "id": "test" + }, + "test.cancer_intron_reads.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cancer_introns_sorted_bam": [ + [ + { + "id": "test" + }, + "test.cancer_intron_reads.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "chckpts": [ + [ + { + "id": "test" + }, + "test.chckpts:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gene_reads_sorted_bai": [ + [ + { + "id": "test" + }, + "test.gene_reads.sorted.sifted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gene_reads_sorted_bam": [ + [ + { + "id": "test" + }, + "test.gene_reads.sorted.sifted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "igv_html": [ + [ + { + "id": "test" + }, + "test.ctat-splicing.igv.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "igv_tracks": [ + [ + { + "id": "test" + }, + "test.igv.tracks:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "introns": [ + [ + { + "id": "test" + }, + "test.introns:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "introns_igv_bed": [ + [ + { + "id": "test" + }, + "test.introns.for_IGV.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,fcf861a15f9951342a874b6bc476a37e" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-17T13:33:27.36677449" + } +} \ No newline at end of file From 
f0259453d818c16dd2a3b40660745d5bffb034c5 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 17 Dec 2024 14:18:55 +0100 Subject: [PATCH 03/21] implement ctatsplicing --- conf/modules.config | 2 +- .../ctatsplicing/startocancerintrons/main.nf | 4 +- nextflow.config | 1 + nextflow_schema.json | 5 ++ .../local/ctatsplicing_workflow/main.nf | 31 ++++++++++ subworkflows/local/starfusion_workflow.nf | 25 +++++--- workflows/rnafusion.nf | 57 +++++++++++-------- 7 files changed, 91 insertions(+), 34 deletions(-) create mode 100644 subworkflows/local/ctatsplicing_workflow/main.nf diff --git a/conf/modules.config b/conf/modules.config index 9a72b994..ed6bc1eb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -55,7 +55,7 @@ process { "--sample_name ${meta.id}", ].join(" ")} publishDir = [ - path: { "${params.outdir}/ctat_splicing" }, + path: { "${params.outdir}/ctatsplicing" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/modules/local/ctatsplicing/startocancerintrons/main.nf b/modules/local/ctatsplicing/startocancerintrons/main.nf index ba7a6110..30fd5c3f 100644 --- a/modules/local/ctatsplicing/startocancerintrons/main.nf +++ b/modules/local/ctatsplicing/startocancerintrons/main.nf @@ -7,7 +7,7 @@ process CTATSPLICING_STARTOCANCERINTRONS { 'docker.io/trinityctat/ctat_splicing:0.0.2' }" input: - tuple val(meta), path(sj), path(junction), path(bam), path(bai) + tuple val(meta), path(split_junction), path(junction), path(bam), path(bai) tuple val(meta2), path(genome_lib) output: @@ -33,7 +33,7 @@ process CTATSPLICING_STARTOCANCERINTRONS { """ ${create_index} /usr/local/src/CTAT-SPLICING/STAR_to_cancer_introns.py \\ - --SJ_tab_file ${sj} \\ + --SJ_tab_file ${split_junction} \\ --chimJ_file ${junction} \\ ${bam_arg} \\ --output_prefix ${prefix} \\ diff --git a/nextflow.config b/nextflow.config index d436182c..46c2b517 100644 --- a/nextflow.config +++ b/nextflow.config @@ -61,6 +61,7 
@@ params { // Enable or disable tools all = false arriba = false + ctatsplicing = false fusioncatcher = false starindex = false starfusion = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 017a19fb..c8618561 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -143,6 +143,11 @@ "fa_icon": "far fa-file-code", "description": "Path to arriba output" }, + "ctatsplicing": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Build or run arriba CTAT-SPLICING" + }, "ensembl_ref": { "type": "string", "fa_icon": "far fa-file-code", diff --git a/subworkflows/local/ctatsplicing_workflow/main.nf b/subworkflows/local/ctatsplicing_workflow/main.nf new file mode 100644 index 00000000..a1516397 --- /dev/null +++ b/subworkflows/local/ctatsplicing_workflow/main.nf @@ -0,0 +1,31 @@ +include { CTATSPLICING_STARTOCANCERINTRONS } from '../../../modules/local/ctatsplicing/startocancerintrons' + +workflow CTATSPLICING_WORKFLOW { + take: + split_junctions // [ val(meta), path(split_junctions.SJ.out.tab) ] + junctions // [ val(meta), path(junctions.Chimeric.out.junction) ] + aligned_bams // [ val(meta), path(aligned_bams.Aligned.sortedByCoord.out.bam) ] + ctat_genome_lib // [ val(meta2), path(path/to/ctat_genome_lib) ] + + main: + def ch_versions = Channel.empty() + + if (params.ctatsplicing || params.all) { + def ch_ctatsplicing_input = split_junctions + .join(junctions, failOnMismatch:true, failOnDuplicate:true) + .join(aligned_bams, failOnMismatch:true, failOnDuplicate:true) + .map { meta, split_junction, junction, bam -> + [ meta, split_junction, junction, bam, [] ] + } + + CTATSPLICING_STARTOCANCERINTRONS( + ch_ctatsplicing_input, + ctat_genome_lib + ) + + } + + emit: + versions = ch_versions +} + diff --git a/subworkflows/local/starfusion_workflow.nf b/subworkflows/local/starfusion_workflow.nf index de99d8e5..8a9340da 100644 --- a/subworkflows/local/starfusion_workflow.nf +++ b/subworkflows/local/starfusion_workflow.nf @@ -10,15 
+10,20 @@ workflow STARFUSION_WORKFLOW { ch_chrgtf ch_starindex_ref ch_fasta + ch_starfusion_ref main: - ch_versions = Channel.empty() - ch_align = Channel.empty() - bam_sorted_indexed = Channel.empty() + def ch_versions = Channel.empty() + def ch_align = Channel.empty() + def ch_starfusion_fusions = Channel.empty() + def bam_sorted_indexed = Channel.empty() + def ch_bam_align_sorted = Channel.empty() + def ch_split_junctions = Channel.empty() + def ch_junctions = Channel.empty() ch_dummy_file = file("$baseDir/assets/dummy_file_starfusion.txt", checkIfExists: true) - if ((params.starfusion || params.all || params.stringtie) && !params.fusioninspector_only) { + if ((params.starfusion || params.all || params.stringtie || params.ctatsplicing) && !params.fusioninspector_only) { if (params.starfusion_fusions){ ch_starfusion_fusions = reads.combine(Channel.value(file(params.starfusion_fusions, checkIfExists:true))) .map { meta, reads, fusions -> [ meta, fusions ] } @@ -32,6 +37,10 @@ workflow STARFUSION_WORKFLOW { bam_sorted_indexed = STAR_FOR_STARFUSION.out.bam_sorted.join(SAMTOOLS_INDEX_FOR_STARFUSION.out.bai) reads_junction = reads.join(STAR_FOR_STARFUSION.out.junction ) + ch_bam_align_sorted = STAR_FOR_STARFUSION.out.bam_sorted_aligned + ch_split_junctions = STAR_FOR_STARFUSION.out.spl_junc_tab + ch_junctions = STAR_FOR_STARFUSION.out.junction + if (params.cram.contains('starfusion')){ SAMTOOLS_VIEW_FOR_STARFUSION (bam_sorted_indexed, ch_fasta, [] ) ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FOR_STARFUSION.out.versions) @@ -39,8 +48,8 @@ workflow STARFUSION_WORKFLOW { SAMTOOLS_INDEX_FOR_STARFUSION_CRAM (SAMTOOLS_VIEW_FOR_STARFUSION.out.cram) ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_STARFUSION_CRAM.out.versions) } - if (params.starfusion || params.all){ - STARFUSION( reads_junction, params.starfusion_ref) + if (params.starfusion || params.all || params.ctatsplicing){ + STARFUSION( reads_junction, ch_starfusion_ref.map { it -> it[1] }) ch_versions = 
ch_versions.mix(STARFUSION.out.versions) ch_starfusion_fusions = STARFUSION.out.fusions } @@ -61,7 +70,9 @@ workflow STARFUSION_WORKFLOW { star_gene_count = ch_star_gene_count ch_bam_sorted = ch_align.ifEmpty([[],[]]) ch_bam_sorted_indexed = bam_sorted_indexed.ifEmpty([[],[],[]]) + bam_align_sorted = ch_bam_align_sorted + split_junctions = ch_split_junctions + junctions = ch_junctions versions = ch_versions - } diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 0c8a6e5f..2f1bbfab 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -6,6 +6,7 @@ include { TRIM_WORKFLOW } from '../subworkflows/local/trim_workflow' include { ARRIBA_WORKFLOW } from '../subworkflows/local/arriba_workflow' include { QC_WORKFLOW } from '../subworkflows/local/qc_workflow' +include { CTATSPLICING_WORKFLOW } from '../subworkflows/local/ctatsplicing_workflow' include { STARFUSION_WORKFLOW } from '../subworkflows/local/starfusion_workflow' include { STRINGTIE_WORKFLOW } from '../subworkflows/local/stringtie_workflow/main' include { FUSIONCATCHER_WORKFLOW } from '../subworkflows/local/fusioncatcher_workflow' @@ -22,27 +23,6 @@ include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pi include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_rnafusion_pipeline' -ch_chrgtf = params.starfusion_build ? Channel.fromPath(params.chrgtf).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.starfusion_ref}/ref_annot.gtf").map { it -> [[id:it.Name], it] }.collect() -ch_starindex_ref = params.starfusion_build ? Channel.fromPath(params.starindex_ref).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.starfusion_ref}/ref_genome.fa.star.idx").map { it -> [[id:it.Name], it] }.collect() -ch_starindex_ensembl_ref = Channel.fromPath(params.starindex_ref).map { it -> [[id:it.Name], it] }.collect() -ch_refflat = params.starfusion_build ? 
Channel.fromPath(params.refflat).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.ensembl_ref}/ref_annot.gtf.refflat").map { it -> [[id:it.Name], it] }.collect() -ch_rrna_interval = params.starfusion_build ? Channel.fromPath(params.rrna_intervals).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.ensembl_ref}/ref_annot.interval_list").map { it -> [[id:it.Name], it] }.collect() -ch_adapter_fastp = params.adapter_fasta ? Channel.fromPath(params.adapter_fasta, checkIfExists: true) : Channel.empty() -ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { it -> [[id:it.Name], it] }.collect() -ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { it -> [[id:it.Name], it] }.collect() -ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { it -> [[id:it.Name], it] }.collect() -ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { it -> [[id:it.Name], it] }.collect() -ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands).map { it -> [[id:it.Name], it] }.collect() -ch_hgnc_ref = Channel.fromPath(params.hgnc_ref).map { it -> [[id:it.Name], it] }.collect() -ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { it -> [[id:it.Name], it] }.collect() -ch_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it.Name], it] }.collect() -ch_gtf = Channel.fromPath(params.gtf).map { it -> [[id:it.Name], it] }.collect() -ch_salmon_index = Channel.fromPath(params.salmon_index).map { it -> [[id:it.Name], it] }.collect() -ch_transcript = Channel.fromPath(params.transcript).map { it -> [[id:it.Name], it] }.collect() -ch_fai = Channel.fromPath(params.fai).map { it -> [[id:it.Name], it] }.collect() - - - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -56,8 +36,29 @@ workflow RNAFUSION { main: - ch_versions = Channel.empty() - ch_multiqc_files = 
Channel.empty() + def ch_versions = Channel.empty() + def ch_multiqc_files = Channel.empty() + + def ch_chrgtf = params.starfusion_build ? Channel.fromPath(params.chrgtf).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.starfusion_ref}/ref_annot.gtf").map { it -> [[id:it.Name], it] }.collect() + def ch_starindex_ref = params.starfusion_build ? Channel.fromPath(params.starindex_ref).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.starfusion_ref}/ref_genome.fa.star.idx").map { it -> [[id:it.Name], it] }.collect() + def ch_starindex_ensembl_ref = Channel.fromPath(params.starindex_ref).map { it -> [[id:it.Name], it] }.collect() + def ch_refflat = params.starfusion_build ? Channel.fromPath(params.refflat).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.ensembl_ref}/ref_annot.gtf.refflat").map { it -> [[id:it.Name], it] }.collect() + def ch_rrna_interval = params.starfusion_build ? Channel.fromPath(params.rrna_intervals).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.ensembl_ref}/ref_annot.interval_list").map { it -> [[id:it.Name], it] }.collect() + def ch_adapter_fastp = params.adapter_fasta ? 
Channel.fromPath(params.adapter_fasta, checkIfExists: true) : Channel.empty() + def ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { it -> [[id:it.Name], it] }.collect() + def ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { it -> [[id:it.Name], it] }.collect() + def ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { it -> [[id:it.Name], it] }.collect() + def ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { it -> [[id:it.Name], it] }.collect() + def ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands).map { it -> [[id:it.Name], it] }.collect() + def ch_hgnc_ref = Channel.fromPath(params.hgnc_ref).map { it -> [[id:it.Name], it] }.collect() + def ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { it -> [[id:it.Name], it] }.collect() + def ch_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it.Name], it] }.collect() + def ch_gtf = Channel.fromPath(params.gtf).map { it -> [[id:it.Name], it] }.collect() + def ch_salmon_index = Channel.fromPath(params.salmon_index).map { it -> [[id:it.Name], it] }.collect() + def ch_transcript = Channel.fromPath(params.transcript).map { it -> [[id:it.Name], it] }.collect() + def ch_fai = Channel.fromPath(params.fai).map { it -> [[id:it.Name], it] }.collect() + def ch_starfusion_ref = Channel.fromPath(params.starfusion_ref).map { it -> [[id:it.name], it]}.collect() + // // MODULE: Run FastQC // @@ -115,7 +116,8 @@ workflow RNAFUSION { ch_reads_all, ch_chrgtf, ch_starindex_ref, - ch_fasta + ch_fasta, + ch_starfusion_ref ) ch_versions = ch_versions.mix(STARFUSION_WORKFLOW.out.versions) @@ -145,6 +147,13 @@ workflow RNAFUSION { ) ch_versions = ch_versions.mix(FUSIONREPORT_WORKFLOW.out.versions) + //Run CTAT-splicing + CTATSPLICING_WORKFLOW( + STARFUSION_WORKFLOW.out.split_junctions, + STARFUSION_WORKFLOW.out.junctions, + STARFUSION_WORKFLOW.out.bam_align_sorted, + ch_starfusion_ref + ) 
//Run fusionInpector FUSIONINSPECTOR_WORKFLOW ( From 3268a455bb633220f33462dbae8c42db0aba9657 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 17 Dec 2024 14:33:47 +0100 Subject: [PATCH 04/21] fix pre-commit --- conf/test.config | 2 +- conf/test_build.config | 2 +- conf/test_cosmic.config | 2 +- subworkflows/local/ctatsplicing_workflow/main.nf | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/conf/test.config b/conf/test.config index d042d923..b3d32585 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,4 +25,4 @@ process { memory: '15.GB', time: '1.h' ] -} \ No newline at end of file +} diff --git a/conf/test_build.config b/conf/test_build.config index 1e324571..70c3a4ce 100644 --- a/conf/test_build.config +++ b/conf/test_build.config @@ -30,4 +30,4 @@ process { memory: '15.GB', time: '1.h' ] -} \ No newline at end of file +} diff --git a/conf/test_cosmic.config b/conf/test_cosmic.config index 7101deb5..9cc1bcda 100644 --- a/conf/test_cosmic.config +++ b/conf/test_cosmic.config @@ -28,4 +28,4 @@ process { memory: '15.GB', time: '1.h' ] -} \ No newline at end of file +} diff --git a/subworkflows/local/ctatsplicing_workflow/main.nf b/subworkflows/local/ctatsplicing_workflow/main.nf index a1516397..8b279033 100644 --- a/subworkflows/local/ctatsplicing_workflow/main.nf +++ b/subworkflows/local/ctatsplicing_workflow/main.nf @@ -17,15 +17,15 @@ workflow CTATSPLICING_WORKFLOW { .map { meta, split_junction, junction, bam -> [ meta, split_junction, junction, bam, [] ] } - + CTATSPLICING_STARTOCANCERINTRONS( ch_ctatsplicing_input, ctat_genome_lib ) + ch_versions = ch_versions.mix(CTATSPLICING_STARTOCANCERINTRONS.out.versions.first()) } emit: versions = ch_versions } - From a84033b11302d29381ad6ec4ccf52e52883c0ad7 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:42:25 +0100 Subject: [PATCH 05/21] Update nextflow.config Co-authored-by: Annick Renevey 
<47788523+rannick@users.noreply.github.com> --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 46c2b517..06a1278f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -61,7 +61,7 @@ params { // Enable or disable tools all = false arriba = false - ctatsplicing = false + ctatsplicing = false fusioncatcher = false starindex = false starfusion = false From 4247b12844c276dd3200f42e04c61f2583c86e2c Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 17 Dec 2024 14:48:30 +0100 Subject: [PATCH 06/21] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5c881c3..2dfa7a79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add nf-test to local subworkflow: `TRIM_WORKFLOW` [#572](https://github.com/nf-core/rnafusion/pull/572) - Add nf-test to local module: `FUSIONREPORT_DETECT`. 
Improve `FUSIONREPORT_DOWNLOAD` module [#572](https://github.com/nf-core/rnafusion/pull/577) - Add nf-test to local subworkflow: `ARRIBA_WORKFLOW` [#578](https://github.com/nf-core/rnafusion/pull/578) +- Added a new module `CTATSPLICING_STARTOCANCERINTRONS` [#587](https://github.com/nf-core/rnafusion/pull/587) ### Changed From 0db19d9301c69e4435c5c6ab9d3409a7165f0628 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 17 Dec 2024 16:11:53 +0100 Subject: [PATCH 07/21] make ctatsplicing possible with arriba input --- conf/modules.config | 16 +++++++++-- nextflow_schema.json | 2 +- subworkflows/local/arriba_workflow/main.nf | 33 +++++++++++++++------- subworkflows/local/starfusion_workflow.nf | 22 ++++++++------- workflows/rnafusion.nf | 11 ++------ 5 files changed, 52 insertions(+), 32 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index ed6bc1eb..253d0bf0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -49,13 +49,25 @@ process { ] } - withName: 'CTATSPLICING_STARTOCANCERINTRONS' { + withName: '.*ARRIBA_WORKFLOW:.*:CTATSPLICING_STARTOCANCERINTRONS' { ext.args = {[ bam ? "--vis" : "", "--sample_name ${meta.id}", ].join(" ")} publishDir = [ - path: { "${params.outdir}/ctatsplicing" }, + path: { "${params.outdir}/ctatsplicing/arriba" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*STARFUSION_WORKFLOW:.*:CTATSPLICING_STARTOCANCERINTRONS' { + ext.args = {[ + bam ? "--vis" : "", + "--sample_name ${meta.id}", + ].join(" ")} + publishDir = [ + path: { "${params.outdir}/ctatsplicing/starfusion" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] diff --git a/nextflow_schema.json b/nextflow_schema.json index c8618561..f7855d95 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -146,7 +146,7 @@ "ctatsplicing": { "type": "boolean", "fa_icon": "far fa-file-code", - "description": "Build or run arriba CTAT-SPLICING" + "description": "Run CTAT-splicing to detect aberrant cancer splicing introns. Needs --arriba and/or --starfusion to run." }, "ensembl_ref": { "type": "string", diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf index 5f59916a..5f99ef8e 100644 --- a/subworkflows/local/arriba_workflow/main.nf +++ b/subworkflows/local/arriba_workflow/main.nf @@ -4,20 +4,24 @@ include { SAMTOOLS_SORT as SAMTOOLS_SORT_FOR_ARRIBA } from '../../../modules/n include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_ARRIBA } from '../../../modules/nf-core/samtools/view/main' include { STAR_ALIGN as STAR_FOR_ARRIBA } from '../../../modules/nf-core/star/align/main' +include { CTATSPLICING_WORKFLOW } from '../ctatsplicing_workflow' + workflow ARRIBA_WORKFLOW { take: - reads // channel [ meta, [ fastqs ] ] - ch_gtf // channel [ meta, path_gtf ] - ch_fasta // channel [ meta, path_fasta ] - ch_starindex_ref // channel [ meta, path_index ] - ch_arriba_ref_blacklist // channel [ meta, path_blacklist ] - ch_arriba_ref_known_fusions // channel [ meta, path_known_fusions ] - ch_arriba_ref_cytobands // channel [ meta, path_cytobands ] - ch_arriba_ref_protein_domains // channel [ meta, path_proteins ] + reads // channel [ meta, [ fastqs ] ] + ch_gtf // channel [ meta, path_gtf ] + ch_fasta // channel [ meta, path_fasta ] + ch_starindex_ref // channel [ meta, path_index ] + ch_arriba_ref_blacklist // channel [ meta, path_blacklist ] + ch_arriba_ref_known_fusions // channel [ meta, path_known_fusions ] + ch_arriba_ref_cytobands // channel [ meta, path_cytobands ] + ch_arriba_ref_protein_domains // channel [ meta, path_proteins ] + ch_starfusion_ref // channel [ meta, 
path_starfusion_ref ] arriba // boolean all // boolean fusioninspector_only // boolean star_ignore_sjdbgtf // boolean + ctatsplicing // boolean seq_center // string arriba_fusions // path cram // array @@ -25,7 +29,7 @@ workflow ARRIBA_WORKFLOW { main: ch_versions = Channel.empty() ch_cram_index = Channel.empty() - ch_dummy_file = file("$baseDir/assets/dummy_file_arriba.txt", checkIfExists: true) + ch_dummy_file = file("$projectDir/assets/dummy_file_arriba.txt", checkIfExists: true) if (( arriba || all ) && !fusioninspector_only) { @@ -37,9 +41,18 @@ workflow ARRIBA_WORKFLOW { '', seq_center ) - ch_versions = ch_versions.mix(STAR_FOR_ARRIBA.out.versions) + if ( ctatsplicing || all ) { + CTATSPLICING_WORKFLOW( + STAR_FOR_ARRIBA.out.spl_junc_tab, + STAR_FOR_ARRIBA.out.junction, + STAR_FOR_ARRIBA.out.bam_sorted_aligned, + ch_starfusion_ref + ) + ch_versions = ch_versions.mix(CTATSPLICING_WORKFLOW.out.versions) + } + if ( arriba_fusions ) { ch_arriba_fusions = reads.combine( Channel.value( file( arriba_fusions, checkIfExists: true ) ) ) diff --git a/subworkflows/local/starfusion_workflow.nf b/subworkflows/local/starfusion_workflow.nf index 8a9340da..2a086ce1 100644 --- a/subworkflows/local/starfusion_workflow.nf +++ b/subworkflows/local/starfusion_workflow.nf @@ -4,6 +4,8 @@ include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FOR_STARFUSION } from '../../mod include { STAR_ALIGN as STAR_FOR_STARFUSION } from '../../modules/nf-core/star/align/main' include { STARFUSION } from '../../modules/local/starfusion/detect/main' +include { CTATSPLICING_WORKFLOW } from './ctatsplicing_workflow' + workflow STARFUSION_WORKFLOW { take: reads @@ -17,13 +19,10 @@ workflow STARFUSION_WORKFLOW { def ch_align = Channel.empty() def ch_starfusion_fusions = Channel.empty() def bam_sorted_indexed = Channel.empty() - def ch_bam_align_sorted = Channel.empty() - def ch_split_junctions = Channel.empty() - def ch_junctions = Channel.empty() ch_dummy_file = file("$baseDir/assets/dummy_file_starfusion.txt", 
checkIfExists: true) - if ((params.starfusion || params.all || params.stringtie || params.ctatsplicing) && !params.fusioninspector_only) { + if ((params.starfusion || params.all || params.stringtie) && !params.fusioninspector_only) { if (params.starfusion_fusions){ ch_starfusion_fusions = reads.combine(Channel.value(file(params.starfusion_fusions, checkIfExists:true))) .map { meta, reads, fusions -> [ meta, fusions ] } @@ -37,9 +36,15 @@ workflow STARFUSION_WORKFLOW { bam_sorted_indexed = STAR_FOR_STARFUSION.out.bam_sorted.join(SAMTOOLS_INDEX_FOR_STARFUSION.out.bai) reads_junction = reads.join(STAR_FOR_STARFUSION.out.junction ) - ch_bam_align_sorted = STAR_FOR_STARFUSION.out.bam_sorted_aligned - ch_split_junctions = STAR_FOR_STARFUSION.out.spl_junc_tab - ch_junctions = STAR_FOR_STARFUSION.out.junction + if (params.ctatsplicing || params.all) { + CTATSPLICING_WORKFLOW( + STAR_FOR_STARFUSION.out.spl_junc_tab, + STAR_FOR_STARFUSION.out.junction, + STAR_FOR_STARFUSION.out.bam_sorted_aligned, + ch_starfusion_ref + ) + ch_versions = ch_versions.mix(CTATSPLICING_WORKFLOW.out.versions) + } if (params.cram.contains('starfusion')){ SAMTOOLS_VIEW_FOR_STARFUSION (bam_sorted_indexed, ch_fasta, [] ) @@ -70,9 +75,6 @@ workflow STARFUSION_WORKFLOW { star_gene_count = ch_star_gene_count ch_bam_sorted = ch_align.ifEmpty([[],[]]) ch_bam_sorted_indexed = bam_sorted_indexed.ifEmpty([[],[],[]]) - bam_align_sorted = ch_bam_align_sorted - split_junctions = ch_split_junctions - junctions = ch_junctions versions = ch_versions } diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 2f1bbfab..74315e84 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -6,7 +6,6 @@ include { TRIM_WORKFLOW } from '../subworkflows/local/trim_workflow' include { ARRIBA_WORKFLOW } from '../subworkflows/local/arriba_workflow' include { QC_WORKFLOW } from '../subworkflows/local/qc_workflow' -include { CTATSPLICING_WORKFLOW } from '../subworkflows/local/ctatsplicing_workflow' include { 
STARFUSION_WORKFLOW } from '../subworkflows/local/starfusion_workflow' include { STRINGTIE_WORKFLOW } from '../subworkflows/local/stringtie_workflow/main' include { FUSIONCATCHER_WORKFLOW } from '../subworkflows/local/fusioncatcher_workflow' @@ -100,10 +99,12 @@ workflow RNAFUSION { ch_arriba_ref_known_fusions, ch_arriba_ref_cytobands, ch_arriba_ref_protein_domains, + ch_starfusion_ref, params.arriba, // boolean params.all, // boolean params.fusioninspector_only, // boolean params.star_ignore_sjdbgtf, // boolean + params.ctatsplicing, // boolean params.seq_center ?: '', // string params.arriba_fusions, // path params.cram // array @@ -147,14 +148,6 @@ workflow RNAFUSION { ) ch_versions = ch_versions.mix(FUSIONREPORT_WORKFLOW.out.versions) - //Run CTAT-splicing - CTATSPLICING_WORKFLOW( - STARFUSION_WORKFLOW.out.split_junctions, - STARFUSION_WORKFLOW.out.junctions, - STARFUSION_WORKFLOW.out.bam_align_sorted, - ch_starfusion_ref - ) - //Run fusionInpector FUSIONINSPECTOR_WORKFLOW ( ch_reads_all, From 350de1b443139e8e38634fc03438f2f3e243876e Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 19 Dec 2024 10:08:36 +0100 Subject: [PATCH 08/21] add some views to debug the full run --- subworkflows/local/fusionreport_workflow.nf | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/fusionreport_workflow.nf b/subworkflows/local/fusionreport_workflow.nf index 09ec9965..a0673f64 100644 --- a/subworkflows/local/fusionreport_workflow.nf +++ b/subworkflows/local/fusionreport_workflow.nf @@ -15,10 +15,16 @@ workflow FUSIONREPORT_WORKFLOW { ch_csv = Channel.empty() if (!params.fusioninspector_only) { + arriba_fusions.view { it -> "arriba fusions:${it}"} + starfusion_fusions.view { it -> "starfusion fusions:${it}"} + fusioncatcher_fusions.view { it -> "fusioncatcher fusions:${it}"} + reads_fusions = reads - .join(arriba_fusions, remainder: true) - .join(starfusion_fusions, remainder: true) - .join(fusioncatcher_fusions, 
remainder: true) + .join(arriba_fusions, failOnMismatch:true, failOnDuplicate:true) + .join(starfusion_fusions, failOnMismatch:true, failOnDuplicate:true) + .join(fusioncatcher_fusions, failOnMismatch:true, failOnDuplicate:true) + + reads_fusions.view() FUSIONREPORT(reads_fusions, fusionreport_ref, params.tools_cutoff) ch_fusion_list = FUSIONREPORT.out.fusion_list From b8bae77aef45119ebb922bb2fba11ca6b017a8f4 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 19 Dec 2024 14:59:21 +0100 Subject: [PATCH 09/21] try to fix issue with arriba output --- subworkflows/local/arriba_workflow/main.nf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf index 5f99ef8e..949e35ff 100644 --- a/subworkflows/local/arriba_workflow/main.nf +++ b/subworkflows/local/arriba_workflow/main.nf @@ -27,9 +27,10 @@ workflow ARRIBA_WORKFLOW { cram // array main: - ch_versions = Channel.empty() - ch_cram_index = Channel.empty() - ch_dummy_file = file("$projectDir/assets/dummy_file_arriba.txt", checkIfExists: true) + def ch_versions = Channel.empty() + def ch_cram_index = Channel.empty() + def ch_dummy_file = file("$projectDir/assets/dummy_file_arriba.txt", checkIfExists: true) + def ch_arriba_fusions = Channel.empty() if (( arriba || all ) && !fusioninspector_only) { From 00063944fa9f6623855ebbee272bcfba5f08a331 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 19 Dec 2024 15:18:05 +0100 Subject: [PATCH 10/21] remove views --- subworkflows/local/fusionreport_workflow.nf | 6 ------ 1 file changed, 6 deletions(-) diff --git a/subworkflows/local/fusionreport_workflow.nf b/subworkflows/local/fusionreport_workflow.nf index a0673f64..6a87244f 100644 --- a/subworkflows/local/fusionreport_workflow.nf +++ b/subworkflows/local/fusionreport_workflow.nf @@ -15,17 +15,11 @@ workflow FUSIONREPORT_WORKFLOW { ch_csv = Channel.empty() if (!params.fusioninspector_only) { - 
arriba_fusions.view { it -> "arriba fusions:${it}"} - starfusion_fusions.view { it -> "starfusion fusions:${it}"} - fusioncatcher_fusions.view { it -> "fusioncatcher fusions:${it}"} - reads_fusions = reads .join(arriba_fusions, failOnMismatch:true, failOnDuplicate:true) .join(starfusion_fusions, failOnMismatch:true, failOnDuplicate:true) .join(fusioncatcher_fusions, failOnMismatch:true, failOnDuplicate:true) - reads_fusions.view() - FUSIONREPORT(reads_fusions, fusionreport_ref, params.tools_cutoff) ch_fusion_list = FUSIONREPORT.out.fusion_list ch_fusion_list_filtered = FUSIONREPORT.out.fusion_list_filtered From 05e0e61110137b1684c3b661e603196bf6ea82f9 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 19 Dec 2024 15:20:29 +0100 Subject: [PATCH 11/21] remove breaking ifEmpties --- subworkflows/local/starfusion_workflow.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/starfusion_workflow.nf b/subworkflows/local/starfusion_workflow.nf index 2a086ce1..74c1839e 100644 --- a/subworkflows/local/starfusion_workflow.nf +++ b/subworkflows/local/starfusion_workflow.nf @@ -73,8 +73,8 @@ workflow STARFUSION_WORKFLOW { fusions = ch_starfusion_fusions star_stats = ch_star_stats star_gene_count = ch_star_gene_count - ch_bam_sorted = ch_align.ifEmpty([[],[]]) - ch_bam_sorted_indexed = bam_sorted_indexed.ifEmpty([[],[],[]]) + ch_bam_sorted = ch_align + ch_bam_sorted_indexed = bam_sorted_indexed versions = ch_versions } From 62fdf33845012d6c24993e105aec82e6318cda8a Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 19 Dec 2024 15:50:33 +0100 Subject: [PATCH 12/21] remove def --- modules/nf-core/star/align/main.nf | 3 ++- subworkflows/local/arriba_workflow/main.nf | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf index b5bc9ddf..18a3b461 100644 --- a/modules/nf-core/star/align/main.nf +++ b/modules/nf-core/star/align/main.nf 
@@ -41,7 +41,8 @@ process STAR_ALIGN { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def reads1 = [], reads2 = [] + def reads1 = [] + def reads2 = [] meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf" def seq_platform = seq_platform ? "'PL:$seq_platform'" : "" diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf index 949e35ff..c7089e85 100644 --- a/subworkflows/local/arriba_workflow/main.nf +++ b/subworkflows/local/arriba_workflow/main.nf @@ -30,7 +30,6 @@ workflow ARRIBA_WORKFLOW { def ch_versions = Channel.empty() def ch_cram_index = Channel.empty() def ch_dummy_file = file("$projectDir/assets/dummy_file_arriba.txt", checkIfExists: true) - def ch_arriba_fusions = Channel.empty() if (( arriba || all ) && !fusioninspector_only) { @@ -48,7 +47,7 @@ workflow ARRIBA_WORKFLOW { CTATSPLICING_WORKFLOW( STAR_FOR_ARRIBA.out.spl_junc_tab, STAR_FOR_ARRIBA.out.junction, - STAR_FOR_ARRIBA.out.bam_sorted_aligned, + STAR_FOR_ARRIBA.out.bam, ch_starfusion_ref ) ch_versions = ch_versions.mix(CTATSPLICING_WORKFLOW.out.versions) From 8155168298873fdfc04f9c99fd68a179e2c40b4f Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 19 Dec 2024 16:03:01 +0100 Subject: [PATCH 13/21] redo star changes --- modules/nf-core/star/align/main.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf index 18a3b461..b5bc9ddf 100644 --- a/modules/nf-core/star/align/main.nf +++ b/modules/nf-core/star/align/main.nf @@ -41,8 +41,7 @@ process STAR_ALIGN { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def reads1 = [] - def reads2 = [] + def reads1 = [], reads2 = [] meta.single_end ? 
[reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf" def seq_platform = seq_platform ? "'PL:$seq_platform'" : "" From e386c81f7d8d948da0db6aa509202ca61297bf86 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 19 Dec 2024 16:05:12 +0100 Subject: [PATCH 14/21] use bam from starfusion --- subworkflows/local/starfusion_workflow.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/starfusion_workflow.nf b/subworkflows/local/starfusion_workflow.nf index 74c1839e..b0c72bfc 100644 --- a/subworkflows/local/starfusion_workflow.nf +++ b/subworkflows/local/starfusion_workflow.nf @@ -40,7 +40,7 @@ workflow STARFUSION_WORKFLOW { CTATSPLICING_WORKFLOW( STAR_FOR_STARFUSION.out.spl_junc_tab, STAR_FOR_STARFUSION.out.junction, - STAR_FOR_STARFUSION.out.bam_sorted_aligned, + STAR_FOR_STARFUSION.out.bam, ch_starfusion_ref ) ch_versions = ch_versions.mix(CTATSPLICING_WORKFLOW.out.versions) From 62ee76f8c38849ccf13e715c544e8ddb4a94030b Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 19 Dec 2024 16:13:19 +0100 Subject: [PATCH 15/21] create junctions with arriba for ctatsplicing --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 253d0bf0..b19d935b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -291,7 +291,7 @@ process { --alignSplicedMateMapLminOverLmate 0.5 \ --alignSJstitchMismatchNmax 5 -1 5 5 \ --chimSegmentMin 10 \ - --chimOutType WithinBAM HardClip \ + --chimOutType WithinBAM HardClip Junctions \ --chimJunctionOverhangMin 10 \ --chimScoreDropMax 30 \ --chimScoreJunctionNonGTAG 0 \ From 19d1cc19651476042a96834d2c125842768ab11c Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 19 Dec 2024 17:15:46 +0100 Subject: [PATCH 16/21] do not create an index in ctat splicing --- 
modules/local/ctatsplicing/startocancerintrons/main.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/local/ctatsplicing/startocancerintrons/main.nf b/modules/local/ctatsplicing/startocancerintrons/main.nf index 30fd5c3f..edd9878f 100644 --- a/modules/local/ctatsplicing/startocancerintrons/main.nf +++ b/modules/local/ctatsplicing/startocancerintrons/main.nf @@ -29,9 +29,7 @@ process CTATSPLICING_STARTOCANCERINTRONS { prefix = task.ext.prefix ?: "${meta.id}" def bam_arg = bam ? "--bam_file ${bam}" : "" def VERSION = '0.0.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - def create_index = bam && !bai ? "samtools index ${bam}" : "" """ - ${create_index} /usr/local/src/CTAT-SPLICING/STAR_to_cancer_introns.py \\ --SJ_tab_file ${split_junction} \\ --chimJ_file ${junction} \\ From 9dc352fb4fd95259ce6ac877fba7bcb93d093ad0 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 20 Dec 2024 08:25:55 +0100 Subject: [PATCH 17/21] readd indexing to ctat & sort arriba bam --- conf/modules.config | 2 +- modules/local/ctatsplicing/startocancerintrons/main.nf | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index b19d935b..252fede9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -283,7 +283,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, ] ext.args = '--readFilesCommand zcat \ - --outSAMtype BAM Unsorted \ + --outSAMtype BAM SortedByCoordinate \ --outSAMunmapped Within \ --outBAMcompression 0 \ --outFilterMultimapNmax 50 \ diff --git a/modules/local/ctatsplicing/startocancerintrons/main.nf b/modules/local/ctatsplicing/startocancerintrons/main.nf index edd9878f..a8d683ec 100644 --- a/modules/local/ctatsplicing/startocancerintrons/main.nf +++ b/modules/local/ctatsplicing/startocancerintrons/main.nf @@ -29,7 +29,10 @@ process CTATSPLICING_STARTOCANCERINTRONS { prefix = task.ext.prefix ?: "${meta.id}" def bam_arg = bam ? "--bam_file ${bam}" : "" def VERSION = '0.0.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def create_index = bam && !bai ? "samtools index ${bam}" : "" """ + ${create_index} + /usr/local/src/CTAT-SPLICING/STAR_to_cancer_introns.py \\ --SJ_tab_file ${split_junction} \\ --chimJ_file ${junction} \\ From 9ea1f3b46836c440a8868352bb47632b47709e42 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 20 Dec 2024 11:32:36 +0100 Subject: [PATCH 18/21] remove ctatsplicing check from starfusion --- subworkflows/local/starfusion_workflow.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/starfusion_workflow.nf b/subworkflows/local/starfusion_workflow.nf index b0c72bfc..2083036f 100644 --- a/subworkflows/local/starfusion_workflow.nf +++ b/subworkflows/local/starfusion_workflow.nf @@ -53,7 +53,7 @@ workflow STARFUSION_WORKFLOW { SAMTOOLS_INDEX_FOR_STARFUSION_CRAM (SAMTOOLS_VIEW_FOR_STARFUSION.out.cram) ch_versions = ch_versions.mix(SAMTOOLS_INDEX_FOR_STARFUSION_CRAM.out.versions) } - if (params.starfusion || params.all || params.ctatsplicing){ + if (params.starfusion || params.all){ STARFUSION( reads_junction, ch_starfusion_ref.map { it -> it[1] }) ch_versions = ch_versions.mix(STARFUSION.out.versions) ch_starfusion_fusions = STARFUSION.out.fusions From 
1c98fc76d7dbf9404ad8522b4c0b8c55eed2fb2f Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 20 Dec 2024 11:32:43 +0100 Subject: [PATCH 19/21] clarify changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7168d02b..f09fb222 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add nf-test to local subworkflow: `TRIM_WORKFLOW` [#572](https://github.com/nf-core/rnafusion/pull/572) - Add nf-test to local module: `FUSIONREPORT_DETECT`. Improve `FUSIONREPORT_DOWNLOAD` module [#577](https://github.com/nf-core/rnafusion/pull/577) - Add nf-test to local subworkflow: `ARRIBA_WORKFLOW` [#578](https://github.com/nf-core/rnafusion/pull/578) -- Added a new module `CTATSPLICING_STARTOCANCERINTRONS` [#587](https://github.com/nf-core/rnafusion/pull/587) +- Added a new module `CTATSPLICING_STARTOCANCERINTRONS` and a new parameter `--ctatsplicing`. This option creates reports on cancer splicing aberrations and requires one or both of `--arriba` and `--starfusion` to be given. 
[#587](https://github.com/nf-core/rnafusion/pull/587) ### Changed From 74d4534681116df35a81b72d4488ff346a4ed84f Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 20 Dec 2024 11:40:13 +0100 Subject: [PATCH 20/21] update docs --- docs/output.md | 37 +++++++++++++++++++++++++++++++++++++ docs/usage.md | 4 ++-- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index 62859668..efc1425f 100644 --- a/docs/output.md +++ b/docs/output.md @@ -17,6 +17,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [STAR-fusion](#starfusion) - STAR-fusion fusion detection - [StringTie](#stringtie) - StringTie assembly - [FusionCatcher](#fusioncatcher) - Fusion catcher fusion detection +- [CTAT-SPLICING](#ctat-splicing) - Detection and annotation of cancer splicing aberrations - [Samtools](#samtools) - SAM/BAM file manipulation - [Fusion-report](#fusion-report) - Summary of the findings of each tool and comparison to COSMIC, Mitelman, and FusionGDB2 databases - [FusionInspector](#fusionInspector) - Supervised analysis of fusion predictions from fusion-report, recover and re-score evidence for such predictions @@ -186,6 +187,42 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m [FusionCatcher](https://github.com/ndaniel/fusioncatcher) searches for novel/known somatic fusion genes translocations, and chimeras in RNA-seq data. Possibility to use parameter `--fusioncatcher_limitSjdbInsertNsj` to modify limitSjdbInsertNsj. +### CTAT-SPLICING + +
+Output files + +- `ctatsplicing` + - `arriba` + - `.cancer_intron_reads.sorted.bam` + - `.cancer_intron_reads.sorted.bam.bai` + - `.cancer.introns` + - `.cancer.introns.prelim` + - `.chckpts` + - `.ctat-splicing.igv.html` + - `.gene_reads.sorted.sifted.bam` + - `.gene_reads.sorted.sifted.bam.bai` + - `.igv.tracks` + - `.introns` + - `.introns.for_IGV.bed` + - `starfusion` + - `.cancer_intron_reads.sorted.bam` + - `.cancer_intron_reads.sorted.bam.bai` + - `.cancer.introns` + - `.cancer.introns.prelim` + - `.chckpts` + - `.ctat-splicing.igv.html` + - `.gene_reads.sorted.sifted.bam` + - `.gene_reads.sorted.sifted.bam.bai` + - `.igv.tracks` + - `.introns` + - `.introns.for_IGV.bed` + +
+ +[CTAT-SPLICING](https://github.com/TrinityCTAT/CTAT-SPLICING/wiki) detects and annotates aberrant splicing isoforms in cancer. This is run on the input files for `arriba` and/or `starfusion`. + + ### FusionInspector
diff --git a/docs/usage.md b/docs/usage.md index b353b28c..1bddcf71 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -16,7 +16,7 @@ The pipeline is divided into two parts: 2. Detecting fusions -- Supported tools: `Arriba`, `FusionCatcher`, `STAR-Fusion`, and `StringTie` +- Supported tools: `Arriba`, `FusionCatcher`, `STAR-Fusion`, `StringTie` and `CTAT-SPLICING` - QC: `Fastqc`, `MultiQC`, and `Picard CollectInsertSize`, `Picard CollectWgsMetrics`, `Picard Markduplicates` - Fusions visualization: `Arriba`, `fusion-report`, `FusionInspector`, and `vcf_collect` @@ -136,7 +136,7 @@ As you can see above for multiple runs of the same sample, the `sample` name has ### Starting commands -The pipeline can either be run using all fusion detection tools or specifying individual tools. Visualisation tools will be run on all fusions detected. To run all tools (`arriba`, `fusioncatcher`, `starfusion`, `stringtie`) use the `--all` parameter: +The pipeline can either be run using all fusion detection tools or specifying individual tools. Visualisation tools will be run on all fusions detected. To run all tools (`arriba`, `fusioncatcher`, `starfusion`, `stringtie`, `ctatsplicing`) use the `--all` parameter: ```bash nextflow run nf-core/rnafusion \ From a209feaab80eb5a34428337e1b3173d4028f3452 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 20 Dec 2024 11:50:57 +0100 Subject: [PATCH 21/21] prettyyy --- docs/output.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index efc1425f..dd370575 100644 --- a/docs/output.md +++ b/docs/output.md @@ -222,7 +222,6 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m [CTAT-SPLICING](https://github.com/TrinityCTAT/CTAT-SPLICING/wiki) detects and annotates aberrant splicing isoforms in cancer. This is run on the input files for `arriba` and/or `starfusion`. - ### FusionInspector