From 032583a93135193e38d6ce761a6d31dc989b1f05 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 10 Dec 2024 01:05:43 +0000 Subject: [PATCH 01/24] fix: fix container versions --- modules/local/starfusion/build/environment.yml | 9 +++++---- modules/local/starfusion/build/main.nf | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/local/starfusion/build/environment.yml b/modules/local/starfusion/build/environment.yml index 06a9828a..3c0f4ac5 100644 --- a/modules/local/starfusion/build/environment.yml +++ b/modules/local/starfusion/build/environment.yml @@ -4,7 +4,8 @@ channels: dependencies: - bioconda::dfam=3.7 - bioconda::hmmer=3.4 - - bioconda::star-fusion=1.7.0 - - bioconda::trinity=2.15.2 - - bioconda::samtools=1.21 - - bioconda::star=2.7.11b + - bioconda::minimap2 + - bioconda::samtools + - bioconda::star + - bioconda::star-fusion=1.14.0 + - bioconda::trinity diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf index 6ac3c694..202dae02 100644 --- a/modules/local/starfusion/build/main.nf +++ b/modules/local/starfusion/build/main.nf @@ -2,7 +2,7 @@ process STARFUSION_BUILD { tag 'star-fusion' conda "${moduleDir}/environment.yml" - container 'community.wave.seqera.io/library/dfam_hmmer_samtools_star-fusion_pruned:5694d82381bf039e' + container 'community.wave.seqera.io/library/dfam_hmmer_minimap2_samtools_pruned:63e3d21ca68ea531' input: tuple val(meta), path(fasta) @@ -24,7 +24,7 @@ process STARFUSION_BUILD { wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3m --no-check-certificate wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3p --no-check-certificate gunzip Pfam-A.hmm.gz && hmmpress Pfam-A.hmm - $binPath \\ + /opt/conda/lib/STAR-Fusion/ctat-genome-lib-builder/prep_genome_lib.pl \\ --genome_fa $fasta \\ --gtf $gtf \\ --annot_filter_rule AnnotFilterRule.pm \\ From 
6dcac4fc79d78df11ae8f1d7a2cbb58725490879 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 10 Dec 2024 01:08:37 +0000 Subject: [PATCH 02/24] test: add nf-test initial draft --- .../local/fusioninspector/tests/main.nf.test | 57 +++++++++++++++++++ .../local/starfusion/build/tests/main.nf.test | 55 ++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 modules/local/fusioninspector/tests/main.nf.test create mode 100644 modules/local/starfusion/build/tests/main.nf.test diff --git a/modules/local/fusioninspector/tests/main.nf.test b/modules/local/fusioninspector/tests/main.nf.test new file mode 100644 index 00000000..0afa34ac --- /dev/null +++ b/modules/local/fusioninspector/tests/main.nf.test @@ -0,0 +1,57 @@ +nextflow_process { + + name "Test Process FUSIONINSPECTOR" + script "../main.nf" + process "FUSIONINSPECTOR" + + test("FUSIONINSPECTOR - human") { + + when { + process { + """ + input[0] = [ + [ id:'test_sample' ], + [ + file("https://raw.githubusercontent.com/FusionInspector/FusionInspector/master/test/test.reads_1.fastq.gz"), + file("https://raw.githubusercontent.com/FusionInspector/FusionInspector/master/test/test.reads_2.fastq.gz") + ], + file("https://raw.githubusercontent.com/FusionInspector/FusionInspector/master/test/fusion_targets.A.txt") + ] + input[1] = file("/path/to/reference/genome_lib") + """ + } + } + + then { + assert snapshot(process.out).match() + } + + } + + test("FUSIONINSPECTOR - human - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test_stub' ], + [ + file("https://raw.githubusercontent.com/FusionInspector/FusionInspector/master/test/test.reads_1.fastq.gz"), + file("https://raw.githubusercontent.com/FusionInspector/FusionInspector/master/test/test.reads_2.fastq.gz") + ], + file("https://raw.githubusercontent.com/FusionInspector/FusionInspector/master/test/fusion_targets.A.txt") + ] + input[1] = 
file("https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh37_gencode_v19_CTAT_lib_Mar012021.STAR_v2.7.11a.plug-n-play.tar.gz") + """ + } + } + + then { + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/local/starfusion/build/tests/main.nf.test b/modules/local/starfusion/build/tests/main.nf.test new file mode 100644 index 00000000..ef9aa393 --- /dev/null +++ b/modules/local/starfusion/build/tests/main.nf.test @@ -0,0 +1,55 @@ +nextflow_process { + + name "Test Process STARFUSION_BUILD" + script "../main.nf" + process "STARFUSION_BUILD" + + test("STARFUSION_BUILD - human - chr4") { + + when { + process { + """ + input[0] = [ + [ id:'fasta' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa") + ] + input[1] = [ + [ id:'gtf' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf") + ] + """ + } + } + + then { + assert snapshot(process.out).match() + } + + } + + test("STARFUSION_BUILD - human - chr4") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'fasta' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa") + ] + input[1] = [ + [ id:'gtf' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf") + ] + """ + } + } + + then { + assert snapshot(process.out).match() + } + + } + +} From 58aab76ed09c8434b0249531cbca20368c3b0dfb Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 10 Dec 2024 19:11:47 +0000 Subject: [PATCH 03/24] fix: simplify starfusion module --- modules/local/starfusion/build/main.nf | 28 +++++++++----------------- 1 file changed, 10 insertions(+), 18 
deletions(-) diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf index 202dae02..4d3c70b6 100644 --- a/modules/local/starfusion/build/main.nf +++ b/modules/local/starfusion/build/main.nf @@ -7,31 +7,23 @@ process STARFUSION_BUILD { input: tuple val(meta), path(fasta) tuple val(meta2), path(gtf) + path fusion_annot_lib + val dfam_species output: - path "*" , emit: reference + path "ctat_genome_lib_build_dir" , emit: reference script: - def binPath = (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) ? "prep_genome_lib.pl" : "/usr/local/src/STAR-Fusion/ctat-genome-lib-builder/prep_genome_lib.pl" + def binPath = (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) ? "prep_genome_lib.pl" : "/opt/conda/lib/STAR-Fusion/ctat-genome-lib-builder/prep_genome_lib.pl" + if (dfam_species != "human" && dfam_species != "mouse") { + error "Invalid species for --dfam_db. Only 'human' or 'mouse' are accepted. Provided: ${dfam_species}" + } """ - export TMPDIR=/tmp - wget http://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam34.0/Pfam-A.hmm.gz --no-check-certificate - wget https://github.com/FusionAnnotator/CTAT_HumanFusionLib/releases/download/v0.3.0/fusion_lib.Mar2021.dat.gz -O CTAT_HumanFusionLib_Mar2021.dat.gz --no-check-certificate - wget https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/AnnotFilterRule.pm -O AnnotFilterRule.pm --no-check-certificate - wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm --no-check-certificate - wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3f --no-check-certificate - wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3i --no-check-certificate - wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3m --no-check-certificate - wget 
https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3p --no-check-certificate - gunzip Pfam-A.hmm.gz && hmmpress Pfam-A.hmm - /opt/conda/lib/STAR-Fusion/ctat-genome-lib-builder/prep_genome_lib.pl \\ + prep_genome_lib.pl \\ --genome_fa $fasta \\ --gtf $gtf \\ - --annot_filter_rule AnnotFilterRule.pm \\ - --fusion_annot_lib CTAT_HumanFusionLib_Mar2021.dat.gz \\ - --pfam_db Pfam-A.hmm \\ - --dfam_db homo_sapiens_dfam.hmm \\ - --max_readlength $params.read_length \\ + --dfam_db ${dfam_species} \\ + --fusion_annot_lib $fusion_annot_lib \\ --CPU $task.cpus cat <<-END_VERSIONS > versions.yml From 7ff8be18d1466fe07ea8eb74a11f9b56c0e1d014 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 10 Dec 2024 19:15:57 +0000 Subject: [PATCH 04/24] tests: update nf-test starfusion --- .../local/starfusion/build/tests/main.nf.test | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/modules/local/starfusion/build/tests/main.nf.test b/modules/local/starfusion/build/tests/main.nf.test index ef9aa393..75993c00 100644 --- a/modules/local/starfusion/build/tests/main.nf.test +++ b/modules/local/starfusion/build/tests/main.nf.test @@ -4,19 +4,23 @@ nextflow_process { script "../main.nf" process "STARFUSION_BUILD" - test("STARFUSION_BUILD - human - chr4") { + test("STARFUSION_BUILD - human - minigenome") { when { process { """ input[0] = [ - [ id:'fasta' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa") + [ id:'minigenome fasta' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa") ] input[1] = [ - [ id:'gtf' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf") + [ id:'minigenome gtf' ], + 
file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf") ] + + input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz") + input [3] = "human" + """ } } @@ -27,7 +31,7 @@ nextflow_process { } - test("STARFUSION_BUILD - human - chr4") { + test("STARFUSION_BUILD - human - minigenome - stub") { options "-stub" @@ -35,13 +39,17 @@ nextflow_process { process { """ input[0] = [ - [ id:'fasta' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.1700000-54900000.fa") + [ id:'minigenome fasta' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa") ] input[1] = [ - [ id:'gtf' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/rnafusion/testdata/reference/ensembl/Homo_sapiens.GRCh38.102.chr4.gtf") + [ id:'minigenome gtf' ], + file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf") ] + + input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz") + input [3] = "human" + """ } } From 0770939ea7f410f511aa01955bc6e07ea44f76e2 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 10 Dec 2024 21:19:53 +0000 Subject: [PATCH 05/24] refactor: add stubs and ext.args --- modules/local/starfusion/build/main.nf | 42 ++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf index 4d3c70b6..12943648 100644 --- a/modules/local/starfusion/build/main.nf +++ b/modules/local/starfusion/build/main.nf @@ -1,5 +1,6 @@ process STARFUSION_BUILD { - tag 'star-fusion' + tag "$meta.id" + label 'process_high' conda "${moduleDir}/environment.yml" container 
'community.wave.seqera.io/library/dfam_hmmer_minimap2_samtools_pruned:63e3d21ca68ea531' @@ -14,16 +15,18 @@ process STARFUSION_BUILD { path "ctat_genome_lib_build_dir" , emit: reference script: - def binPath = (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) ? "prep_genome_lib.pl" : "/opt/conda/lib/STAR-Fusion/ctat-genome-lib-builder/prep_genome_lib.pl" if (dfam_species != "human" && dfam_species != "mouse") { error "Invalid species for --dfam_db. Only 'human' or 'mouse' are accepted. Provided: ${dfam_species}" } + def args = task.ext.args ?: '' """ prep_genome_lib.pl \\ --genome_fa $fasta \\ --gtf $gtf \\ --dfam_db ${dfam_species} \\ + --pfam_db current \\ --fusion_annot_lib $fusion_annot_lib \\ + ${args} \\ --CPU $task.cpus cat <<-END_VERSIONS > versions.yml @@ -35,7 +38,40 @@ process STARFUSION_BUILD { stub: """ mkdir ctat_genome_lib_build_dir - touch ref_annot.cdna.fa + touch ctat_genome_lib_build_dir/AnnotFilterRule.pm + touch ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz + touch ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans + touch ctat_genome_lib_build_dir/ref_genome.fa.mm2 + touch ctat_genome_lib_build_dir/ref_genome.fa.ntf + touch ctat_genome_lib_build_dir/blast_pairs.dat.gz + touch ctat_genome_lib_build_dir/ref_annot.cdna.fa + touch ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu + touch ctat_genome_lib_build_dir/ref_genome.fa.ndb + touch ctat_genome_lib_build_dir/ref_genome.fa.nto + touch ctat_genome_lib_build_dir/blast_pairs.idx + touch ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx + touch ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed + touch ctat_genome_lib_build_dir/ref_genome.fa.nhr + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx + touch ctat_genome_lib_build_dir/__chkpts + touch ctat_genome_lib_build_dir/ref_annot.cds + touch ctat_genome_lib_build_dir/ref_annot.pep + touch ctat_genome_lib_build_dir/ref_genome.fa.nin + touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat + 
touch ctat_genome_lib_build_dir/fusion_annot_lib.gz + touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa + touch ctat_genome_lib_build_dir/ref_annot.prot_info.dbm + touch ctat_genome_lib_build_dir/ref_genome.fa.njs + touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm + touch ctat_genome_lib_build_dir/fusion_annot_lib.idx + touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx + touch ctat_genome_lib_build_dir/ref_genome.fa + touch ctat_genome_lib_build_dir/ref_genome.fa.not + touch ctat_genome_lib_build_dir/trans.blast.dat.gz + touch ctat_genome_lib_build_dir/pfam_domains.dbm + touch ctat_genome_lib_build_dir/ref_annot.gtf + touch ctat_genome_lib_build_dir/ref_genome.fa.fai + touch ctat_genome_lib_build_dir/ref_genome.fa.nsq cat <<-END_VERSIONS > versions.yml "${task.process}": From 5b0b9e108798bf0804aa791986bbbac9e6c3bf40 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 10 Dec 2024 21:42:15 +0000 Subject: [PATCH 06/24] docs: update meta.yml --- modules/local/starfusion/build/meta.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/modules/local/starfusion/build/meta.yml b/modules/local/starfusion/build/meta.yml index c87b251b..d7f65a0b 100644 --- a/modules/local/starfusion/build/meta.yml +++ b/modules/local/starfusion/build/meta.yml @@ -1,7 +1,7 @@ -name: starfusion_downloadgenome +name: starfusion_build description: Download STAR-fusion genome resource required to run STAR-Fusion caller keywords: - - downoad + - download tools: - star-fusion: description: Fusion calling algorithm for RNAseq data @@ -20,6 +20,13 @@ input: type: file description: genome gtf file pattern: "*.{gtf}" + - fusion_annot_lib: + type: file + description: Fusion annotation library (key/val pairs, tab-delimited). + pattern: "*.dat.gz" + - dfam_species: + type: string + description: DNA transposable element database (Dfam.hmm), required for repeat masking. 
Only 'human' or 'mouse' are accepted (will automatically pull the resources from dfam). output: - reference: From 572e8df646ff4f7bf01eece487a3363dca6720c3 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 10 Dec 2024 22:13:14 +0000 Subject: [PATCH 07/24] test: update snapshots --- modules/local/starfusion/build/main.nf | 8 +- .../starfusion/build/tests/main.nf.test.snap | 260 ++++++++++++++++++ 2 files changed, 264 insertions(+), 4 deletions(-) create mode 100644 modules/local/starfusion/build/tests/main.nf.test.snap diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf index 12943648..80b52911 100644 --- a/modules/local/starfusion/build/main.nf +++ b/modules/local/starfusion/build/main.nf @@ -39,11 +39,11 @@ process STARFUSION_BUILD { """ mkdir ctat_genome_lib_build_dir touch ctat_genome_lib_build_dir/AnnotFilterRule.pm - touch ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz + gzip -c /dev/null > ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz touch ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans touch ctat_genome_lib_build_dir/ref_genome.fa.mm2 touch ctat_genome_lib_build_dir/ref_genome.fa.ntf - touch ctat_genome_lib_build_dir/blast_pairs.dat.gz + gzip -c /dev/null > ctat_genome_lib_build_dir/blast_pairs.dat.gz touch ctat_genome_lib_build_dir/ref_annot.cdna.fa touch ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu touch ctat_genome_lib_build_dir/ref_genome.fa.ndb @@ -58,7 +58,7 @@ process STARFUSION_BUILD { touch ctat_genome_lib_build_dir/ref_annot.pep touch ctat_genome_lib_build_dir/ref_genome.fa.nin touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat - touch ctat_genome_lib_build_dir/fusion_annot_lib.gz + gzip -c /dev/null > ctat_genome_lib_build_dir/fusion_annot_lib.gz touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa touch ctat_genome_lib_build_dir/ref_annot.prot_info.dbm touch ctat_genome_lib_build_dir/ref_genome.fa.njs @@ -67,7 +67,7 @@ process 
STARFUSION_BUILD { touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx touch ctat_genome_lib_build_dir/ref_genome.fa touch ctat_genome_lib_build_dir/ref_genome.fa.not - touch ctat_genome_lib_build_dir/trans.blast.dat.gz + gzip -c /dev/null > ctat_genome_lib_build_dir/trans.blast.dat.gz touch ctat_genome_lib_build_dir/pfam_domains.dbm touch ctat_genome_lib_build_dir/ref_annot.gtf touch ctat_genome_lib_build_dir/ref_genome.fa.fai diff --git a/modules/local/starfusion/build/tests/main.nf.test.snap b/modules/local/starfusion/build/tests/main.nf.test.snap new file mode 100644 index 00000000..5e5e99ac --- /dev/null +++ b/modules/local/starfusion/build/tests/main.nf.test.snap @@ -0,0 +1,260 @@ +{ + "STARFUSION_BUILD - human - minigenome": { + "content": [ + { + "0": [ + [ + "AnnotFilterRule.pm:md5,5391fcc58d9c71cd1f0e45668c5ec597", + "PFAM.domtblout.dat.gz:md5,6d8ba244e68072be23895ec22c8a8ce4", + [ + "_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "blast_pairs.dat.gz:md5,06d08c55cfa099ccb56d312ca0970729", + "blast_pairs.idx:md5,884cc8c9886409f7ec0fdf8586ffb78d", + "fusion_annot_lib.gz:md5,23d82a5da81f91ca4e1ecd6481992a12", + "fusion_annot_lib.idx:md5,39ab4cd5eab589c5fce9b4bfed82c729", + "pfam_domains.dbm:md5,04c75694f8e36461b43d1c693fdf3414", + "ref_annot.cdna.fa:md5,fe801b66cfab298c575971401e93c18c", + "ref_annot.cdna.fa.idx:md5,3c19e33d5424174f1a3de1f21f6746ff", + "ref_annot.cds:md5,4376fb48bd8bf47f13854b3f6bba8297", + "ref_annot.cdsplus.fa:md5,71e98a5b5d8cf371ec4b5db32c19120e", + "ref_annot.cdsplus.fa.idx:md5,8cb47913bfb0d73d3f55cd5ceafa8bfe", + "ref_annot.gtf:md5,5ce8afe99ef3940a877a04caeacf9181", + "ref_annot.gtf.gene_spans:md5,f178dd54a1f81174dfd203e212937e63", + "ref_annot.gtf.mini.sortu:md5,1d29ccecdbb7b40a99c84a02d6c2c1be", + "ref_annot.gtf.mm2.splice.bed:md5,340585ea1843bf06bf555575ddecf28c", + "ref_annot.pep:md5,fe3f58b430d99c06c816521ac439c4df", + "ref_annot.prot_info.dbm:md5,6983b8dccd3e3f920566fab403a15090", + "ref_genome.fa:md5,ad699c56ed38566c7d3e9579486b1706", + "ref_genome.fa.fai:md5,e3f74a27219b33ae80fd5de5cbeaf32b", + "ref_genome.fa.mm2:md5,ce50979ea284748eb9f84ae88cfd930e", + "ref_genome.fa.ndb:md5,6ea574753b557610f62f6e4ab79e19f5", + "ref_genome.fa.nhr:md5,50f28dae71683c4394bfaf94a1ef4392", + "ref_genome.fa.nin:md5,167a13425cf4aac970ae936bd43cbd43", + "ref_genome.fa.njs:md5,7350b9e036410da25ed2c0d717649221", + "ref_genome.fa.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", + "ref_genome.fa.nsq:md5,d2361e7871ce4cf51181c112a48f191b", + 
"ref_genome.fa.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", + "ref_genome.fa.nto:md5,33cdeccccebe80329f1fdbee7f5874cb", + [ + "Genome:md5,9e3efdd0901cabb5a2d589664a63b372", + "Log.out:md5,944cceca8093617ab4ece780628459f5", + "SA:md5,7dd9083264be9c6a2194d990bc10d237", + "SAindex:md5,ac4711df685109e04356db9e9cb9fb7f", + "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,e02cd536b7281b894246863b160d5d06", + "chrName.txt:md5,f4d0d6595f423084e6b9472e40dfe6e8", + "chrNameLength.txt:md5,07a67d7ac441d7d30d80840b0927e717", + "chrStart.txt:md5,e2031239a74fe5ee9051e9364e4f608a", + "exonGeTrInfo.tab:md5,3c35618d07a8e35a0f9108699fcdda42", + "exonInfo.tab:md5,bcbb3f32fa31fe504cc737f337ad341c", + "geneInfo.tab:md5,db5db4b6e003904e9908fce7c05f0125", + "genomeParameters.txt:md5,aabd07882af60af4bba0438a475e4e1a", + "sjdbInfo.txt:md5,e4cc1bbf8bd687cfc3d7c2c702e6def7", + "sjdbList.fromGTF.out.tab:md5,8f3e8604b00d4067e4eb80aa476a8113", + "sjdbList.out.tab:md5,5d78dd49d5db24ca2c056b7ebe5c2059", + "transcriptInfo.tab:md5,b758c0ccaddcf0453bab5905b3cec4a2" + ], + "trans.blast.align_coords.align_coords.dat:md5,9f6b7a75aea03a9671190be25ecdd4c2", + "trans.blast.align_coords.align_coords.dbm:md5,6000e63ebabe57652cfa8858ef6cc484", + "trans.blast.dat.gz:md5,85ba5ea96c566f751ad83a3e4b8ab128" + ] + ], + "reference": [ + [ + "AnnotFilterRule.pm:md5,5391fcc58d9c71cd1f0e45668c5ec597", + "PFAM.domtblout.dat.gz:md5,6d8ba244e68072be23895ec22c8a8ce4", + [ + "_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "blast_pairs.dat.gz:md5,06d08c55cfa099ccb56d312ca0970729", + "blast_pairs.idx:md5,884cc8c9886409f7ec0fdf8586ffb78d", + "fusion_annot_lib.gz:md5,23d82a5da81f91ca4e1ecd6481992a12", + "fusion_annot_lib.idx:md5,39ab4cd5eab589c5fce9b4bfed82c729", + "pfam_domains.dbm:md5,04c75694f8e36461b43d1c693fdf3414", + "ref_annot.cdna.fa:md5,fe801b66cfab298c575971401e93c18c", + "ref_annot.cdna.fa.idx:md5,3c19e33d5424174f1a3de1f21f6746ff", + "ref_annot.cds:md5,4376fb48bd8bf47f13854b3f6bba8297", + "ref_annot.cdsplus.fa:md5,71e98a5b5d8cf371ec4b5db32c19120e", + "ref_annot.cdsplus.fa.idx:md5,8cb47913bfb0d73d3f55cd5ceafa8bfe", + "ref_annot.gtf:md5,5ce8afe99ef3940a877a04caeacf9181", + "ref_annot.gtf.gene_spans:md5,f178dd54a1f81174dfd203e212937e63", + "ref_annot.gtf.mini.sortu:md5,1d29ccecdbb7b40a99c84a02d6c2c1be", + "ref_annot.gtf.mm2.splice.bed:md5,340585ea1843bf06bf555575ddecf28c", + "ref_annot.pep:md5,fe3f58b430d99c06c816521ac439c4df", + "ref_annot.prot_info.dbm:md5,6983b8dccd3e3f920566fab403a15090", + 
"ref_genome.fa:md5,ad699c56ed38566c7d3e9579486b1706", + "ref_genome.fa.fai:md5,e3f74a27219b33ae80fd5de5cbeaf32b", + "ref_genome.fa.mm2:md5,ce50979ea284748eb9f84ae88cfd930e", + "ref_genome.fa.ndb:md5,6ea574753b557610f62f6e4ab79e19f5", + "ref_genome.fa.nhr:md5,50f28dae71683c4394bfaf94a1ef4392", + "ref_genome.fa.nin:md5,167a13425cf4aac970ae936bd43cbd43", + "ref_genome.fa.njs:md5,7350b9e036410da25ed2c0d717649221", + "ref_genome.fa.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", + "ref_genome.fa.nsq:md5,d2361e7871ce4cf51181c112a48f191b", + "ref_genome.fa.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", + "ref_genome.fa.nto:md5,33cdeccccebe80329f1fdbee7f5874cb", + [ + "Genome:md5,9e3efdd0901cabb5a2d589664a63b372", + "Log.out:md5,944cceca8093617ab4ece780628459f5", + "SA:md5,7dd9083264be9c6a2194d990bc10d237", + "SAindex:md5,ac4711df685109e04356db9e9cb9fb7f", + "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,e02cd536b7281b894246863b160d5d06", + "chrName.txt:md5,f4d0d6595f423084e6b9472e40dfe6e8", + "chrNameLength.txt:md5,07a67d7ac441d7d30d80840b0927e717", + "chrStart.txt:md5,e2031239a74fe5ee9051e9364e4f608a", + "exonGeTrInfo.tab:md5,3c35618d07a8e35a0f9108699fcdda42", + "exonInfo.tab:md5,bcbb3f32fa31fe504cc737f337ad341c", + "geneInfo.tab:md5,db5db4b6e003904e9908fce7c05f0125", + "genomeParameters.txt:md5,aabd07882af60af4bba0438a475e4e1a", + "sjdbInfo.txt:md5,e4cc1bbf8bd687cfc3d7c2c702e6def7", + "sjdbList.fromGTF.out.tab:md5,8f3e8604b00d4067e4eb80aa476a8113", + "sjdbList.out.tab:md5,5d78dd49d5db24ca2c056b7ebe5c2059", + "transcriptInfo.tab:md5,b758c0ccaddcf0453bab5905b3cec4a2" + ], + "trans.blast.align_coords.align_coords.dat:md5,9f6b7a75aea03a9671190be25ecdd4c2", + "trans.blast.align_coords.align_coords.dbm:md5,6000e63ebabe57652cfa8858ef6cc484", + "trans.blast.dat.gz:md5,85ba5ea96c566f751ad83a3e4b8ab128" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T22:11:39.460127957" + }, + "STARFUSION_BUILD - human - 
minigenome - stub": { + "content": [ + { + "0": [ + [ + "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", + "PFAM.domtblout.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "__chkpts:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "pfam_domains.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cds:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mm2.splice.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.pep:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.prot_info.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.mm2:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ndb:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nin:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.njs:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.not:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.star.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.align_coords.align_coords.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reference": [ + [ + "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", + "PFAM.domtblout.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "__chkpts:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "pfam_domains.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cds:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mm2.splice.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.pep:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.prot_info.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.mm2:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ndb:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nin:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.njs:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.not:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.star.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.align_coords.align_coords.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T22:12:14.50896742" + } +} \ No newline at end of file From 5f6857e8d865c8bf60aa7f9015065769337bd297 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 10 Dec 2024 22:18:32 +0000 Subject: [PATCH 08/24] refactor: revert add nf-test initial draft --- .../local/fusioninspector/tests/main.nf.test | 57 ------------------- 1 file changed, 57 deletions(-) delete mode 100644 modules/local/fusioninspector/tests/main.nf.test diff --git a/modules/local/fusioninspector/tests/main.nf.test b/modules/local/fusioninspector/tests/main.nf.test deleted file mode 100644 index 0afa34ac..00000000 --- a/modules/local/fusioninspector/tests/main.nf.test +++ /dev/null @@ -1,57 +0,0 @@ -nextflow_process { - - name "Test Process FUSIONINSPECTOR" - script "../main.nf" - process "FUSIONINSPECTOR" - - test("FUSIONINSPECTOR - human") { - - when { - process { - """ - input[0] = [ - [ id:'test_sample' ], - [ - file("https://raw.githubusercontent.com/FusionInspector/FusionInspector/master/test/test.reads_1.fastq.gz"), - file("https://raw.githubusercontent.com/FusionInspector/FusionInspector/master/test/test.reads_2.fastq.gz") - ], - file("https://raw.githubusercontent.com/FusionInspector/FusionInspector/master/test/fusion_targets.A.txt") - ] - input[1] = file("/path/to/reference/genome_lib") - """ - } - } - - then { - assert snapshot(process.out).match() - } - - } - - test("FUSIONINSPECTOR - human - stub") { - - options "-stub" - - when { - process 
{ - """ - input[0] = [ - [ id:'test_stub' ], - [ - file("https://raw.githubusercontent.com/FusionInspector/FusionInspector/master/test/test.reads_1.fastq.gz"), - file("https://raw.githubusercontent.com/FusionInspector/FusionInspector/master/test/test.reads_2.fastq.gz") - ], - file("https://raw.githubusercontent.com/FusionInspector/FusionInspector/master/test/fusion_targets.A.txt") - ] - input[1] = file("https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh37_gencode_v19_CTAT_lib_Mar012021.STAR_v2.7.11a.plug-n-play.tar.gz") - """ - } - } - - then { - assert snapshot(process.out).match() - } - - } - -} From c257c62c7af34ddf0b3621fff1e0a9440cc57c20 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 11 Dec 2024 13:23:28 +0000 Subject: [PATCH 09/24] fix: add versions to dependencies --- modules/local/starfusion/build/environment.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/local/starfusion/build/environment.yml b/modules/local/starfusion/build/environment.yml index 3c0f4ac5..d02ab2d6 100644 --- a/modules/local/starfusion/build/environment.yml +++ b/modules/local/starfusion/build/environment.yml @@ -4,8 +4,8 @@ channels: dependencies: - bioconda::dfam=3.7 - bioconda::hmmer=3.4 - - bioconda::minimap2 - - bioconda::samtools - - bioconda::star + - bioconda::minimap2=2.28 + - bioconda::samtools=1.6 + - bioconda::star=2.7.11a - bioconda::star-fusion=1.14.0 - - bioconda::trinity + - bioconda::trinity=2.8.5 From f2df932841d056dfdc05821822e10848f0144267 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 11 Dec 2024 13:42:58 +0000 Subject: [PATCH 10/24] feat: add fusion annot lib as an external param --- nextflow.config | 1 + nextflow_schema.json | 5 +++++ subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf | 4 ++++ workflows/build_references.nf | 4 +++- 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/nextflow.config 
b/nextflow.config index d436182c..2d148cf9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -30,6 +30,7 @@ params { read_length = 100 starfusion_build = true genomes = [:] + fusion_annot_lib = null // Genomes options fasta = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.all.fa" diff --git a/nextflow_schema.json b/nextflow_schema.json index 017a19fb..05df1956 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -362,6 +362,11 @@ "type": "boolean", "fa_icon": "far fa-file-code", "description": "Avoid using Cosmic DB (for example in clinical case applications where a paid license applies." + }, + "fusion_annot_lib": { + "type": "string", + "description": "Path to Fusion Annotation Library to be used in STARFUSION_BUILD.", + "fa_icon": "far fa-file-code" } } }, diff --git a/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf index a4aaf9c4..ad97686a 100644 --- a/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf @@ -161,6 +161,10 @@ def validateInputParameters() { log.warn("No cosmic credentials were provided. Skipping COSMIC DB download from `FUSIONREPORT_DOWNLOAD`") } + if (params.starfusion_build && !params.fusion_annot_lib) { + error("No fusion annotation library provided. 
`STARFUSION_BUILD` is unable to run.") + } + } // diff --git a/workflows/build_references.nf b/workflows/build_references.nf index 4886a7bb..037e8705 100644 --- a/workflows/build_references.nf +++ b/workflows/build_references.nf @@ -68,7 +68,9 @@ workflow BUILD_REFERENCES { if (params.starfusion || params.all) { if (params.starfusion_build){ - STARFUSION_BUILD( ENSEMBL_DOWNLOAD.out.primary_assembly, ENSEMBL_DOWNLOAD.out.gtf ) + val_species = Channel.value("human") + ch_fusion_annot_lib = params.fusion_annot_lib + STARFUSION_BUILD( ENSEMBL_DOWNLOAD.out.primary_assembly, ENSEMBL_DOWNLOAD.out.gtf, ch_fusion_annot_lib, val_species) } else { STARFUSION_DOWNLOAD() } From 9dc94cbd968bf00b72501bc07b57b5c6d316a91c Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 11 Dec 2024 15:19:43 +0000 Subject: [PATCH 11/24] docs: add param type and where to get it --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 2d148cf9..32ccc1d0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -30,7 +30,7 @@ params { read_length = 100 starfusion_build = true genomes = [:] - fusion_annot_lib = null + fusion_annot_lib = null // path to dat.gz CTAT genome lib (e.g. 
https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_gencode_v44_CTAT_lib_Oct292023.plug-n-play.tar.gz) // Genomes options fasta = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.all.fa" From d2a7bd4e75746efeb8c193a8836792648e7fc7e7 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 11 Dec 2024 15:34:31 +0000 Subject: [PATCH 12/24] test: run starfusion in test_build --- conf/test_build.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conf/test_build.config b/conf/test_build.config index 616d734f..2449ee9a 100644 --- a/conf/test_build.config +++ b/conf/test_build.config @@ -21,4 +21,7 @@ params { all = true skip_salmon_index = true + starfusion_build = true + fusion_annot_lib = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz' + } From cf71e6265f86a966f12a5487e5dfa32b988bda66 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 11 Dec 2024 15:34:58 +0000 Subject: [PATCH 13/24] refactor: add metamap to output --- modules/local/starfusion/build/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf index 80b52911..f3014ca9 100644 --- a/modules/local/starfusion/build/main.nf +++ b/modules/local/starfusion/build/main.nf @@ -12,7 +12,7 @@ process STARFUSION_BUILD { val dfam_species output: - path "ctat_genome_lib_build_dir" , emit: reference + tuple val(meta), path("ctat_genome_lib_build_dir"), emit: reference script: if (dfam_species != "human" && dfam_species != "mouse") { From ef36aedf24c9206bfe24763ee5bb4cf01aa97a69 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 11 Dec 2024 16:00:41 +0000 Subject: [PATCH 14/24] test: update tests --- .../local/starfusion/build/tests/main.nf.test | 77 +++- 
.../starfusion/build/tests/main.nf.test.snap | 388 +++++++----------- 2 files changed, 231 insertions(+), 234 deletions(-) diff --git a/modules/local/starfusion/build/tests/main.nf.test b/modules/local/starfusion/build/tests/main.nf.test index 75993c00..8eb1b6f6 100644 --- a/modules/local/starfusion/build/tests/main.nf.test +++ b/modules/local/starfusion/build/tests/main.nf.test @@ -26,7 +26,82 @@ nextflow_process { } then { - assert snapshot(process.out).match() + assert snapshot( + path(process.out.reference[0][1]).resolve("AnnotFilterRule.pm"), + path(process.out.reference[0][1]).resolve("blast_pairs.dat.gz").exists(), + path(process.out.reference[0][1]).resolve("blast_pairs.idx").exists(), + path(process.out.reference[0][1]).resolve("__chkpts/annotfiltrule_cp.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/blast_pairs.idx.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/cp_gene_blast_pairs.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/cp_pfam_dat.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/cp_ref_annot_cdna.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/fusion_annot_lib.cp.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/_fusion_annot_lib.idx.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/index_pfam_hits.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/index_ref_annot_cdna.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/makeblastdb.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/mm2_genome_idx.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/mm2.splice_bed.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/_prot_info_db.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.gene_spans.ok"), + 
path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.mini.sortu.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_genome_fai.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/ref_genome.fa.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/trans.blast.dat.cp.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/trans.blast.dat.index.ok"), + path(process.out.reference[0][1]).resolve("__chkpts/validate_ctat_genome_lib.ok"), + path(process.out.reference[0][1]).resolve("fusion_annot_lib.gz"), + path(process.out.reference[0][1]).resolve("fusion_annot_lib.idx").exists(), + path(process.out.reference[0][1]).resolve("pfam_domains.dbm").exists(), + path(process.out.reference[0][1]).resolve("PFAM.domtblout.dat.gz").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cdna.fa").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cdna.fa.idx").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cds").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cdsplus.fa").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.cdsplus.fa.idx").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.gtf"), + path(process.out.reference[0][1]).resolve("ref_annot.gtf.gene_spans").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.gtf.mini.sortu"), + path(process.out.reference[0][1]).resolve("ref_annot.gtf.mm2.splice.bed"), + path(process.out.reference[0][1]).resolve("ref_annot.pep").exists(), + path(process.out.reference[0][1]).resolve("ref_annot.prot_info.dbm").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.fai"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.mm2"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.ndb"), + 
path(process.out.reference[0][1]).resolve("ref_genome.fa.nhr"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.nin").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa.njs").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa.not"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.nsq"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.ntf"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.nto"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/build.ok"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrLength.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrNameLength.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrName.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrStart.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/exonGeTrInfo.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/exonInfo.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/geneInfo.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/Genome"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/genomeParameters.txt").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/Log.out").exists(), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/SA"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/SAindex"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbInfo.txt"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab"), + path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbList.out.tab"), + 
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/transcriptInfo.tab"), + path(process.out.reference[0][1]).resolve("trans.blast.align_coords.align_coords.dat"), + path(process.out.reference[0][1]).resolve("trans.blast.align_coords.align_coords.dbm").exists(), + path(process.out.reference[0][1]).resolve("trans.blast.dat.gz"), + process.out.versions + ).match() } } diff --git a/modules/local/starfusion/build/tests/main.nf.test.snap b/modules/local/starfusion/build/tests/main.nf.test.snap index 5e5e99ac..6ccede31 100644 --- a/modules/local/starfusion/build/tests/main.nf.test.snap +++ b/modules/local/starfusion/build/tests/main.nf.test.snap @@ -1,252 +1,174 @@ { "STARFUSION_BUILD - human - minigenome": { "content": [ - { - "0": [ - [ - "AnnotFilterRule.pm:md5,5391fcc58d9c71cd1f0e45668c5ec597", - "PFAM.domtblout.dat.gz:md5,6d8ba244e68072be23895ec22c8a8ce4", - [ - "_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - 
"ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "blast_pairs.dat.gz:md5,06d08c55cfa099ccb56d312ca0970729", - "blast_pairs.idx:md5,884cc8c9886409f7ec0fdf8586ffb78d", - "fusion_annot_lib.gz:md5,23d82a5da81f91ca4e1ecd6481992a12", - "fusion_annot_lib.idx:md5,39ab4cd5eab589c5fce9b4bfed82c729", - "pfam_domains.dbm:md5,04c75694f8e36461b43d1c693fdf3414", - "ref_annot.cdna.fa:md5,fe801b66cfab298c575971401e93c18c", - "ref_annot.cdna.fa.idx:md5,3c19e33d5424174f1a3de1f21f6746ff", - "ref_annot.cds:md5,4376fb48bd8bf47f13854b3f6bba8297", - "ref_annot.cdsplus.fa:md5,71e98a5b5d8cf371ec4b5db32c19120e", - "ref_annot.cdsplus.fa.idx:md5,8cb47913bfb0d73d3f55cd5ceafa8bfe", - "ref_annot.gtf:md5,5ce8afe99ef3940a877a04caeacf9181", - "ref_annot.gtf.gene_spans:md5,f178dd54a1f81174dfd203e212937e63", - "ref_annot.gtf.mini.sortu:md5,1d29ccecdbb7b40a99c84a02d6c2c1be", - "ref_annot.gtf.mm2.splice.bed:md5,340585ea1843bf06bf555575ddecf28c", - "ref_annot.pep:md5,fe3f58b430d99c06c816521ac439c4df", - "ref_annot.prot_info.dbm:md5,6983b8dccd3e3f920566fab403a15090", - "ref_genome.fa:md5,ad699c56ed38566c7d3e9579486b1706", - "ref_genome.fa.fai:md5,e3f74a27219b33ae80fd5de5cbeaf32b", - "ref_genome.fa.mm2:md5,ce50979ea284748eb9f84ae88cfd930e", - "ref_genome.fa.ndb:md5,6ea574753b557610f62f6e4ab79e19f5", - "ref_genome.fa.nhr:md5,50f28dae71683c4394bfaf94a1ef4392", - "ref_genome.fa.nin:md5,167a13425cf4aac970ae936bd43cbd43", - "ref_genome.fa.njs:md5,7350b9e036410da25ed2c0d717649221", - "ref_genome.fa.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", - "ref_genome.fa.nsq:md5,d2361e7871ce4cf51181c112a48f191b", - "ref_genome.fa.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", - 
"ref_genome.fa.nto:md5,33cdeccccebe80329f1fdbee7f5874cb", - [ - "Genome:md5,9e3efdd0901cabb5a2d589664a63b372", - "Log.out:md5,944cceca8093617ab4ece780628459f5", - "SA:md5,7dd9083264be9c6a2194d990bc10d237", - "SAindex:md5,ac4711df685109e04356db9e9cb9fb7f", - "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "chrLength.txt:md5,e02cd536b7281b894246863b160d5d06", - "chrName.txt:md5,f4d0d6595f423084e6b9472e40dfe6e8", - "chrNameLength.txt:md5,07a67d7ac441d7d30d80840b0927e717", - "chrStart.txt:md5,e2031239a74fe5ee9051e9364e4f608a", - "exonGeTrInfo.tab:md5,3c35618d07a8e35a0f9108699fcdda42", - "exonInfo.tab:md5,bcbb3f32fa31fe504cc737f337ad341c", - "geneInfo.tab:md5,db5db4b6e003904e9908fce7c05f0125", - "genomeParameters.txt:md5,aabd07882af60af4bba0438a475e4e1a", - "sjdbInfo.txt:md5,e4cc1bbf8bd687cfc3d7c2c702e6def7", - "sjdbList.fromGTF.out.tab:md5,8f3e8604b00d4067e4eb80aa476a8113", - "sjdbList.out.tab:md5,5d78dd49d5db24ca2c056b7ebe5c2059", - "transcriptInfo.tab:md5,b758c0ccaddcf0453bab5905b3cec4a2" - ], - "trans.blast.align_coords.align_coords.dat:md5,9f6b7a75aea03a9671190be25ecdd4c2", - "trans.blast.align_coords.align_coords.dbm:md5,6000e63ebabe57652cfa8858ef6cc484", - "trans.blast.dat.gz:md5,85ba5ea96c566f751ad83a3e4b8ab128" - ] - ], - "reference": [ - [ - "AnnotFilterRule.pm:md5,5391fcc58d9c71cd1f0e45668c5ec597", - "PFAM.domtblout.dat.gz:md5,6d8ba244e68072be23895ec22c8a8ce4", - [ - "_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - 
"index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "blast_pairs.dat.gz:md5,06d08c55cfa099ccb56d312ca0970729", - "blast_pairs.idx:md5,884cc8c9886409f7ec0fdf8586ffb78d", - "fusion_annot_lib.gz:md5,23d82a5da81f91ca4e1ecd6481992a12", - "fusion_annot_lib.idx:md5,39ab4cd5eab589c5fce9b4bfed82c729", - "pfam_domains.dbm:md5,04c75694f8e36461b43d1c693fdf3414", - "ref_annot.cdna.fa:md5,fe801b66cfab298c575971401e93c18c", - "ref_annot.cdna.fa.idx:md5,3c19e33d5424174f1a3de1f21f6746ff", - "ref_annot.cds:md5,4376fb48bd8bf47f13854b3f6bba8297", - "ref_annot.cdsplus.fa:md5,71e98a5b5d8cf371ec4b5db32c19120e", - "ref_annot.cdsplus.fa.idx:md5,8cb47913bfb0d73d3f55cd5ceafa8bfe", - "ref_annot.gtf:md5,5ce8afe99ef3940a877a04caeacf9181", - "ref_annot.gtf.gene_spans:md5,f178dd54a1f81174dfd203e212937e63", - "ref_annot.gtf.mini.sortu:md5,1d29ccecdbb7b40a99c84a02d6c2c1be", - "ref_annot.gtf.mm2.splice.bed:md5,340585ea1843bf06bf555575ddecf28c", - "ref_annot.pep:md5,fe3f58b430d99c06c816521ac439c4df", - "ref_annot.prot_info.dbm:md5,6983b8dccd3e3f920566fab403a15090", - "ref_genome.fa:md5,ad699c56ed38566c7d3e9579486b1706", - 
"ref_genome.fa.fai:md5,e3f74a27219b33ae80fd5de5cbeaf32b", - "ref_genome.fa.mm2:md5,ce50979ea284748eb9f84ae88cfd930e", - "ref_genome.fa.ndb:md5,6ea574753b557610f62f6e4ab79e19f5", - "ref_genome.fa.nhr:md5,50f28dae71683c4394bfaf94a1ef4392", - "ref_genome.fa.nin:md5,167a13425cf4aac970ae936bd43cbd43", - "ref_genome.fa.njs:md5,7350b9e036410da25ed2c0d717649221", - "ref_genome.fa.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", - "ref_genome.fa.nsq:md5,d2361e7871ce4cf51181c112a48f191b", - "ref_genome.fa.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", - "ref_genome.fa.nto:md5,33cdeccccebe80329f1fdbee7f5874cb", - [ - "Genome:md5,9e3efdd0901cabb5a2d589664a63b372", - "Log.out:md5,944cceca8093617ab4ece780628459f5", - "SA:md5,7dd9083264be9c6a2194d990bc10d237", - "SAindex:md5,ac4711df685109e04356db9e9cb9fb7f", - "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", - "chrLength.txt:md5,e02cd536b7281b894246863b160d5d06", - "chrName.txt:md5,f4d0d6595f423084e6b9472e40dfe6e8", - "chrNameLength.txt:md5,07a67d7ac441d7d30d80840b0927e717", - "chrStart.txt:md5,e2031239a74fe5ee9051e9364e4f608a", - "exonGeTrInfo.tab:md5,3c35618d07a8e35a0f9108699fcdda42", - "exonInfo.tab:md5,bcbb3f32fa31fe504cc737f337ad341c", - "geneInfo.tab:md5,db5db4b6e003904e9908fce7c05f0125", - "genomeParameters.txt:md5,aabd07882af60af4bba0438a475e4e1a", - "sjdbInfo.txt:md5,e4cc1bbf8bd687cfc3d7c2c702e6def7", - "sjdbList.fromGTF.out.tab:md5,8f3e8604b00d4067e4eb80aa476a8113", - "sjdbList.out.tab:md5,5d78dd49d5db24ca2c056b7ebe5c2059", - "transcriptInfo.tab:md5,b758c0ccaddcf0453bab5905b3cec4a2" - ], - "trans.blast.align_coords.align_coords.dat:md5,9f6b7a75aea03a9671190be25ecdd4c2", - "trans.blast.align_coords.align_coords.dbm:md5,6000e63ebabe57652cfa8858ef6cc484", - "trans.blast.dat.gz:md5,85ba5ea96c566f751ad83a3e4b8ab128" - ] - ] - } + "AnnotFilterRule.pm:md5,5391fcc58d9c71cd1f0e45668c5ec597", + true, + true, + "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.gz:md5,23d82a5da81f91ca4e1ecd6481992a12", + true, + true, + true, + true, + true, + true, + true, + true, + "ref_annot.gtf:md5,5ce8afe99ef3940a877a04caeacf9181", + true, + "ref_annot.gtf.mini.sortu:md5,1d29ccecdbb7b40a99c84a02d6c2c1be", + "ref_annot.gtf.mm2.splice.bed:md5,340585ea1843bf06bf555575ddecf28c", + true, + true, + "ref_genome.fa:md5,ad699c56ed38566c7d3e9579486b1706", + "ref_genome.fa.fai:md5,e3f74a27219b33ae80fd5de5cbeaf32b", + "ref_genome.fa.mm2:md5,ce50979ea284748eb9f84ae88cfd930e", + "ref_genome.fa.ndb:md5,6ea574753b557610f62f6e4ab79e19f5", + "ref_genome.fa.nhr:md5,50f28dae71683c4394bfaf94a1ef4392", 
+ true, + true, + "ref_genome.fa.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", + "ref_genome.fa.nsq:md5,d2361e7871ce4cf51181c112a48f191b", + "ref_genome.fa.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", + "ref_genome.fa.nto:md5,33cdeccccebe80329f1fdbee7f5874cb", + true, + "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,e02cd536b7281b894246863b160d5d06", + "chrNameLength.txt:md5,07a67d7ac441d7d30d80840b0927e717", + "chrName.txt:md5,f4d0d6595f423084e6b9472e40dfe6e8", + "chrStart.txt:md5,e2031239a74fe5ee9051e9364e4f608a", + "exonGeTrInfo.tab:md5,3c35618d07a8e35a0f9108699fcdda42", + "exonInfo.tab:md5,bcbb3f32fa31fe504cc737f337ad341c", + "geneInfo.tab:md5,db5db4b6e003904e9908fce7c05f0125", + "Genome:md5,9e3efdd0901cabb5a2d589664a63b372", + true, + true, + "SA:md5,7dd9083264be9c6a2194d990bc10d237", + "SAindex:md5,ac4711df685109e04356db9e9cb9fb7f", + "sjdbInfo.txt:md5,e4cc1bbf8bd687cfc3d7c2c702e6def7", + "sjdbList.fromGTF.out.tab:md5,8f3e8604b00d4067e4eb80aa476a8113", + "sjdbList.out.tab:md5,5d78dd49d5db24ca2c056b7ebe5c2059", + "transcriptInfo.tab:md5,b758c0ccaddcf0453bab5905b3cec4a2", + "trans.blast.align_coords.align_coords.dat:md5,9f6b7a75aea03a9671190be25ecdd4c2", + true, + "trans.blast.dat.gz:md5,85ba5ea96c566f751ad83a3e4b8ab128", + null ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-10T22:11:39.460127957" + "timestamp": "2024-12-11T15:32:32.307712922" }, "STARFUSION_BUILD - human - minigenome - stub": { "content": [ { "0": [ [ - "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", - "PFAM.domtblout.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", - "__chkpts:md5,d41d8cd98f00b204e9800998ecf8427e", - "blast_pairs.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", - "blast_pairs.idx:md5,d41d8cd98f00b204e9800998ecf8427e", - "fusion_annot_lib.gz:md5,d41d8cd98f00b204e9800998ecf8427e", - "fusion_annot_lib.idx:md5,d41d8cd98f00b204e9800998ecf8427e", - "pfam_domains.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", - 
"ref_annot.cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.cdna.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.cds:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.cdsplus.fa:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.cdsplus.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf.gene_spans:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf.mini.sortu:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf.mm2.splice.bed:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.pep:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.prot_info.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.mm2:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.ndb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.nin:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.njs:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.not:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.star.idx:md5,d41d8cd98f00b204e9800998ecf8427e", - "trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", - "trans.blast.align_coords.align_coords.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", - "trans.blast.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + { + "id": "minigenome fasta" + }, + [ + "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", + "PFAM.domtblout.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "__chkpts:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"fusion_annot_lib.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "pfam_domains.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cds:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mm2.splice.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.pep:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.prot_info.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.mm2:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ndb:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nin:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.njs:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.not:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.star.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.align_coords.align_coords.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ], "reference": [ [ - "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", - "PFAM.domtblout.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", - 
"__chkpts:md5,d41d8cd98f00b204e9800998ecf8427e", - "blast_pairs.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", - "blast_pairs.idx:md5,d41d8cd98f00b204e9800998ecf8427e", - "fusion_annot_lib.gz:md5,d41d8cd98f00b204e9800998ecf8427e", - "fusion_annot_lib.idx:md5,d41d8cd98f00b204e9800998ecf8427e", - "pfam_domains.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.cdna.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.cds:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.cdsplus.fa:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.cdsplus.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf.gene_spans:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf.mini.sortu:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf.mm2.splice.bed:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.pep:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.prot_info.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.mm2:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.ndb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.nin:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.njs:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.not:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.star.idx:md5,d41d8cd98f00b204e9800998ecf8427e", - "trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", - "trans.blast.align_coords.align_coords.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", - "trans.blast.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + { + 
"id": "minigenome fasta" + }, + [ + "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", + "PFAM.domtblout.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "__chkpts:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "pfam_domains.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdna.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cds:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.fa.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mm2.splice.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.pep:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.prot_info.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.mm2:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ndb:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nin:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.njs:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.not:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.star.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.align_coords.align_coords.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ] } @@ -255,6 +177,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-10T22:12:14.50896742" + "timestamp": "2024-12-11T15:05:16.428455255" } -} \ No newline at end of file +} From 1b649b46013bc48d09ddc630799b99c02d4f1eb2 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 11 Dec 2024 16:59:05 +0000 Subject: [PATCH 15/24] test: add fusion_annot_lib so that stub does not fail --- tests/test_stub.nf.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_stub.nf.test b/tests/test_stub.nf.test index 41c7194f..b0601a9f 100644 --- a/tests/test_stub.nf.test +++ b/tests/test_stub.nf.test @@ -14,6 +14,7 @@ nextflow_pipeline { params { outdir = "$outputDir" fastp_trim = true + fusion_annot_lib = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz' } } @@ -44,6 +45,7 @@ nextflow_pipeline { when { params { outdir = "$outputDir" + fusion_annot_lib = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz' } } From ec373b317aadcfc7db831c632bc308569469400c Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Thu, 12 Dec 2024 14:18:50 +0000 Subject: [PATCH 16/24] fix: add dependencies versions and update container --- modules/local/starfusion/build/environment.yml | 2 +- modules/local/starfusion/build/main.nf | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/modules/local/starfusion/build/environment.yml b/modules/local/starfusion/build/environment.yml index d02ab2d6..eacee484 100644 --- a/modules/local/starfusion/build/environment.yml +++ b/modules/local/starfusion/build/environment.yml @@ -6,6 +6,6 @@ dependencies: - 
bioconda::hmmer=3.4 - bioconda::minimap2=2.28 - bioconda::samtools=1.6 - - bioconda::star=2.7.11a - bioconda::star-fusion=1.14.0 + - bioconda::star=2.7.11a - bioconda::trinity=2.8.5 diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf index f3014ca9..6dc654d0 100644 --- a/modules/local/starfusion/build/main.nf +++ b/modules/local/starfusion/build/main.nf @@ -3,7 +3,7 @@ process STARFUSION_BUILD { label 'process_high' conda "${moduleDir}/environment.yml" - container 'community.wave.seqera.io/library/dfam_hmmer_minimap2_samtools_pruned:63e3d21ca68ea531' + container 'community.wave.seqera.io/library/dfam_hmmer_minimap2_samtools_pruned:bd39df228dad7086' input: tuple val(meta), path(fasta) @@ -15,9 +15,6 @@ process STARFUSION_BUILD { tuple val(meta), path("ctat_genome_lib_build_dir"), emit: reference script: - if (dfam_species != "human" && dfam_species != "mouse") { - error "Invalid species for --dfam_db. Only 'human' or 'mouse' are accepted. Provided: ${dfam_species}" - } def args = task.ext.args ?: '' """ prep_genome_lib.pl \\ From 99033b5d342a92362b6414bf7e03ee2260fd277b Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Thu, 12 Dec 2024 14:19:25 +0000 Subject: [PATCH 17/24] refactor: add readlength as ext.args --- conf/modules.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/modules.config b/conf/modules.config index abe0e79d..0eb1441d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -329,6 +329,7 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, ] + ext.args = "--max_readlength ${params.read_length}" } withName: 'STARFUSION_DOWNLOAD' { From 17dd8b4b4ede749cc9a66bad5846859b232bbf19 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Thu, 12 Dec 2024 15:03:59 +0000 Subject: [PATCH 18/24] test: update stub structure --- modules/local/starfusion/build/main.nf | 97 +++++++++++----- .../starfusion/build/tests/main.nf.test.snap | 106 ++++++++++++++++-- 2 files changed, 167 insertions(+), 36 deletions(-) diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf index 6dc654d0..484389b1 100644 --- a/modules/local/starfusion/build/main.nf +++ b/modules/local/starfusion/build/main.nf @@ -34,41 +34,88 @@ process STARFUSION_BUILD { stub: """ - mkdir ctat_genome_lib_build_dir + mkdir -p ctat_genome_lib_build_dir + touch ctat_genome_lib_build_dir/AnnotFilterRule.pm - gzip -c /dev/null > ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz - touch ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans - touch ctat_genome_lib_build_dir/ref_genome.fa.mm2 - touch ctat_genome_lib_build_dir/ref_genome.fa.ntf gzip -c /dev/null > ctat_genome_lib_build_dir/blast_pairs.dat.gz - touch ctat_genome_lib_build_dir/ref_annot.cdna.fa - touch ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu - touch ctat_genome_lib_build_dir/ref_genome.fa.ndb - touch ctat_genome_lib_build_dir/ref_genome.fa.nto touch ctat_genome_lib_build_dir/blast_pairs.idx + + mkdir -p ctat_genome_lib_build_dir/__chkpts + touch ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok + touch ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok + touch ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok + touch ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok + touch ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok + touch ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok + touch ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok + touch 
ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok + touch ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok + touch ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok + touch ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok + touch ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok + touch ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok + touch ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok + touch ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok + touch ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok + touch ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok + + gzip -c /dev/null > ctat_genome_lib_build_dir/fusion_annot_lib.gz + touch ctat_genome_lib_build_dir/fusion_annot_lib.idx + touch ctat_genome_lib_build_dir/pfam_domains.dbm + gzip -c /dev/null > ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz + + touch ctat_genome_lib_build_dir/ref_annot.cdna.fa touch ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx - touch ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed - touch ctat_genome_lib_build_dir/ref_genome.fa.nhr - touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx - touch ctat_genome_lib_build_dir/__chkpts touch ctat_genome_lib_build_dir/ref_annot.cds - touch ctat_genome_lib_build_dir/ref_annot.pep - touch ctat_genome_lib_build_dir/ref_genome.fa.nin - touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat - gzip -c /dev/null > ctat_genome_lib_build_dir/fusion_annot_lib.gz touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa - touch 
ctat_genome_lib_build_dir/ref_annot.prot_info.dbm - touch ctat_genome_lib_build_dir/ref_genome.fa.njs - touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm - touch ctat_genome_lib_build_dir/fusion_annot_lib.idx touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx - touch ctat_genome_lib_build_dir/ref_genome.fa - touch ctat_genome_lib_build_dir/ref_genome.fa.not - gzip -c /dev/null > ctat_genome_lib_build_dir/trans.blast.dat.gz - touch ctat_genome_lib_build_dir/pfam_domains.dbm touch ctat_genome_lib_build_dir/ref_annot.gtf + touch ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans + touch ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu + touch ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed + touch ctat_genome_lib_build_dir/ref_annot.pep + touch ctat_genome_lib_build_dir/ref_annot.prot_info.dbm + + touch ctat_genome_lib_build_dir/ref_genome.fa touch ctat_genome_lib_build_dir/ref_genome.fa.fai + touch ctat_genome_lib_build_dir/ref_genome.fa.mm2 + touch ctat_genome_lib_build_dir/ref_genome.fa.ndb + touch ctat_genome_lib_build_dir/ref_genome.fa.nhr + touch ctat_genome_lib_build_dir/ref_genome.fa.nin + touch ctat_genome_lib_build_dir/ref_genome.fa.njs + touch ctat_genome_lib_build_dir/ref_genome.fa.not touch ctat_genome_lib_build_dir/ref_genome.fa.nsq + touch ctat_genome_lib_build_dir/ref_genome.fa.ntf + touch ctat_genome_lib_build_dir/ref_genome.fa.nto + + mkdir -p ctat_genome_lib_build_dir/ref_genome.fa.star.idx + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab + touch 
ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab + touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab + + touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat + touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm + gzip -c /dev/null > ctat_genome_lib_build_dir/trans.blast.dat.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/starfusion/build/tests/main.nf.test.snap b/modules/local/starfusion/build/tests/main.nf.test.snap index 6ccede31..e50db541 100644 --- a/modules/local/starfusion/build/tests/main.nf.test.snap +++ b/modules/local/starfusion/build/tests/main.nf.test.snap @@ -62,12 +62,12 @@ "exonGeTrInfo.tab:md5,3c35618d07a8e35a0f9108699fcdda42", "exonInfo.tab:md5,bcbb3f32fa31fe504cc737f337ad341c", "geneInfo.tab:md5,db5db4b6e003904e9908fce7c05f0125", - "Genome:md5,9e3efdd0901cabb5a2d589664a63b372", + "Genome:md5,21aac51c5062f73d6305ae77691f5d4e", true, true, - "SA:md5,7dd9083264be9c6a2194d990bc10d237", - "SAindex:md5,ac4711df685109e04356db9e9cb9fb7f", - "sjdbInfo.txt:md5,e4cc1bbf8bd687cfc3d7c2c702e6def7", + "SA:md5,4d6ee7c82328aa304300e51ab5677ff9", + "SAindex:md5,2d3498f7d84a69bbc65fe9de1887820c", + "sjdbInfo.txt:md5,0ad0c108e6d02ec541d6adeffbdea6fd", "sjdbList.fromGTF.out.tab:md5,8f3e8604b00d4067e4eb80aa476a8113", "sjdbList.out.tab:md5,5d78dd49d5db24ca2c056b7ebe5c2059", 
"transcriptInfo.tab:md5,b758c0ccaddcf0453bab5905b3cec4a2", @@ -80,7 +80,7 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-11T15:32:32.307712922" + "timestamp": "2024-12-12T14:57:13.528134845" }, "STARFUSION_BUILD - human - minigenome - stub": { "content": [ @@ -93,7 +93,31 @@ [ "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", "PFAM.domtblout.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", - "__chkpts:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e" + ], 
"blast_pairs.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", "blast_pairs.idx:md5,d41d8cd98f00b204e9800998ecf8427e", "fusion_annot_lib.gz:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -121,7 +145,25 @@ "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", "ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.star.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ], "trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", "trans.blast.align_coords.align_coords.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", "trans.blast.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e" @@ -136,7 +178,31 @@ [ "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", "PFAM.domtblout.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", - "__chkpts:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "_fusion_annot_lib.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "_prot_info_db.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "annotfiltrule_cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"blast_pairs.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_gene_blast_pairs.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_pfam_dat.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "cp_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "fusion_annot_lib.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_pfam_hits.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "index_ref_annot_cdna.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "makeblastdb.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2.splice_bed.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "mm2_genome_idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.cdsplus.dfam_masked.fa.idx.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.gene_spans.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome_fai.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.cp.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.dat.index.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + "validate_ctat_genome_lib.ok:md5,d41d8cd98f00b204e9800998ecf8427e" + ], "blast_pairs.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e", "blast_pairs.idx:md5,d41d8cd98f00b204e9800998ecf8427e", "fusion_annot_lib.gz:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -164,7 +230,25 @@ "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", "ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.star.idx:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "build.ok:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ], "trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", "trans.blast.align_coords.align_coords.dbm:md5,d41d8cd98f00b204e9800998ecf8427e", "trans.blast.dat.gz:md5,d41d8cd98f00b204e9800998ecf8427e" @@ -177,6 +261,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-11T15:05:16.428455255" + "timestamp": "2024-12-12T14:57:33.861849482" } -} +} \ No newline at end of file From f2b5ed59d611a9340225a1e436c4bf6ad35d393f Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:14:20 +0000 Subject: [PATCH 19/24] fix: update starfusion container declaration to latest nf-core standards --- modules/local/starfusion/build/main.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf index 484389b1..becd98d7 100644 --- a/modules/local/starfusion/build/main.nf +++ b/modules/local/starfusion/build/main.nf @@ -3,7 +3,9 @@ process STARFUSION_BUILD { label 'process_high' conda "${moduleDir}/environment.yml" - container 'community.wave.seqera.io/library/dfam_hmmer_minimap2_samtools_pruned:bd39df228dad7086' + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/09/0941210949303990f23c63831c7f23ceec099d52ffbe5f1ac2152964d328747e/data': + 'community.wave.seqera.io/library/dfam_hmmer_minimap2_samtools_pruned:bd39df228dad7086' }" input: tuple val(meta), path(fasta) From 00673c6af1d920721ff2177dff5fb13af444f52f Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Thu, 19 Dec 2024 11:14:31 -0300 Subject: [PATCH 20/24] Update conf/modules.config Co-authored-by: Annick Renevey <47788523+rannick@users.noreply.github.com> --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 5e5e0a01..f241f879 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -329,7 +329,7 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] - ext.args = "--max_readlength ${params.read_length}" + ext.args = "--max_readlength ${params.read_length} --human_gencode_filter" } withName: 'STARFUSION_DOWNLOAD' { From 2c01b3c2311069aa9fc91a50518be77d8526863e Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:07:15 +0100 Subject: [PATCH 21/24] use placeholder and add TODO for update --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 32ccc1d0..a7ab06b9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -30,7 +30,7 @@ params { read_length = 100 starfusion_build = true genomes = [:] - fusion_annot_lib = null // path to dat.gz CTAT genome lib (e.g. 
https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_gencode_v44_CTAT_lib_Oct292023.plug-n-play.tar.gz) + fusion_annot_lib = 'https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_gencode_v44_CTAT_lib_Oct292023.plug-n-play.tar.gz' //TODO: update to latest version // Genomes options fasta = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.all.fa" From 1237fefd30a47b82ac614d278a5f11291173d80c Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:42:58 +0000 Subject: [PATCH 22/24] fix: add full path to module to avoid changing nf-tests --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index f241f879..dcad2f40 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -320,7 +320,7 @@ process { ] } - withName: 'STARFUSION_BUILD' { + withName: 'NFCORE_RNAFUSION:BUILD_REFERENCES:STARFUSION_BUILD' { cpus = { 24 * task.attempt } memory = { 100.GB * task.attempt } time = { 2.d * task.attempt } From 89b26494a46658a17f099b2c1f9fc30bcf54dc52 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:43:36 +0000 Subject: [PATCH 23/24] feat: use previous `.dat.gz` file as default --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 32ccc1d0..fabbbdba 100644 --- a/nextflow.config +++ b/nextflow.config @@ -30,7 +30,7 @@ params { read_length = 100 starfusion_build = true genomes = [:] - fusion_annot_lib = null // path to dat.gz CTAT genome lib (e.g.
https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_gencode_v44_CTAT_lib_Oct292023.plug-n-play.tar.gz) + fusion_annot_lib = "https://github.com/FusionAnnotator/CTAT_HumanFusionLib/releases/download/v0.3.0/fusion_lib.Mar2021.dat.gz" // path to dat.gz CTAT genome lib // TODO: Update to latest with s3 link when available // Genomes options fasta = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.all.fa" From fb64ca6e557ca548eac8fce398c893b1f9c85ec6 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Thu, 19 Dec 2024 17:21:47 +0000 Subject: [PATCH 24/24] test: update snapshot --- .../local/starfusion/build/tests/main.nf.test.snap | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/local/starfusion/build/tests/main.nf.test.snap b/modules/local/starfusion/build/tests/main.nf.test.snap index e50db541..b08f2519 100644 --- a/modules/local/starfusion/build/tests/main.nf.test.snap +++ b/modules/local/starfusion/build/tests/main.nf.test.snap @@ -62,12 +62,12 @@ "exonGeTrInfo.tab:md5,3c35618d07a8e35a0f9108699fcdda42", "exonInfo.tab:md5,bcbb3f32fa31fe504cc737f337ad341c", "geneInfo.tab:md5,db5db4b6e003904e9908fce7c05f0125", - "Genome:md5,21aac51c5062f73d6305ae77691f5d4e", + "Genome:md5,9e3efdd0901cabb5a2d589664a63b372", true, true, - "SA:md5,4d6ee7c82328aa304300e51ab5677ff9", - "SAindex:md5,2d3498f7d84a69bbc65fe9de1887820c", - "sjdbInfo.txt:md5,0ad0c108e6d02ec541d6adeffbdea6fd", + "SA:md5,7dd9083264be9c6a2194d990bc10d237", + "SAindex:md5,ac4711df685109e04356db9e9cb9fb7f", + "sjdbInfo.txt:md5,e4cc1bbf8bd687cfc3d7c2c702e6def7", "sjdbList.fromGTF.out.tab:md5,8f3e8604b00d4067e4eb80aa476a8113", "sjdbList.out.tab:md5,5d78dd49d5db24ca2c056b7ebe5c2059", "transcriptInfo.tab:md5,b758c0ccaddcf0453bab5905b3cec4a2", @@ -77,10 +77,10 @@ null ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.2" + "nf-test": "0.9.2", + "nextflow": "24.10.3" }, - "timestamp": 
"2024-12-12T14:57:13.528134845" + "timestamp": "2024-12-19T17:03:12.812884291" }, "STARFUSION_BUILD - human - minigenome - stub": { "content": [