Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nf-test to STARFUSION_BUILD and refactor module #583

Merged
merged 27 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
032583a
fix: fix container versions
atrigila Dec 10, 2024
6dcac4f
test: add nf-test initial draft
atrigila Dec 10, 2024
58aab76
fix: simplify starfusion module
atrigila Dec 10, 2024
7ff8be1
tests: update nf-test starfusion
atrigila Dec 10, 2024
0770939
refactor: add stubs and ext.args
atrigila Dec 10, 2024
5b0b9e1
docs: update meta.yml
atrigila Dec 10, 2024
572e8df
test: update snapshots
atrigila Dec 10, 2024
5f6857e
refactor: revert add nf-test initial draft
atrigila Dec 10, 2024
c257c62
fix: add versions to dependencies
atrigila Dec 11, 2024
f2df932
feat: add fusion annot lib as an external param
atrigila Dec 11, 2024
9dc94cb
docs: add param type and where to get it
atrigila Dec 11, 2024
d2a7bd4
test: run starfusion in test_build
atrigila Dec 11, 2024
cf71e62
refactor: add metamap to output
atrigila Dec 11, 2024
ef36aed
test: update tests
atrigila Dec 11, 2024
1b649b4
test: add fusion_annot_lib so that stub does not fail
atrigila Dec 11, 2024
ec373b3
fix: add dependencies versions and update container
atrigila Dec 12, 2024
99033b5
refactor: add readlength as ext.args
atrigila Dec 12, 2024
17dd8b4
test: update stub structure
atrigila Dec 12, 2024
b6db271
Merge branch 'dev' into fusioninspector
rannick Dec 18, 2024
f2b5ed5
fix: update starfusion container declaration to latest nf-core standards
atrigila Dec 18, 2024
0596694
Merge branch 'dev' into fusioninspector
rannick Dec 19, 2024
00673c6
Update conf/modules.config
atrigila Dec 19, 2024
2c01b3c
use placeholder and add TODO for update
rannick Dec 19, 2024
1237fef
fix: add full path to module to avoid changing nf-tests
atrigila Dec 19, 2024
89b2649
feat: use previous `.dat.gz` file as default
atrigila Dec 19, 2024
4d9b7c8
Merge branch 'fusioninspector' of https://github.com/atrigila/rnafusi…
atrigila Dec 19, 2024
fb64ca6
test: update snapshot
atrigila Dec 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ process {
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
ext.args = "--max_readlength ${params.read_length}"
atrigila marked this conversation as resolved.
Show resolved Hide resolved
}

withName: 'STARFUSION_DOWNLOAD' {
Expand Down
3 changes: 3 additions & 0 deletions conf/test_build.config
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,7 @@ params {
all = true

skip_salmon_index = true
starfusion_build = true
fusion_annot_lib = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz'

}
111 changes: 92 additions & 19 deletions modules/local/starfusion/build/main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
process STARFUSION_BUILD {
tag 'star-fusion'
tag "$meta.id"
label 'process_high'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
Expand All @@ -9,30 +10,22 @@ process STARFUSION_BUILD {
input:
tuple val(meta), path(fasta)
tuple val(meta2), path(gtf)
path fusion_annot_lib
val dfam_species

output:
path "*" , emit: reference
tuple val(meta), path("ctat_genome_lib_build_dir"), emit: reference

script:
def args = task.ext.args ?: ''
"""
export TMPDIR=/tmp
wget http://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam34.0/Pfam-A.hmm.gz --no-check-certificate
wget https://github.com/FusionAnnotator/CTAT_HumanFusionLib/releases/download/v0.3.0/fusion_lib.Mar2021.dat.gz -O CTAT_HumanFusionLib_Mar2021.dat.gz --no-check-certificate
wget https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/AnnotFilterRule.pm -O AnnotFilterRule.pm --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3f --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3i --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3m --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3p --no-check-certificate
gunzip Pfam-A.hmm.gz && hmmpress Pfam-A.hmm
prep_genome_lib.pl \\
--genome_fa $fasta \\
--gtf $gtf \\
--annot_filter_rule AnnotFilterRule.pm \\
--fusion_annot_lib CTAT_HumanFusionLib_Mar2021.dat.gz \\
--pfam_db Pfam-A.hmm \\
--dfam_db homo_sapiens_dfam.hmm \\
--max_readlength $params.read_length \\
--dfam_db ${dfam_species} \\
--pfam_db current \\
--fusion_annot_lib $fusion_annot_lib \\
${args} \\
atrigila marked this conversation as resolved.
Show resolved Hide resolved
--CPU $task.cpus

cat <<-END_VERSIONS > versions.yml
Expand All @@ -43,8 +36,88 @@ process STARFUSION_BUILD {

stub:
"""
mkdir ctat_genome_lib_build_dir
touch ref_annot.cdna.fa
mkdir -p ctat_genome_lib_build_dir

touch ctat_genome_lib_build_dir/AnnotFilterRule.pm
gzip -c /dev/null > ctat_genome_lib_build_dir/blast_pairs.dat.gz
touch ctat_genome_lib_build_dir/blast_pairs.idx

mkdir -p ctat_genome_lib_build_dir/__chkpts
touch ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok
touch ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok
touch ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok
touch ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok
touch ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok
touch ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok
touch ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok
touch ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok
touch ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok
touch ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok
touch ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok
touch ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok
touch ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok
touch ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok
touch ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok
touch ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok

gzip -c /dev/null > ctat_genome_lib_build_dir/fusion_annot_lib.gz
touch ctat_genome_lib_build_dir/fusion_annot_lib.idx
touch ctat_genome_lib_build_dir/pfam_domains.dbm
gzip -c /dev/null > ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz

touch ctat_genome_lib_build_dir/ref_annot.cdna.fa
touch ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx
touch ctat_genome_lib_build_dir/ref_annot.cds
touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa
touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx
touch ctat_genome_lib_build_dir/ref_annot.gtf
touch ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans
touch ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu
touch ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed
touch ctat_genome_lib_build_dir/ref_annot.pep
touch ctat_genome_lib_build_dir/ref_annot.prot_info.dbm

touch ctat_genome_lib_build_dir/ref_genome.fa
touch ctat_genome_lib_build_dir/ref_genome.fa.fai
touch ctat_genome_lib_build_dir/ref_genome.fa.mm2
touch ctat_genome_lib_build_dir/ref_genome.fa.ndb
touch ctat_genome_lib_build_dir/ref_genome.fa.nhr
touch ctat_genome_lib_build_dir/ref_genome.fa.nin
touch ctat_genome_lib_build_dir/ref_genome.fa.njs
touch ctat_genome_lib_build_dir/ref_genome.fa.not
touch ctat_genome_lib_build_dir/ref_genome.fa.nsq
touch ctat_genome_lib_build_dir/ref_genome.fa.ntf
touch ctat_genome_lib_build_dir/ref_genome.fa.nto

mkdir -p ctat_genome_lib_build_dir/ref_genome.fa.star.idx
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab

touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat
touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm
gzip -c /dev/null > ctat_genome_lib_build_dir/trans.blast.dat.gz

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
11 changes: 9 additions & 2 deletions modules/local/starfusion/build/meta.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: starfusion_downloadgenome
name: starfusion_build
description: Build the STAR-Fusion genome resource library (CTAT genome lib) required to run the STAR-Fusion caller
keywords:
- downoad
- download
tools:
- star-fusion:
description: Fusion calling algorithm for RNAseq data
Expand All @@ -20,6 +20,13 @@ input:
type: file
description: genome gtf file
pattern: "*.{gtf}"
- fusion_annot_lib:
type: file
description: Fusion annotation library (key/val pairs, tab-delimited).
pattern: "*.dat.gz"
- dfam_species:
type: string
description: DNA transposable element database (Dfam.hmm), required for repeat masking. Only 'human' or 'mouse' are accepted (will automatically pull the resources from dfam).

output:
- reference:
Expand Down
138 changes: 138 additions & 0 deletions modules/local/starfusion/build/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// nf-test suite for the STARFUSION_BUILD process defined in ../main.nf.
// It holds two tests that feed the same inputs: a regular run and a run with the "-stub" option.
nextflow_process {

name "Test Process STARFUSION_BUILD"
script "../main.nf"
process "STARFUSION_BUILD"

// Regular run: snapshots individual files resolved inside the emitted reference directory.
test("STARFUSION_BUILD - human - minigenome") {

when {
// Inputs, in order: [meta, fasta], [meta, gtf], a fusion annotation library file, and the string "human".
// NOTE(review): the fourth input presumably selects the Dfam species of the process — confirm against main.nf.
process {
"""
input[0] = [
[ id:'minigenome fasta' ],
file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa")
]
input[1] = [
[ id:'minigenome gtf' ],
file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf")
]

input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz")
input [3] = "human"

"""
}
}

then {
// process.out.reference[0][1] is the second element of the first item emitted on `reference`;
// every entry below is resolved relative to it.
// Entries given as bare paths are handed to snapshot() as path objects, while entries ending in
// .exists() contribute only a boolean (presence check) to the snapshot.
// NOTE(review): the .exists() entries are presumably files whose content is not stable between runs — confirm.
// process.out.versions is snapshotted as the last entry.
assert snapshot(
path(process.out.reference[0][1]).resolve("AnnotFilterRule.pm"),
path(process.out.reference[0][1]).resolve("blast_pairs.dat.gz").exists(),
path(process.out.reference[0][1]).resolve("blast_pairs.idx").exists(),
path(process.out.reference[0][1]).resolve("__chkpts/annotfiltrule_cp.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/blast_pairs.idx.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/cp_gene_blast_pairs.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/cp_pfam_dat.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/cp_ref_annot_cdna.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/fusion_annot_lib.cp.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/_fusion_annot_lib.idx.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/index_pfam_hits.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/index_ref_annot_cdna.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/makeblastdb.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/mm2_genome_idx.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/mm2.splice_bed.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/_prot_info_db.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.gene_spans.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.mini.sortu.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_genome_fai.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_genome.fa.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/trans.blast.dat.cp.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/trans.blast.dat.index.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/validate_ctat_genome_lib.ok"),
path(process.out.reference[0][1]).resolve("fusion_annot_lib.gz"),
path(process.out.reference[0][1]).resolve("fusion_annot_lib.idx").exists(),
path(process.out.reference[0][1]).resolve("pfam_domains.dbm").exists(),
path(process.out.reference[0][1]).resolve("PFAM.domtblout.dat.gz").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.cdna.fa").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.cdna.fa.idx").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.cds").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.cdsplus.fa").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.cdsplus.fa.idx").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.gtf"),
path(process.out.reference[0][1]).resolve("ref_annot.gtf.gene_spans").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.gtf.mini.sortu"),
path(process.out.reference[0][1]).resolve("ref_annot.gtf.mm2.splice.bed"),
path(process.out.reference[0][1]).resolve("ref_annot.pep").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.prot_info.dbm").exists(),
path(process.out.reference[0][1]).resolve("ref_genome.fa"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.fai"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.mm2"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.ndb"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.nhr"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.nin").exists(),
path(process.out.reference[0][1]).resolve("ref_genome.fa.njs").exists(),
path(process.out.reference[0][1]).resolve("ref_genome.fa.not"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.nsq"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.ntf"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.nto"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx").exists(),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/build.ok"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrLength.txt"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrNameLength.txt"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrName.txt"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrStart.txt"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/exonGeTrInfo.tab"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/exonInfo.tab"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/geneInfo.tab"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/Genome"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/genomeParameters.txt").exists(),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/Log.out").exists(),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/SA"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/SAindex"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbInfo.txt"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbList.out.tab"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/transcriptInfo.tab"),
path(process.out.reference[0][1]).resolve("trans.blast.align_coords.align_coords.dat"),
path(process.out.reference[0][1]).resolve("trans.blast.align_coords.align_coords.dbm").exists(),
path(process.out.reference[0][1]).resolve("trans.blast.dat.gz"),
process.out.versions
).match()
}

}

// Stub run: same inputs as the regular test, executed with the "-stub" option.
test("STARFUSION_BUILD - human - minigenome - stub") {

options "-stub"

when {
// Same four inputs as in the regular test above.
process {
"""
input[0] = [
[ id:'minigenome fasta' ],
file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa")
]
input[1] = [
[ id:'minigenome gtf' ],
file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf")
]

input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz")
input [3] = "human"

"""
}
}

then {
// The complete process output is snapshotted here, rather than individual files.
assert snapshot(process.out).match()
}

}

}
Loading
Loading