From 2ebe102cb0503c918beed02c6b0d4146080a33d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Famke=20B=C3=A4uerle?= <45968370+famosab@users.noreply.github.com>
Date: Mon, 2 Dec 2024 23:59:18 +0100
Subject: [PATCH] Add module `muse/sump` (#5629)

* start work on muse/sump

* update to include tbi

* update snap

* update paths

* prettier

* exclude conda test

* change to bioconda

* update meta and snap

* add conda test

* update tests

* change ext args to new convention

* update containers and version extraction and add gzip

* add bgzip

* Update modules/nf-core/muse/sump/meta.yml

Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com>

* update test

* finalize bgzip and specify tests

* fix meta

---------

Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com>
---
 modules/nf-core/muse/sump/environment.yml     |  6 ++
 modules/nf-core/muse/sump/main.nf             | 54 +++++++++++++
 modules/nf-core/muse/sump/meta.yml            | 66 ++++++++++++++++
 modules/nf-core/muse/sump/tests/main.nf.test  | 78 +++++++++++++++++++
 .../nf-core/muse/sump/tests/main.nf.test.snap | 49 ++++++++++++
 .../nf-core/muse/sump/tests/nextflow.config   | 12 +++
 modules/nf-core/muse/sump/tests/tags.yml      |  2 +
 7 files changed, 267 insertions(+)
 create mode 100644 modules/nf-core/muse/sump/environment.yml
 create mode 100644 modules/nf-core/muse/sump/main.nf
 create mode 100644 modules/nf-core/muse/sump/meta.yml
 create mode 100644 modules/nf-core/muse/sump/tests/main.nf.test
 create mode 100644 modules/nf-core/muse/sump/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/muse/sump/tests/nextflow.config
 create mode 100644 modules/nf-core/muse/sump/tests/tags.yml

diff --git a/modules/nf-core/muse/sump/environment.yml b/modules/nf-core/muse/sump/environment.yml
new file mode 100644
index 00000000000..4c481284eae
--- /dev/null
+++ b/modules/nf-core/muse/sump/environment.yml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::muse=2.1.2
+  - bioconda::tabix=1.11 # needed for bgzip
diff --git a/modules/nf-core/muse/sump/main.nf b/modules/nf-core/muse/sump/main.nf
new file mode 100644
index 00000000000..1980003b842
--- /dev/null
+++ b/modules/nf-core/muse/sump/main.nf
@@ -0,0 +1,54 @@
+process MUSE_SUMP {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/35/3567f6162ff718c648175c5e7b5f848eaa27811d0cb3ad53def8f0a1c8893efa/data':
+        'community.wave.seqera.io/library/muse_tabix:df58ca78bd9447b7' }"
+
+    input:
+    tuple val(meta), path(muse_call_txt)
+    tuple val(meta2), path(ref_vcf), path(ref_vcf_tbi) // ref_vcf_tbi is declared as an input but never referenced by the script below
+
+    output:
+    tuple val(meta), path("*.vcf.gz"), emit: vcf // the .vcf written by MuSE sump, compressed by the bgzip step
+    path "versions.yml"              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // -G for WGS data and -E for WES data
+    def args2 = task.ext.args2 ?: '' // args for bgzip
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    MuSE \\
+        sump \\
+        $args \\
+        -I $muse_call_txt \\
+        -n $task.cpus \\
+        -D $ref_vcf \\
+        -O ${prefix}.vcf
+
+    bgzip $args2 --threads $task.cpus ${prefix}.vcf
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        MuSE: \$( MuSE --version | sed -e "s/MuSE, version //g" )
+        bgzip: \$( bgzip --version | sed -e "s/bgzip (htslib) //g" )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo "" | gzip > ${prefix}.vcf.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        MuSE: \$( MuSE --version | sed -e "s/MuSE, version //g" )
+        bgzip: \$( bgzip --version | sed -e "s/bgzip (htslib) //g" )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/muse/sump/meta.yml b/modules/nf-core/muse/sump/meta.yml
new file mode 100644
index 00000000000..9e938e11140
--- /dev/null
+++ b/modules/nf-core/muse/sump/meta.yml
@@ -0,0 +1,66 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "muse_sump"
+description: Computes tier-based cutoffs from a sample-specific error model which
+  is generated by muse/call and reports the finalized variants
+keywords:
+  - variant calling
+  - somatic
+  - wgs
+  - wxs
+  - vcf
+tools:
+  - "MuSE":
+      description: "Somatic point mutation caller based on Markov substitution model
+        for molecular evolution"
+      homepage: "https://bioinformatics.mdanderson.org/public-software/muse/"
+      documentation: "https://github.com/wwylab/MuSE"
+      tool_dev_url: "https://github.com/wwylab/MuSE"
+      doi: "10.1101/gr.278456.123"
+      licence: ["https://github.com/danielfan/MuSE/blob/master/LICENSE"]
+      identifier: ""
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - muse_call_txt:
+        type: file
+        description: single input file generated by 'MuSE call'
+        pattern: "*.MuSE.txt"
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing reference information.
+          e.g. `[ id:'test' ]`
+    - ref_vcf:
+        type: file
+        description: |
+          dbSNP vcf file that should be bgzip compressed, tabix indexed and
+          based on the same reference genome used in 'MuSE call'
+        pattern: "*.vcf.gz"
+    - ref_vcf_tbi:
+        type: file
+        description: Tabix index for the dbSNP vcf file
+        pattern: "*.vcf.gz.tbi"
+output:
+  - vcf:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.vcf.gz":
+          type: file
+          description: bgzipped vcf file with called variants
+          pattern: "*.vcf.gz"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@famosab"
+maintainers:
+  - "@famosab"
diff --git a/modules/nf-core/muse/sump/tests/main.nf.test b/modules/nf-core/muse/sump/tests/main.nf.test
new file mode 100644
index 00000000000..47640d3bd8c
--- /dev/null
+++ b/modules/nf-core/muse/sump/tests/main.nf.test
@@ -0,0 +1,78 @@
+nextflow_process {
+
+    name "Test Process MUSE_SUMP"
+    script "../main.nf"
+    process "MUSE_SUMP"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "muse"
+    tag "muse/sump"
+
+    test("human - txt") {
+
+        config "./nextflow.config"
+
+        when {
+            params {
+                module_args = '-E'
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/muse/MuSE-call.chr21.hg38.paired_end.recal.MuSE.txt', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:'reference' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    path(process.out.vcf.get(0).get(1)).vcf.header.getColumnCount(),
+                    path(process.out.vcf.get(0).get(1)).vcf.summary
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("human - txt - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/muse/MuSE-call.chr21.hg38.paired_end.recal.MuSE.txt', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id:'reference' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/muse/sump/tests/main.nf.test.snap b/modules/nf-core/muse/sump/tests/main.nf.test.snap
new file mode 100644
index 00000000000..4da43561484
--- /dev/null
+++ b/modules/nf-core/muse/sump/tests/main.nf.test.snap
@@ -0,0 +1,49 @@
+{
+    "human - txt - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,4d667cb8f2f96c5705b1e44affdd7330"
+                ],
+                "vcf": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,4d667cb8f2f96c5705b1e44affdd7330"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-11-29T14:52:29.011666"
+    },
+    "human - txt": {
+        "content": [
+            [
+                "versions.yml:md5,4d667cb8f2f96c5705b1e44affdd7330"
+            ],
+            11,
+            "VcfFile [chromosomes=[], sampleCount=2, variantCount=0, phased=true, phasedAutodetect=true]"
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.2"
+        },
+        "timestamp": "2024-12-02T16:29:22.506313"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/muse/sump/tests/nextflow.config b/modules/nf-core/muse/sump/tests/nextflow.config
new file mode 100644
index 00000000000..6d29ad9187b
--- /dev/null
+++ b/modules/nf-core/muse/sump/tests/nextflow.config
@@ -0,0 +1,12 @@
+process {
+
+    withName: 'MUSE_SUMP' {
+        ext.args = params.module_args
+        stageInMode = 'copy'
+    }
+
+    withName: 'MUSE_SUMP_WGS' { // NOTE(review): no test in main.nf.test runs the process under this name - confirm the selector is still needed
+        ext.args = '-G'
+    }
+
+}
diff --git a/modules/nf-core/muse/sump/tests/tags.yml b/modules/nf-core/muse/sump/tests/tags.yml
new file mode 100644
index 00000000000..6c833ca13ca
--- /dev/null
+++ b/modules/nf-core/muse/sump/tests/tags.yml
@@ -0,0 +1,2 @@
+muse/sump:
+  - "modules/nf-core/muse/sump/**"