Skip to content

Commit

Permalink
Modify DAS_Tool input structure, bump version, switch to nf-test (#7163)
Browse files Browse the repository at this point in the history
* Update DAS_Tool and add nf-test tests

* Remove old tests

* Fix tags and remove dastool from pytest modules YAML

* Fix meta.yml

* Update snapshot

* Update test

* Add stubs to dastool/dastool, dastool/fastatocontig2bin, and metabat2/metabat2. Add tests for dastool stub runs. Add test for log file output from dastool/dastool

* Fix linting + change test ext args config to match documentation

* Move config inside tests as it's not needed for the stub

* Deprecate dastool/fastatoscaffolds2bin

* Fix test
  • Loading branch information
prototaxites authored Dec 11, 2024
1 parent 356e5e6 commit 6d73658
Show file tree
Hide file tree
Showing 23 changed files with 916 additions and 262 deletions.
2 changes: 1 addition & 1 deletion modules/nf-core/dastool/dastool/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ channels:
- conda-forge
- bioconda
dependencies:
- bioconda::das_tool=1.1.6
- bioconda::das_tool=1.1.7
36 changes: 28 additions & 8 deletions modules/nf-core/dastool/dastool/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,11 @@ process DASTOOL_DASTOOL {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/das_tool:1.1.6--r42hdfd78af_0' :
'biocontainers/das_tool:1.1.6--r42hdfd78af_0' }"
'https://depot.galaxyproject.org/singularity/das_tool:1.1.7--r43hdfd78af_0' :
'biocontainers/das_tool:1.1.7--r43hdfd78af_0' }"

input:
tuple val(meta), path(contigs), path(bins)
path(proteins)
tuple val(meta), path(contigs), path(bins), path(proteins)
path(db_directory)

output:
Expand All @@ -19,8 +18,8 @@ process DASTOOL_DASTOOL {
tuple val(meta), path("*.eval") , optional: true, emit: eval
tuple val(meta), path("*_DASTool_bins/*.fa") , optional: true, emit: bins
tuple val(meta), path("*.pdf") , optional: true, emit: pdfs
tuple val(meta), path("*.candidates.faa") , optional: true, emit: fasta_proteins
tuple val(meta), path("*.faa") , optional: true, emit: candidates_faa
tuple val(meta), path("*.candidates.faa") , optional: true, emit: candidates_faa
tuple val(meta), path("*_proteins.faa") , optional: true, emit: fasta_proteins
tuple val(meta), path("*.archaea.scg") , optional: true, emit: fasta_archaea_scg
tuple val(meta), path("*.bacteria.scg") , optional: true, emit: fasta_bacteria_scg
tuple val(meta), path("*.b6") , optional: true, emit: b6
Expand All @@ -38,9 +37,8 @@ process DASTOOL_DASTOOL {
def clean_contigs = contigs.toString() - ".gz"
def decompress_contigs = contigs.toString() == clean_contigs ? "" : "gunzip -q -f $contigs"
def clean_proteins = proteins ? proteins.toString() - ".gz" : ""
def decompress_proteins = proteins ? "gunzip -f $proteins" : ""
def decompress_proteins = (proteins && (proteins.toString() != clean_proteins)) ? "gunzip -f $proteins" : ""
def proteins_pred = proteins ? "-p $clean_proteins" : ""

"""
$decompress_proteins
$decompress_contigs
Expand All @@ -59,4 +57,26 @@ process DASTOOL_DASTOOL {
dastool: \$( DAS_Tool --version 2>&1 | grep "DAS Tool" | sed 's/DAS Tool //' )
END_VERSIONS
"""

// Stub section: used for stub runs instead of the real script. It only creates
// empty placeholder files (plus versions.yml) so that the process outputs can be
// resolved without actually running the binning refinement.
stub:
// Output file prefix: task.ext.prefix when set, otherwise the sample id from the meta map.
def prefix = task.ext.prefix ?: "${meta.id}"
// The placeholder names below follow the "<prefix>_..." naming that the declared
// output globs (e.g. *.eval, *.candidates.faa, *_proteins.faa, *_DASTool_bins/*.fa)
// pick up. No *.pdf file is created; the pdfs output is declared optional.
// NOTE: versions.yml is still produced by invoking `DAS_Tool --version`, so the
// DAS_Tool executable must be available in the stub environment as well.
"""
touch ${prefix}_DASTool.log
touch ${prefix}_DASTool_summary.tsv
touch ${prefix}_DASTool_contig2bin.tsv
touch ${prefix}_allBins.eval
touch ${prefix}_proteins.faa.scg.candidates.faa
touch ${prefix}_proteins.faa
touch ${prefix}_proteins.faa.archaea.scg
touch ${prefix}_proteins.faa.bacteria.scg
touch ${prefix}_proteins.faa.findSCG.b6
touch ${prefix}.seqlength
mkdir ${prefix}_DASTool_bins
touch ${prefix}_DASTool_bins/${prefix}.1.fa
cat <<-END_VERSIONS > versions.yml
"${task.process}":
dastool: \$( DAS_Tool --version 2>&1 | grep "DAS Tool" | sed 's/DAS Tool //' )
END_VERSIONS
"""
}
11 changes: 6 additions & 5 deletions modules/nf-core/dastool/dastool/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,12 @@ input:
type: file
description: fasta file
pattern: "*.{fa.gz,fas.gz,fasta.gz}"

- bins:
type: file
description: "FastaToContig2Bin tabular file generated with dastool/fastatocontig2bin"
pattern: "*.tsv"
- - proteins:
- proteins:
type: file
description: Predicted proteins in prodigal fasta format (>scaffoldID_geneNo)
pattern: "*.{fa.gz,fas.gz,fasta.gz}"
Expand Down Expand Up @@ -110,20 +111,20 @@ output:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- "*.candidates.faa":
- "*_proteins.faa":
type: file
description: Output from prodigal if not already supplied
pattern: "*.proteins.faa"
pattern: "*_proteins.faa"
- candidates_faa:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- "*.faa":
- "*.candidates.faa":
type: file
description: Candidate protein sequences in FASTA amino-acid (.faa) format
pattern: "*.faa"
pattern: "*.candidates.faa"
- fasta_archaea_scg:
- meta:
type: map
Expand Down
177 changes: 177 additions & 0 deletions modules/nf-core/dastool/dastool/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
// nf-test suite for the DASTOOL_DASTOOL process defined in ../main.nf.
// Three tests are defined: a run without a protein file, a run with a
// protein file produced by PRODIGAL, and a stub run.
nextflow_process {

name "Test Process DASTOOL_DASTOOL"
script "../main.nf"
process "DASTOOL_DASTOOL"

// Tags cover this module as well as every module that is run in the setup block.
tag "modules"
tag "modules_nfcore"
tag "dastool"
tag "dastool/dastool"
tag "dastool/fastatocontig2bin"
tag "metabat2/metabat2"
tag "metabat2/jgisummarizebamcontigdepths"
tag "prodigal"

// Upstream processes whose outputs are consumed by the tests below:
//   JGISUMMARIZEBAMCONTIGDEPTHS (depth) -> METABAT2 (fasta) -> FASTATOCONTIG2BIN,
//   and PRODIGAL (amino_acid_fasta), which is used by the "proteins" and "stub" tests.
setup {
// Contig depth table computed from the test BAM and its index.
run("METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS") {
script "../../../metabat2/jgisummarizebamcontigdepths/main.nf"
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test1.sorted.bam", checkIfExists: true),
file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/illumina/bam/test1.sorted.bam.bai", checkIfExists: true),
]
"""
}
}
// Binning of the test genome; the genome channel is joined with the depth output above.
run("METABAT2_METABAT2") {
script "../../../metabat2/metabat2/main.nf"
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ],
file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz", checkIfExists: true)
]).join(METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS.out.depth)
"""
}
}
// Converts the METABAT2 fasta output into the fastatocontig2bin output used as the
// third tuple element of DASTOOL_DASTOOL input[0]; the second input is the string "fa".
run("DASTOOL_FASTATOCONTIG2BIN") {
script "../../fastatocontig2bin/main.nf"
process {
"""
input[0] = METABAT2_METABAT2.out.fasta.collect()
input[1] = "fa"
"""
}
}
// Runs PRODIGAL on the same genome; its amino_acid_fasta output is joined into
// input[0] by the tests that supply proteins.
run("PRODIGAL") {
script "../../../prodigal/main.nf"
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ],
file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz", checkIfExists: true)
])
input[1] = "gff"
"""
}
}
}

// Real run without a protein file: the fourth element of the input tuple is an
// empty list, and input[1] is passed as an empty list as well.
test("dastool dastool - bacteroides fragilis - noproteins") {

config "./nextflow.config"

when {
// presumably ./nextflow.config forwards module_args to the process as ext.args -- TODO confirm
params {
module_args = '--write_bins --write_bin_evals'
}
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ],
file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz", checkIfExists: true)
]).join(DASTOOL_FASTATOCONTIG2BIN.out.fastatocontig2bin)
.map { meta, contigs, c2b -> [ meta, contigs, c2b, [] ]}
input[1] = []
"""
}
}

// The log and fasta_proteins outputs are checked by substring only and are not
// part of the snapshot; all other listed outputs are compared against the snapshot.
then {
assertAll(
{ assert process.success },
{ assert path(process.out.log.get(0).get(1)).text.contains("Dereplicating, aggregating, and scoring bins") },
{ assert path(process.out.fasta_proteins.get(0).get(1)).text.contains("MKIGIITICKVNNYGAELQAFATQKKLEQMGHNAEIINYLYYKDWHFKDTPLSQPFVPLD") },
{ assert snapshot(
process.out.summary,
process.out.contig2bin,
process.out.eval,
process.out.bins,
process.out.pdfs,
process.out.candidates_faa,
process.out.fasta_archaea_scg,
process.out.fasta_bacteria_scg,
process.out.b6,
process.out.seqlength,
process.out.versions
).match() }
)
}

}

// Real run with a protein file: the PRODIGAL amino_acid_fasta output is joined in
// as the fourth element of the input tuple.
test("dastool dastool - bacteroides fragilis - proteins") {

config "./nextflow.config"

when {
params {
module_args = '--write_bins --write_bin_evals'
}
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ],
file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz", checkIfExists: true)
])
.join(DASTOOL_FASTATOCONTIG2BIN.out.fastatocontig2bin)
.join(PRODIGAL.out.amino_acid_fasta)
input[1] = []
"""
}
}

// The log is checked by substring only; unlike the "noproteins" test,
// fasta_proteins is included in the snapshot here.
then {
assertAll(
{ assert process.success },
{ assert path(process.out.log.get(0).get(1)).text.contains("Dereplicating, aggregating, and scoring bins") },
{ assert snapshot(
process.out.summary,
process.out.contig2bin,
process.out.eval,
process.out.bins,
process.out.pdfs,
process.out.fasta_proteins,
process.out.candidates_faa,
process.out.fasta_archaea_scg,
process.out.fasta_bacteria_scg,
process.out.b6,
process.out.seqlength,
process.out.versions
).match() }
)
}

}

// Stub run: same inputs as the "proteins" test, executed with the -stub option.
// No config file or module_args are set for this test.
test("dastool dastool - bacteroides fragilis - stub") {

options "-stub"

when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ],
file(params.modules_testdata_base_path + "genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz", checkIfExists: true)
])
.join(DASTOOL_FASTATOCONTIG2BIN.out.fastatocontig2bin)
.join(PRODIGAL.out.amino_acid_fasta)
input[1] = []
"""
}
}

// The complete set of process outputs is snapshotted in one go.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}
}

Loading

0 comments on commit 6d73658

Please sign in to comment.