From 18398146e60fa2855c1d1ed31a3897ce0acac0c2 Mon Sep 17 00:00:00 2001 From: luisas Date: Thu, 5 Dec 2024 16:47:46 +0100 Subject: [PATCH 1/8] up --- .params_2024-12-02_11-11-58.json | 50 +++++++ assets/samplesheet.csv | 3 + assets/toolsheet.csv | 2 + bin/pdbs_to_fasta.py | 35 +++++ conf/test_pdb.config | 13 +- modules.json | 131 +++++++++++++----- modules/local/add_pdbheader.nf | 22 +++ modules/local/custom_pdbtofasta.nf | 42 ++++++ .../nf-core/fastavalidator/environment.yml | 7 + modules/nf-core/fastavalidator/main.nf | 62 +++++++++ modules/nf-core/fastavalidator/meta.yml | 61 ++++++++ .../nf-core/fastavalidator/tests/main.nf.test | 60 ++++++++ .../fastavalidator/tests/main.nf.test.snap | 76 ++++++++++ modules/nf-core/fastavalidator/tests/tags.yml | 2 + modules/nf-core/foldmason/easymsa/main.nf | 13 +- modules/nf-core/foldmason/easymsa/meta.yml | 9 ++ .../foldmason/easymsa/tests/main.nf.test | 89 +++++++----- .../foldmason/easymsa/tests/main.nf.test.snap | 83 +++++++++-- nextflow.config | 3 + nextflow_schema.json | 6 + subworkflows/local/align.nf | 40 +++++- subworkflows/local/compute_trees.nf | 35 ++++- subworkflows/local/preprocess_optionaldata.nf | 26 ++++ test.csv | 3 + test_merged.csv | 1 + test_merging.groovy | 129 +++++++++++++++++ udo systemctl enable docker | 21 +++ workflows/multiplesequencealign.nf | 21 ++- 28 files changed, 944 insertions(+), 101 deletions(-) create mode 100644 .params_2024-12-02_11-11-58.json create mode 100644 assets/samplesheet.csv create mode 100644 assets/toolsheet.csv create mode 100755 bin/pdbs_to_fasta.py create mode 100644 modules/local/add_pdbheader.nf create mode 100644 modules/local/custom_pdbtofasta.nf create mode 100644 modules/nf-core/fastavalidator/environment.yml create mode 100644 modules/nf-core/fastavalidator/main.nf create mode 100644 modules/nf-core/fastavalidator/meta.yml create mode 100644 modules/nf-core/fastavalidator/tests/main.nf.test create mode 100644 modules/nf-core/fastavalidator/tests/main.nf.test.snap 
create mode 100644 modules/nf-core/fastavalidator/tests/tags.yml create mode 100644 subworkflows/local/preprocess_optionaldata.nf create mode 100644 test.csv create mode 100644 test_merged.csv create mode 100644 test_merging.groovy create mode 100644 udo systemctl enable docker diff --git a/.params_2024-12-02_11-11-58.json b/.params_2024-12-02_11-11-58.json new file mode 100644 index 00000000..538e9479 --- /dev/null +++ b/.params_2024-12-02_11-11-58.json @@ -0,0 +1,50 @@ +{ + "input": "https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv", + "tools": "https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/toolsheet/v1.0/toolsheet_small.csv", + "templates_suffix": ".pdb", + "optional_data_dir": null, + "build_consensus": true, + "skip_stats": false, + "calc_sim": true, + "calc_seq_stats": true, + "extract_plddt": true, + "skip_eval": false, + "calc_sp": true, + "calc_tc": true, + "calc_irmsd": true, + "calc_gaps": true, + "calc_tcs": true, + "skip_compression": false, + "multiqc_config": null, + "multiqc_title": null, + "multiqc_logo": null, + "max_multiqc_email_size": "25.MB", + "multiqc_methods_description": null, + "skip_multiqc": false, + "outdir": "results", + "publish_dir_mode": "copy", + "email": null, + "email_on_fail": null, + "plaintext_email": false, + "monochrome_logs": false, + "hook_url": null, + "help": false, + "help_full": false, + "show_hidden": false, + "version": false, + "pipelines_testdata_base_path": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "shiny_app": "/home/luisasantus/Desktop/multiplesequencealign/bin/shiny_app", + "skip_shiny": false, + "shiny_trace_mode": "latest", + "config_profile_name": "Full test profile", + "config_profile_description": "Full test dataset to check pipeline function", + "custom_config_version": "master", + "custom_config_base": "https://raw.githubusercontent.com/nf-core/configs/master", + 
"config_profile_contact": null, + "config_profile_url": null, + "validate_params": true, + "igenomes_ignore": true, + "genomes": { + + } +} \ No newline at end of file diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv new file mode 100644 index 00000000..34b76618 --- /dev/null +++ b/assets/samplesheet.csv @@ -0,0 +1,3 @@ +id,fasta,reference,optional_data +seatoxin-ref,,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz +toxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin.ref, \ No newline at end of file diff --git a/assets/toolsheet.csv b/assets/toolsheet.csv new file mode 100644 index 00000000..2a779362 --- /dev/null +++ b/assets/toolsheet.csv @@ -0,0 +1,2 @@ +tree,args_tree,aligner,args_aligner +FAMSA,,FOLDMASON, \ No newline at end of file diff --git a/bin/pdbs_to_fasta.py b/bin/pdbs_to_fasta.py new file mode 100755 index 00000000..5ad66d23 --- /dev/null +++ b/bin/pdbs_to_fasta.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +# read in multiple pdb files, extract the sequence and write to a fasta file +import sys +from Bio import PDB +from Bio.SeqUtils import seq1 + + +def pdb_to_fasta(pdb_file): + """ + Extract the sequence from a PDB file and format it in FASTA. 
+ """ + parser = PDB.PDBParser(QUIET=True) + structure = parser.get_structure(pdb_file, pdb_file) + fasta_sequences = [] + file_id = pdb_file.rsplit(".", 1)[0] # Use the file name without extension as ID + + for model in structure: + for chain in model: + sequence = [] + for residue in chain: + if PDB.is_aa(residue, standard=True): + sequence.append(seq1(residue.resname)) + if sequence: + fasta_sequences.append(f">{file_id}\n{''.join(sequence)}") + return "\n".join(fasta_sequences) + +def main(): + pdb_files = sys.argv[1:] + for pdb_file in pdb_files: + fasta = pdb_to_fasta(pdb_file) + print(f"{fasta}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/conf/test_pdb.config b/conf/test_pdb.config index f0eb3fd6..b958bd67 100644 --- a/conf/test_pdb.config +++ b/conf/test_pdb.config @@ -24,12 +24,13 @@ params { config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' - skip_stats = true - calc_irmsd = true - calc_sp = false - calc_tc = false - calc_gaps = false - calc_tcs = false + skip_preprocessing = false + skip_stats = true + calc_irmsd = true + calc_sp = false + calc_tc = false + calc_gaps = false + calc_tcs = false // Input data input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test.csv' diff --git a/modules.json b/modules.json index 3b7e8f53..02f0483e 100644 --- a/modules.json +++ b/modules.json @@ -8,139 +8,200 @@ "clustalo/align": { "branch": "master", "git_sha": "2a8530b890878747f5063a894bad9fb2abd5c071", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "clustalo/guidetree": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "csvtk/concat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "csvtk/join": { "branch": 
"master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/csvtk/join/csvtk-join.diff" }, "famsa/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "famsa/guidetree": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, - "foldmason/easymsa": { + "fastavalidator": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "foldmason/easymsa": { + "branch": "master", + "git_sha": "8541ec46706d6610b032748fa51acf4b3094ced8", + "installed_by": [ + "modules" + ] }, "kalign/align": { "branch": "master", "git_sha": "cadb9bbfe56001ac421e0ee87808b0ccc754593a", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/kalign/align/kalign-align.diff" }, "learnmsa/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mafft": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mafft/align": { "branch": "master", "git_sha": "868cb0d7fc4862991fb7c2b4cd7289806cd53f81", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mafft/guidetree": { "branch": "master", "git_sha": "968b494e20f439a9ed3d23c89274e6a4a625eb92", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "magus/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mtmalign/align": { "branch": "master", "git_sha": "4eecd9a0c06fa508ae314c06ac952c161c019679", - "installed_by": 
["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "muscle5/super5": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pigz/compress": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pigz/uncompress": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/align": { "branch": "master", "git_sha": "66b22564bc1bc0db7292f2073cdef954ead773e7", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/alncompare": { "branch": "master", "git_sha": "ffa000ab3c33df25a165b5f9a039c4cbb665a77b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/consensus": { "branch": "master", "git_sha": "023e51187884ea6cc7290767486f551565f1b77a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/irmsd": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/regressive": { "branch": "master", "git_sha": "66b22564bc1bc0db7292f2073cdef954ead773e7", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/seqreformat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/tcs": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + 
"modules" + ] }, "upp/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -149,20 +210,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "1b89f75f1aa2021ec3360d0deccd0f6e97240551", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/local/add_pdbheader.nf b/modules/local/add_pdbheader.nf new file mode 100644 index 00000000..b3106c41 --- /dev/null +++ b/modules/local/add_pdbheader.nf @@ -0,0 +1,22 @@ +process ADD_PDBHEADER{ + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0': + 'biocontainers/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0' }" + + label "process_low" + + input: + tuple val(meta), path(pdb) + + output: + tuple val(meta), path("${pdb.baseName}.pdb"), emit: pdb + + script: + """ + export TEMP='./' + # Add the headers + mkdir pdbs_unprocessed + mv $pdb pdbs_unprocessed + t_coffee -other_pg extract_from_pdb -infile pdbs_unprocessed/$pdb > ${pdb.baseName}.pdb + """ +} \ No newline at end of file diff --git a/modules/local/custom_pdbtofasta.nf b/modules/local/custom_pdbtofasta.nf new file mode 100644 index 00000000..17bb256d --- /dev/null +++ b/modules/local/custom_pdbtofasta.nf @@ -0,0 +1,42 @@ +process CUSTOM_PDBSTOFASTA { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::python=3.11.0 conda-forge::biopython=1.80 conda-forge::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' : + 'biocontainers/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' }" + + + input: + tuple val(meta), path(structures) + + output: + tuple val (meta), path("${prefix}.fa"), emit: fasta + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + """ + pdbs_to_fasta.py ${structures} > ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastavalidator/environment.yml b/modules/nf-core/fastavalidator/environment.yml new file mode 100644 index 00000000..44d55c1f --- /dev/null +++ b/modules/nf-core/fastavalidator/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::py_fasta_validator=0.6" diff --git a/modules/nf-core/fastavalidator/main.nf b/modules/nf-core/fastavalidator/main.nf new file mode 100644 index 00000000..ac5470fb --- /dev/null +++ b/modules/nf-core/fastavalidator/main.nf @@ -0,0 +1,62 @@ +process FASTAVALIDATOR { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/py_fasta_validator:0.6--py37h595c7a6_0': + 'biocontainers/py_fasta_validator:0.6--py37h595c7a6_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.success.log') , emit: success_log , optional: true + tuple val(meta), path('*.error.log') , emit: error_log , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + py_fasta_validator \\ + -f $fasta \\ + 2> "${prefix}.error.log" \\ + || echo "Errors from fasta_validate printed to ${prefix}.error.log" + + if [ \$(cat "${prefix}.error.log" | wc -l) -gt 0 ]; then + echo "Validation failed..." + + cat \\ + "${prefix}.error.log" + else + echo "Validation successful..." + + mv \\ + "${prefix}.error.log" \\ + fasta_validate.stderr + + echo "Validation successful..." \\ + > "${prefix}.success.log" + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + py_fasta_validator: \$(py_fasta_validator -v | sed 's/.* version //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "Validation successful..." \\ + > "${prefix}.success.log" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + py_fasta_validator: \$(py_fasta_validator -v | sed 's/.* version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastavalidator/meta.yml b/modules/nf-core/fastavalidator/meta.yml new file mode 100644 index 00000000..94198e62 --- /dev/null +++ b/modules/nf-core/fastavalidator/meta.yml @@ -0,0 +1,61 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "fastavalidator" +description: | + "Python C-extension for a simple validator for fasta files. The module emits the validated file or an + error log upon validation failure." 
+keywords: + - fasta + - validation + - genome +tools: + - fasta_validate: + description: | + "Python C-extension for a simple C code to validate a fasta file. It only checks a few things, + and by default only sets its response via the return code, + so you will need to check that!" + homepage: "https://github.com/linsalrob/py_fasta_validator" + documentation: "https://github.com/linsalrob/py_fasta_validator" + tool_dev_url: "https://github.com/linsalrob/py_fasta_validator" + doi: "10.5281/zenodo.5002710" + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing file information + e.g. [ id:'test' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.fasta" +output: + - success_log: + - meta: + type: map + description: | + Groovy Map containing file information + e.g. [ id:'test' ] + - "*.success.log": + type: file + description: Log file for successful validation + pattern: "*.success.log" + - error_log: + - meta: + type: map + description: | + Groovy Map containing file information + e.g. 
[ id:'test' ] + - "*.error.log": + type: file + description: Log file for failed validation + pattern: "*.error.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@gallvp" +maintainers: + - "@gallvp" diff --git a/modules/nf-core/fastavalidator/tests/main.nf.test b/modules/nf-core/fastavalidator/tests/main.nf.test new file mode 100644 index 00000000..39b00d8b --- /dev/null +++ b/modules/nf-core/fastavalidator/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process FASTAVALIDATOR" + script "../main.nf" + process "FASTAVALIDATOR" + + tag "modules" + tag "modules_nfcore" + tag "fastavalidator" + + test("sarscov2-fasta-valid") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.success_log != null }, + { assert process.out.error_log == [] }, + { assert path(process.out.success_log.get(0).get(1)).getText().contains("Validation successful...") } + ) + } + + } + + test("sarscov2-gff3-invalid") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.success_log == [] }, + { assert process.out.error_log != null }, + { assert path(process.out.error_log.get(0).get(1)).getText().contains("genome.gff3 does not start with a >") } + ) + } + + } +} diff --git a/modules/nf-core/fastavalidator/tests/main.nf.test.snap b/modules/nf-core/fastavalidator/tests/main.nf.test.snap new file mode 100644 index 00000000..382dee72 
--- /dev/null +++ b/modules/nf-core/fastavalidator/tests/main.nf.test.snap @@ -0,0 +1,76 @@ +{ + "sarscov2-fasta-valid": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.success.log:md5,b0b859eda1db5cd43915846e00ebc22c" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ], + "error_log": [ + + ], + "success_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.success.log:md5,b0b859eda1db5cd43915846e00ebc22c" + ] + ], + "versions": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ] + } + ], + "timestamp": "2023-11-28T11:23:25.106872" + }, + "sarscov2-gff3-invalid": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.error.log:md5,531d520c0e7767176f743f197f1f87b3" + ] + ], + "2": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ], + "error_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.error.log:md5,531d520c0e7767176f743f197f1f87b3" + ] + ], + "success_log": [ + + ], + "versions": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ] + } + ], + "timestamp": "2023-11-28T11:23:29.40324" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastavalidator/tests/tags.yml b/modules/nf-core/fastavalidator/tests/tags.yml new file mode 100644 index 00000000..c3c77576 --- /dev/null +++ b/modules/nf-core/fastavalidator/tests/tags.yml @@ -0,0 +1,2 @@ +fastavalidator: + - "modules/nf-core/fastavalidator/**" diff --git a/modules/nf-core/foldmason/easymsa/main.nf b/modules/nf-core/foldmason/easymsa/main.nf index a1e4e910..bb1737f0 100644 --- a/modules/nf-core/foldmason/easymsa/main.nf +++ b/modules/nf-core/foldmason/easymsa/main.nf @@ -3,10 +3,11 @@ process FOLDMASON_EASYMSA { label 'process_medium' conda "${moduleDir}/environment.yml" - container "community.wave.seqera.io/library/foldmason_pigz:97b3311addb0f4a7" + container 
"community.wave.seqera.io/library/foldmason_pigz:54849036d93c89ed" input: - tuple val(meta), path(pdbs) + tuple val(meta) , path(pdbs) + tuple val(meta2), path(tree) val(compress) output: @@ -20,13 +21,15 @@ process FOLDMASON_EASYMSA { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" + def options_tree = tree ? "--guide-tree $tree" : "" """ foldmason easy-msa \\ - $args \\ - --threads $task.cpus \\ ${pdbs} \\ ${prefix} \\ - tmp + tmp \\ + ${options_tree} \\ + $args \\ + --threads $task.cpus if ${compress}; then pigz -p ${task.cpus} ${prefix}_3di.fa diff --git a/modules/nf-core/foldmason/easymsa/meta.yml b/modules/nf-core/foldmason/easymsa/meta.yml index cc00f44c..140eb81e 100644 --- a/modules/nf-core/foldmason/easymsa/meta.yml +++ b/modules/nf-core/foldmason/easymsa/meta.yml @@ -26,6 +26,15 @@ input: type: file description: Input protein structures in PDB format. pattern: "*.{pdb,mmcif}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - tree: + type: file + description: Input guide tree in Newick format. + pattern: "*.{dnd,nwk}" - - compress: type: boolean description: Flag representing whether the output MSA should be compressed. 
diff --git a/modules/nf-core/foldmason/easymsa/tests/main.nf.test b/modules/nf-core/foldmason/easymsa/tests/main.nf.test index 663c6975..d6db9bca 100644 --- a/modules/nf-core/foldmason/easymsa/tests/main.nf.test +++ b/modules/nf-core/foldmason/easymsa/tests/main.nf.test @@ -9,20 +9,35 @@ nextflow_process { tag "foldmason" tag "foldmason/easymsa" tag "untar" + tag "famsa" + tag "famsa/guidetree" + + setup { + + run("UNTAR") { + script "../../../../../modules/nf-core/untar/main.nf" + process { + """ + archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/af2_structures/seatoxin-ref.tar.gz", checkIfExists: true) + input[0] = Channel.of(tuple([id:'test'], archive)) + """ + } + } - test("Test on seatoxin dataset - uncompressed") { - setup { - - run("UNTAR") { - script "../../../../../modules/nf-core/untar/main.nf" - process { - """ - archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) - input[0] = Channel.of(tuple([id:'test'], archive)) - """ - } + run("FAMSA_GUIDETREE") { + script "../../../../../modules/nf-core/famsa/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test_tree' ], // meta map + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ } } + } + + + test("Test on seatoxin dataset - uncompressed") { when { params { @@ -30,7 +45,8 @@ nextflow_process { process { """ input[0] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} - input[1] = false + input[1] = [[:],[]] + input[2] = false """ } } @@ -44,26 +60,37 @@ nextflow_process { } test("Test on seatoxin dataset - compressed") { - setup { - - run("UNTAR") { - script "../../../../../modules/nf-core/untar/main.nf" - process { - """ - archive = 
file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) - input[0] = Channel.of(tuple([id:'test'], archive)) - """ - } + + when { + params { + } + process { + """ + input[0] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file(dir).listFiles().collect()]} + input[1] = [[:],[]] + input[2] = true + """ } } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Test on seatoxin dataset - guide_tree") { + when { params { } process { """ input[0] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file(dir).listFiles().collect()]} - input[1] = true + input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_tree'], tree]} + input[2] = false """ } } @@ -79,26 +106,14 @@ nextflow_process { test("Stub run") { options "-stub" - setup { - - run("UNTAR") { - script "../../../../../modules/nf-core/untar/main.nf" - process { - """ - archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) - input[0] = Channel.of(tuple([id:'test'], archive)) - """ - } - } - } - when { params { } process { """ input[0] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} - input[1] = false + input[1] = [[:],[]] + input[2] = false """ } } diff --git a/modules/nf-core/foldmason/easymsa/tests/main.nf.test.snap b/modules/nf-core/foldmason/easymsa/tests/main.nf.test.snap index 50af9b7f..384d2021 100644 --- a/modules/nf-core/foldmason/easymsa/tests/main.nf.test.snap +++ b/modules/nf-core/foldmason/easymsa/tests/main.nf.test.snap @@ -42,7 +42,11 @@ ] } ], - "timestamp": "2024-08-28T09:10:12.591561643" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T10:56:12.836231763" }, "Test on seatoxin dataset - uncompressed": { "content": [ @@ -52,7 +56,7 @@ { "id": 
"test" }, - "test_3di.fa:md5,ba1b6c8c5df11bdebfea12070bd9cb17" + "test_3di.fa:md5,e7da437c6ddb5ced60ff7c49b147a65d" ] ], "1": [ @@ -60,7 +64,7 @@ { "id": "test" }, - "test_aa.fa:md5,33e93479603115b46ef76af3f6a20cf1" + "test_aa.fa:md5,2d90cd080424db8024e6c404c1cfc0b4" ] ], "2": [ @@ -71,7 +75,7 @@ { "id": "test" }, - "test_3di.fa:md5,ba1b6c8c5df11bdebfea12070bd9cb17" + "test_3di.fa:md5,e7da437c6ddb5ced60ff7c49b147a65d" ] ], "msa_aa": [ @@ -79,7 +83,7 @@ { "id": "test" }, - "test_aa.fa:md5,33e93479603115b46ef76af3f6a20cf1" + "test_aa.fa:md5,2d90cd080424db8024e6c404c1cfc0b4" ] ], "versions": [ @@ -87,7 +91,11 @@ ] } ], - "timestamp": "2024-08-28T08:58:09.52040475" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T10:55:41.89060384" }, "Test on seatoxin dataset - compressed": { "content": [ @@ -97,7 +105,7 @@ { "id": "test" }, - "test_3di.fa.gz:md5,ba1b6c8c5df11bdebfea12070bd9cb17" + "test_3di.fa.gz:md5,e7da437c6ddb5ced60ff7c49b147a65d" ] ], "1": [ @@ -105,7 +113,7 @@ { "id": "test" }, - "test_aa.fa.gz:md5,33e93479603115b46ef76af3f6a20cf1" + "test_aa.fa.gz:md5,2d90cd080424db8024e6c404c1cfc0b4" ] ], "2": [ @@ -116,7 +124,7 @@ { "id": "test" }, - "test_3di.fa.gz:md5,ba1b6c8c5df11bdebfea12070bd9cb17" + "test_3di.fa.gz:md5,e7da437c6ddb5ced60ff7c49b147a65d" ] ], "msa_aa": [ @@ -124,7 +132,7 @@ { "id": "test" }, - "test_aa.fa.gz:md5,33e93479603115b46ef76af3f6a20cf1" + "test_aa.fa.gz:md5,2d90cd080424db8024e6c404c1cfc0b4" ] ], "versions": [ @@ -132,6 +140,59 @@ ] } ], - "timestamp": "2024-08-29T13:44:02.750191" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T10:55:52.135344443" + }, + "Test on seatoxin dataset - guide_tree": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_3di.fa:md5,46fa911158bb736c054dfad0378832b4" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_aa.fa:md5,7ada48f0152342787a46505b9e8a2fae" + ] + ], + "2": [ + 
"versions.yml:md5,da4694171d1b0bb9559f7049334126ed" + ], + "msa_3di": [ + [ + { + "id": "test" + }, + "test_3di.fa:md5,46fa911158bb736c054dfad0378832b4" + ] + ], + "msa_aa": [ + [ + { + "id": "test" + }, + "test_aa.fa:md5,7ada48f0152342787a46505b9e8a2fae" + ] + ], + "versions": [ + "versions.yml:md5,da4694171d1b0bb9559f7049334126ed" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T10:56:02.473496089" } } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index ddeae18f..b3ccbac4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,6 +15,9 @@ params { templates_suffix = ".pdb" optional_data_dir = null + // Preprocessing + skip_preprocessing = true + // Alignment build_consensus = false diff --git a/nextflow_schema.json b/nextflow_schema.json index a64a9b96..e74d0f99 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -48,6 +48,12 @@ "help_text": "This is a folder with all the optional data files to be used in the pipeline.", "fa_icon": "fas fa-folder-open" }, + "skip_preprocessing": { + "type": "boolean", + "description": "Skip the preprocessing step for the input files.", + "fa_icon": "fas fa-fast-forward", + "help_text": "Skip the preprocessing step and use the files directly." 
+ }, "outdir": { "type": "string", "format": "directory-path", diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 9a98fb46..9107f6eb 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -54,6 +54,7 @@ workflow ALIGN { // ------------------------------------------------ COMPUTE_TREES ( ch_fastas, + ch_optional_data, ch_tools_split.tree.unique() ) trees = COMPUTE_TREES.out.trees @@ -68,9 +69,15 @@ workflow ALIGN { // ------------------------------------------------ // Add back trees to the fasta channel + // And prepare the inout channels for the aligners // ------------------------------------------------ + + // Tools that accept sequence and tree ch_fasta_tools .join(trees, by: [0], remainder:true ) + .filter{ + it[1] != null + } .map { metafasta_tree, metaalign, fasta, tree -> [ metafasta_tree + metaalign, fasta, tree ] @@ -95,6 +102,8 @@ workflow ALIGN { } .set { ch_fasta_trees } + + // tools that accept only optional data ch_optional_data.combine(ch_tools) .map { metadependency, template, dependency, metatree, metaalign -> @@ -102,10 +111,26 @@ workflow ALIGN { } .branch { mtmalign: it[0]["aligner"] == "MTMALIGN" - foldmason: it[0]["aligner"] == "FOLDMASON" } .set { ch_optional_data_tools } + + // tools that accept optional data and tree + ch_optional_data.combine(ch_tools) + .map { + metadependency, template, dependency, metatree, metaalign -> + [ metadependency+metatree , metaalign, template, dependency ] + } + .combine(trees, by: 0) + .map { + metratreeanddep, metaalign, template, dependency, tree -> + [ metratreeanddep+metaalign, tree, template, dependency ] + } + .branch { + foldmason: it[0]["aligner"] == "FOLDMASON" + } + .set { ch_optional_data_tools_tree } + // ------------------------------------------------ // Compute the alignments // ------------------------------------------------ @@ -283,7 +308,7 @@ workflow ALIGN { ch_msa = ch_msa.mix(UPP_ALIGN.out.alignment) ch_versions = 
ch_versions.mix(UPP_ALIGN.out.versions.first()) - // 2. SEQUENCE + STRUCTURE BASED + // // 2. SEQUENCE + STRUCTURE BASED if(params.templates_suffix == ".pdb"){ // ----------------- 3DCOFFEE ------------------ @@ -324,15 +349,20 @@ workflow ALIGN { ch_msa = ch_msa.mix(MTMALIGN_ALIGN.out.alignment) ch_versions = ch_versions.mix(MTMALIGN_ALIGN.out.versions.first()) - ch_optional_data_tools.foldmason + + // ----------------- FOLDMASON ------------------ + + ch_optional_data_tools_tree.foldmason .multiMap { - meta, template, dependency -> - pdbs: [ meta, dependency ] + meta, tree, template, dependency -> + pdbs: [ meta, dependency ] + trees: [ meta, tree ] } .set { ch_pdb_foldmason } FOLDMASON_EASYMSA ( ch_pdb_foldmason.pdbs, + ch_pdb_foldmason.trees, compress ) ch_msa = ch_msa.mix(FOLDMASON_EASYMSA.out.msa_aa) diff --git a/subworkflows/local/compute_trees.nf b/subworkflows/local/compute_trees.nf index edd497a8..406b91c4 100644 --- a/subworkflows/local/compute_trees.nf +++ b/subworkflows/local/compute_trees.nf @@ -6,19 +6,47 @@ include { FAMSA_GUIDETREE } from '../../modules/nf-core/famsa/guidetree/main' include { CLUSTALO_GUIDETREE } from '../../modules/nf-core/clustalo/guidetree/main' include { MAFFT_GUIDETREE } from '../../modules/nf-core/mafft/guidetree/main' +include { CUSTOM_PDBSTOFASTA } from '../../modules/local/custom_pdbtofasta.nf' +include { FASTAVALIDATOR } from '../../modules/nf-core/fastavalidator/main' + workflow COMPUTE_TREES { take: - ch_fastas //channel: [ meta, /path/to/file.fasta ] - tree_tools //channel: [ meta ] ( tools to be run: meta.tree, meta.args_tree ) + ch_fastas //channel: [ meta, /path/to/file.fasta ] + ch_optional_data //channel: [ meta, template, [ /path/to/file1, /path/to/file2, ... 
] ] + tree_tools //channel: [ meta ] ( tools to be run: meta.tree, meta.args_tree ) main: ch_versions = Channel.empty() + ch_trees = Channel.empty() + + // + // For the inputs that only have optional data but not a fasta + // we need to generate the fasta file + // + ch_optional_data + .join(ch_fastas, remainder:true) + .filter{ + it[-1] == null + } + .map{ + it -> [it[0], it[2]] + }.set { ch_optional_data_no_fasta } + + CUSTOM_PDBSTOFASTA(ch_optional_data_no_fasta) + + if(!params.skip_preprocessing){ + FASTAVALIDATOR(CUSTOM_PDBSTOFASTA.out.fasta) + ch_versions = ch_versions.mix(FASTAVALIDATOR.out.versions) + } + ch_fastas_all = ch_fastas.mix(CUSTOM_PDBSTOFASTA.out.fasta) + + // // Render the required guide trees // - ch_fastas + ch_fastas_all .combine(tree_tools) .map { metafasta, fasta, metatree -> @@ -30,6 +58,7 @@ workflow COMPUTE_TREES { mafft: it[0]["tree"] == "MAFFT" } .set { ch_fastas_fortrees } + FAMSA_GUIDETREE (ch_fastas_fortrees.famsa) ch_trees = FAMSA_GUIDETREE.out.tree diff --git a/subworkflows/local/preprocess_optionaldata.nf b/subworkflows/local/preprocess_optionaldata.nf new file mode 100644 index 00000000..f352f541 --- /dev/null +++ b/subworkflows/local/preprocess_optionaldata.nf @@ -0,0 +1,26 @@ + +include { ADD_PDBHEADER } from '../../modules/local/add_pdbheader.nf' + +workflow PREPROCESS_OPTIONALDATA { + take: + ch_optional_data //channel: [ meta, [file1, ] ] + + main: + + ch_versions = Channel.empty() + ch_preprocessed_data = Channel.empty() + + if(params.templates_suffix == ".pdb"){ + // If the optional data is a pdb file, we can preprocess them to make + // them compatible with all the alignment tools + ADD_PDBHEADER(ch_optional_data.transpose()) + ADD_PDBHEADER.out.pdb + .groupTuple() + .set { ch_preprocessed_data } + } + + emit: + preprocessed_optionaldata = ch_preprocessed_data + versions = ch_versions + +} \ No newline at end of file diff --git a/test.csv b/test.csv new file mode 100644 index 00000000..32b52aab --- /dev/null +++ 
b/test.csv @@ -0,0 +1,3 @@ +id,fasta,length +1,seq1,100 +2,seq2,200 diff --git a/test_merged.csv b/test_merged.csv new file mode 100644 index 00000000..5859d151 --- /dev/null +++ b/test_merged.csv @@ -0,0 +1 @@ +[id,seqlength_mean,seqlength_median,seqlength_max,n_sequences,perc_sim,tree,args_tree,args_tree_clean,aligner,args_aligner,args_aligner_clean,total_gaps,avg_gaps,tc,sp,TCS, seatoxin-ref,47.0,48.0,49,5,46.20,CLUSTALO,,,REGRESSIVE,-reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln,-reg_-reg_method_famsa_msa_-reg_nseq_1000_-output_fasta_aln,20,4,46.9,81.0,835, seatoxin-ref,47.0,48.0,49,5,46.20,,,,MAFFT,,,35,7,59.2,86.3,811, seatoxin-ref,47.0,48.0,49,5,46.20,FAMSA,-gt upgma -medoidtree,-gt_upgma_-medoidtree,FAMSA,,,20,4,46.9,81.0,835, seatoxin-ref,47.0,48.0,49,5,46.20,,,,FAMSA,,,20,4,46.9,81.0,835, seatoxin-ref,47.0,48.0,49,5,46.20,,,,TCOFFEE,-output fasta_aln,-output_fasta_aln,20,4,51.0,81.9,827, seatoxin-ref,47.0,48.0,49,5,46.20,,,,CLUSTALO,,,20,4,51.0,81.9,835, seatoxin-ref,47.0,48.0,49,5,46.20,,,,MAGUS,,,35,7,55.1,85.4,813, seatoxin-ref,47.0,48.0,49,5,46.20,,,,REGRESSIVE,-reg_nseq 3 -reg -reg_method famsa_msa -output fasta_aln,-reg_nseq_3_-reg_-reg_method_famsa_msa_-output_fasta_aln,20,4,46.9,81.7,834, seatoxin-ref,47.0,48.0,49,5,46.20,,,null,CONSENSUS,,null,35,7,59.2,85.4,819, toxin-ref,63.5,61.0,74,20,44.45,CLUSTALO,,,REGRESSIVE,-reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln,-reg_-reg_method_famsa_msa_-reg_nseq_1000_-output_fasta_aln,330,16.5,61.0,89.3,802, toxin-ref,63.5,61.0,74,20,44.45,FAMSA,-gt upgma -medoidtree,-gt_upgma_-medoidtree,FAMSA,,,310,15.5,51.9,89.2,810, toxin-ref,63.5,61.0,74,20,44.45,,,,CLUSTALO,,,290,14.5,64.9,92.2,801, toxin-ref,63.5,61.0,74,20,44.45,,,,MAGUS,,,310,15.5,61.0,94.8,814, toxin-ref,63.5,61.0,74,20,44.45,,,,MAFFT,,,310,15.5,54.5,89.4,796, toxin-ref,63.5,61.0,74,20,44.45,,,,FAMSA,,,330,16.5,61.0,89.3,802, toxin-ref,63.5,61.0,74,20,44.45,,,,TCOFFEE,-output 
fasta_aln,-output_fasta_aln,310,15.5,54.5,92.5,826, toxin-ref,63.5,61.0,74,20,44.45,,,,REGRESSIVE,-reg_nseq 3 -reg -reg_method famsa_msa -output fasta_aln,-reg_nseq_3_-reg_-reg_method_famsa_msa_-output_fasta_aln,290,14.5,54.5,92.5,801, toxin-ref,63.5,61.0,74,20,44.45,,,,MUSCLE5,,,310,15.5,63.6,93.8,815, toxin-ref,63.5,61.0,74,20,44.45,,,null,CONSENSUS,,null,290,14.5,62.3,95.0,815] \ No newline at end of file diff --git a/test_merging.groovy b/test_merging.groovy new file mode 100644 index 00000000..5066bb6d --- /dev/null +++ b/test_merging.groovy @@ -0,0 +1,129 @@ +@Grab('com.xlson.groovycsv:groovycsv:1.3') +import static com.xlson.groovycsv.CsvParser.parseCsv + +def cleanTrace(trace) { + // Convert each row into a mutable map for dynamic property addition + def cleanedTrace = trace.collect { row -> + def mutableRow = row.toMap() + + // Extract the tag from the 'name' column using a regex pattern + def tagMatch = (mutableRow.name =~ /\((.*)\)/) + mutableRow.tag = tagMatch ? tagMatch[0][1] : null + + // Extract 'id' and 'args' from the tag safely + mutableRow.id = mutableRow.tag?.tokenize(' ')?.first() + mutableRow.args = mutableRow.tag?.split("args:")?.with { it.size() > 1 ? it[1].trim() : null } + + // Process the 'full_name' to extract workflow and process details + mutableRow.full_name = mutableRow.name.split(/\(/)?.first()?.trim() + def nameParts = mutableRow.full_name?.tokenize(':') ?: [] + mutableRow.process = nameParts ? nameParts.last() : null + mutableRow.subworkflow = nameParts.size() > 1 ? 
nameParts[-2] : null + + // Replace "null" strings with actual null values + mutableRow.each { key, value -> + if (value == 'null') { + mutableRow[key] = null + } + } + + return mutableRow + } + + // Return the cleaned trace + return cleanedTrace.findAll { it != null } +} + +// Utility function to convert time strings to minutes +def convertTime(String timeStr) { + def pattern = /((?\d+(\.\d+)?)h)?\s*((?\d+(\.\d+)?)m)?\s*((?\d+(\.\d+)?)s)?\s*((?\d+(\.\d+)?)ms)?/ + def matcher = timeStr.trim() =~ pattern + + if (!matcher.matches()) { + throw new IllegalArgumentException("Time string is not in the correct format: $timeStr") + } + + def hours = matcher.group('hours')?.toDouble() ?: 0.0 + def minutes = matcher.group('minutes')?.toDouble() ?: 0.0 + def seconds = matcher.group('seconds')?.toDouble() ?: 0.0 + def milliseconds = matcher.group('milliseconds')?.toDouble() ?: 0.0 + + return (hours * 60) + minutes + (seconds / 60) + (milliseconds / 60000) +} + +// Utility function to convert memory to GB +def convertMemory(String memory) { + if (!memory) return null + + if (memory.contains("GB")) { + return memory.replace("GB", "").toDouble() + } else if (memory.contains("MB")) { + return memory.replace("MB", "").toDouble() / 1000 + } else if (memory.contains("KB")) { + return memory.replace("KB", "").toDouble() / 1000000 + } + return null +} + +// Prepare trace trees +def prepTreeTrace(trace) { + def traceTrees = trace.findAll { it.subworkflow == "COMPUTE_TREES" } + traceTrees.each { row -> + row.args_tree = row.args + row.tree = row.process.replace("_GUIDETREE", "") + row.time_tree = convertTime(row.realtime) + row.memory_tree = convertMemory(row.rss) + row.cpus_tree = row.cpus + } + return traceTrees +} + +// Prepare align traces +def prepAlignTrace(trace) { + def traceAlign = trace.findAll { it.subworkflow == "ALIGN" } + traceAlign.each { row -> + row.args_aligner = row.args + row.aligner = row.process.replace("_ALIGN", "") + row.time_align = convertTime(row.realtime) + 
row.memory_align = convertMemory(row.rss) + row.cpus_align = row.cpus + } + return traceAlign +} + +def merge_summary_and_traces(summary_file, trace_dir_path, outFileName){ + + // Read the summary file with the scientific evaluation + def data = new File(summary_file).readLines() + + // Identify and parse the latest trace file + def trace_file = new File("${trace_dir_path}").listFiles().findAll { it.name.startsWith("execution_trace") }.sort { -it.lastModified() }.take(1)[0] + + // Keep only the lines that report running times related to evaluation + def header = trace_file.readLines()[0].replaceAll("\t", ",") + def trace_file_align = trace_file.readLines().findAll { it.contains("CACHED") && it.contains("MULTIPLESEQUENCEALIGN:ALIGN") }.collect { it.replaceAll("\t", ",") }.join("\n") + def trace = header + "\n" + trace_file_align + def trace_csv = parseCsv(trace) + + def cleanTraceData = cleanTrace(trace_csv) + def traceTrees = prepTreeTrace(cleanTraceData) + def traceAlign = prepAlignTrace(cleanTraceData) + + def mergedData = [] + data.each { row -> + def treeMatch = traceTrees.find { it.id == row.id && it.tree == row.tree && it.args_tree == row.args_tree } + def alignMatch = traceAlign.find { it.id == row.id && it.aligner == row.aligner && it.args_aligner == row.args_aligner } + def mergedRow = row + (treeMatch ?: [:]) + (alignMatch ?: [:]) + mergedData << mergedRow + } + new File(outFileName).withWriter { writer -> writer.write(mergedData as String) } + +} + +outdir = "/home/luisasantus/Desktop/multiplesequencealign/results" + +def summary_file = "${outdir}/summary/complete_summary_stats_eval.csv" +def outFileName = "${outdir}/../test_merged.csv" +def trace_dir_path = "${outdir}/pipeline_info/" + +merge_summary_and_traces(summary_file, trace_dir_path, outFileName) \ No newline at end of file diff --git a/udo systemctl enable docker b/udo systemctl enable docker new file mode 100644 index 00000000..476b9dc8 --- /dev/null +++ b/udo systemctl enable docker @@ -0,0 
+1,21 @@ +● docker.service - Docker Application Container Engine + Loaded: loaded (/lib/systemd/system/docker.service; enabled; vendor preset: enabled) + Active: active (running) since Thu 2024-12-05 10:54:31 CET; 50s ago +TriggeredBy: ● docker.socket + Docs: https://docs.docker.com + Main PID: 547336 (dockerd) + Tasks: 12 + Memory: 29.6M + CGroup: /system.slice/docker.service + └─547336 /usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock + +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR systemd[1]: Starting Docker Application Container Engine... +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.616479842+01:00" level=info msg="Starting up" +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.617217460+01:00" level=info msg="detected 127.0.0.53 nameserver, assuming systemd-resolved, so using resolv.conf: /run/systemd/resolve/resolv.conf" +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.688746687+01:00" level=info msg="Loading containers: start." +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.755640983+01:00" level=info msg="Default bridge (docker0) is assigned with an IP address 10.220.0.0/24. Daemon option --bip can be used to set a preferred IP address" +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.788831417+01:00" level=info msg="Loading containers: done." 
+dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.811014156+01:00" level=info msg="Docker daemon" commit=41ca978 containerd-snapshotter=false storage-driver=overlay2 version=27.3.1 +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.811239366+01:00" level=info msg="Daemon has completed initialization" +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.859048276+01:00" level=info msg="API listen on /run/docker.sock" +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR systemd[1]: Started Docker Application Container Engine. diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index 3c42ec16..7411b556 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -22,6 +22,7 @@ include { STATS } from '../subworkflows/local/stats' include { ALIGN } from '../subworkflows/local/align' include { EVALUATE } from '../subworkflows/local/evaluate' include { TEMPLATES } from '../subworkflows/local/templates' +include { PREPROCESS_OPTIONALDATA} from '../subworkflows/local/preprocess_optionaldata' // // MODULE: local modules @@ -41,6 +42,7 @@ include { PREPARE_SHINY } from '../modules/local/prepare_shiny' include { UNTAR } from '../modules/nf-core/untar/main' include { CSVTK_JOIN as MERGE_STATS_EVAL } from '../modules/nf-core/csvtk/join/main.nf' include { PIGZ_COMPRESS } from '../modules/nf-core/pigz/compress/main' +include { FASTAVALIDATOR } from '../modules/nf-core/fastavalidator/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -63,15 +65,16 @@ workflow MULTIPLESEQUENCEALIGN { ch_shiny_stats = Channel.empty() ch_refs = Channel.empty() ch_templates = Channel.empty() - ch_optional_data = Channel.empty() + ch_optional_data = Channel.empty() ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() ch_input + .filter { 
it[1].size() > 0} .map { meta, fasta, ref, str, template -> - [ meta, file(fasta) ] + [ meta, file(fasta) ] } .set { ch_seqs } @@ -166,6 +169,20 @@ workflow MULTIPLESEQUENCEALIGN { ch_versions = ch_versions.mix(UNTAR.out.versions) } + // + // PREPROCESS INPUT FILES + // + + if(!params.skip_preprocessing){ + FASTAVALIDATOR(ch_seqs) + ch_versions = ch_versions.mix(FASTAVALIDATOR.out.versions) + + PREPROCESS_OPTIONALDATA(ch_optional_data) + ch_optional_data = PREPROCESS_OPTIONALDATA.out.preprocessed_optionaldata + ch_versions = ch_versions.mix(PREPROCESS_OPTIONALDATA.out.versions) + } + + // // TEMPLATES // From 7eb9b6606e9b5e8dbfc4836fa4ba5c472146c8e4 Mon Sep 17 00:00:00 2001 From: luisas Date: Mon, 9 Dec 2024 11:05:40 +0100 Subject: [PATCH 2/8] add extractfrom pdb --- modules.json | 7 ++ modules/local/add_pdbheader.nf | 22 ------ .../tcoffee/extractfrompdb/environment.yml | 5 ++ .../nf-core/tcoffee/extractfrompdb/main.nf | 52 ++++++++++++++ .../nf-core/tcoffee/extractfrompdb/meta.yml | 43 ++++++++++++ .../tcoffee/extractfrompdb/tests/main.nf.test | 69 +++++++++++++++++++ .../extractfrompdb/tests/main.nf.test.snap | 68 ++++++++++++++++++ nextflow.config | 2 +- subworkflows/local/preprocess_optionaldata.nf | 7 +- workflows/multiplesequencealign.nf | 8 +-- 10 files changed, 252 insertions(+), 31 deletions(-) delete mode 100644 modules/local/add_pdbheader.nf create mode 100644 modules/nf-core/tcoffee/extractfrompdb/environment.yml create mode 100644 modules/nf-core/tcoffee/extractfrompdb/main.nf create mode 100644 modules/nf-core/tcoffee/extractfrompdb/meta.yml create mode 100644 modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test create mode 100644 modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test.snap diff --git a/modules.json b/modules.json index 02f0483e..b1c145da 100644 --- a/modules.json +++ b/modules.json @@ -161,6 +161,13 @@ "modules" ] }, + "tcoffee/extractfrompdb": { + "branch": "master", + "git_sha": "1f94c91de2b9e9c6b42fca53e823cada9a8b8465", + 
"installed_by": [ + "modules" + ] + }, "tcoffee/irmsd": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/local/add_pdbheader.nf b/modules/local/add_pdbheader.nf deleted file mode 100644 index b3106c41..00000000 --- a/modules/local/add_pdbheader.nf +++ /dev/null @@ -1,22 +0,0 @@ -process ADD_PDBHEADER{ - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0': - 'biocontainers/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0' }" - - label "process_low" - - input: - tuple val(meta), path(pdb) - - output: - tuple val(meta), path("${pdb.baseName}.pdb"), emit: pdb - - script: - """ - export TEMP='./' - # Add the headers - mkdir pdbs_unprocessed - mv $pdb pdbs_unprocessed - t_coffee -other_pg extract_from_pdb -infile pdbs_unprocessed/$pdb > ${pdb.baseName}.pdb - """ -} \ No newline at end of file diff --git a/modules/nf-core/tcoffee/extractfrompdb/environment.yml b/modules/nf-core/tcoffee/extractfrompdb/environment.yml new file mode 100644 index 00000000..7cc504fc --- /dev/null +++ b/modules/nf-core/tcoffee/extractfrompdb/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::t-coffee=13.46.0.919e8c6b diff --git a/modules/nf-core/tcoffee/extractfrompdb/main.nf b/modules/nf-core/tcoffee/extractfrompdb/main.nf new file mode 100644 index 00000000..919ba381 --- /dev/null +++ b/modules/nf-core/tcoffee/extractfrompdb/main.nf @@ -0,0 +1,52 @@ +process TCOFFEE_EXTRACTFROMPDB { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/t-coffee:13.46.0.919e8c6b--hfc96bf3_0': + 'biocontainers/t-coffee:13.46.0.919e8c6b--hfc96bf3_0' }" + + input: + tuple val(meta), path(pdb) + + output: + tuple val(meta), path("${pdb.baseName}.pdb"), emit: formatted_pdb + path "versions.yml" , emit: versions + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + export TEMP='./' + mkdir old + mv ${pdb} old/ + t_coffee -other_pg extract_from_pdb \ + -infile old/${pdb} \ + $args \ + > "${pdb.baseName}.pdb" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + # Otherwise, tcoffee will crash when calling its version + export TEMP='./' + touch "${pdb.baseName}.pdb" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tcoffee/extractfrompdb/meta.yml b/modules/nf-core/tcoffee/extractfrompdb/meta.yml new file mode 100644 index 00000000..107eabe1 --- /dev/null +++ b/modules/nf-core/tcoffee/extractfrompdb/meta.yml @@ -0,0 +1,43 @@ +name: "tcoffee_extractfrompdb" +description: Reformats the header of PDB files with t-coffee +keywords: + - reformatting + - pdb + - genomics +tools: + - "tcoffee": + description: "A collection of tools for Computing, Evaluating and Manipulating + Multiple Alignments of DNA, RNA, Protein Sequences and Structures." 
+ homepage: "http://www.tcoffee.org/Projects/tcoffee/" + documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" + tool_dev_url: "https://github.com/cbcrg/tcoffee" + doi: "10.1006/jmbi.2000.4042" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - pdb: + type: file + description: Input pdb to be reformatted +output: + - formatted_pdb: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - ${prefix}.pdb: + type: file + description: Formatted pdb file + pattern: "*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" diff --git a/modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test b/modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test new file mode 100644 index 00000000..939ff19a --- /dev/null +++ b/modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process TCOFFEE_EXTRACTFROMPDB" + script "../main.nf" + process "TCOFFEE_EXTRACTFROMPDB" + + tag "modules" + tag "modules_nfcore" + tag "tcoffee" + tag "tcoffee/extractfrompdb" + tag "untar" + + setup { + + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) + ] + + """ + } + } + } + + test("seatoxin ") { + + when { + process { + """ + input[0] = UNTAR.out.untar.collect{ meta, dir -> file(dir).listFiles().collect().first() }.map{ pdb -> [[ id: 'test'], pdb]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("seatoxin -stub ") { + + options "-stub" + + when { + process { + """ + input[0] = 
Channel.fromPath('empty_file.pdb').map{ pdb -> [[ id: 'test'], pdb]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + +} diff --git a/modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test.snap b/modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test.snap new file mode 100644 index 00000000..5c6ae6dc --- /dev/null +++ b/modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "seatoxin -stub ": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.pdb:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,3ee943561db268aeeb8ebae110debd83" + ], + "formatted_pdb": [ + [ + { + "id": "test" + }, + "test.pdb:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3ee943561db268aeeb8ebae110debd83" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-09T09:23:26.228225523" + }, + "seatoxin ": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.pdb:md5,f4d68827f3a77d8439a6f82036a0bda2" + ] + ], + "1": [ + "versions.yml:md5,3ee943561db268aeeb8ebae110debd83" + ], + "formatted_pdb": [ + [ + { + "id": "test" + }, + "test.pdb:md5,f4d68827f3a77d8439a6f82036a0bda2" + ] + ], + "versions": [ + "versions.yml:md5,3ee943561db268aeeb8ebae110debd83" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-09T09:23:07.991961475" + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index b3ccbac4..04ef12ff 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,7 +16,7 @@ params { optional_data_dir = null // Preprocessing - skip_preprocessing = true + skip_preprocessing = false // Alignment build_consensus = false diff --git a/subworkflows/local/preprocess_optionaldata.nf b/subworkflows/local/preprocess_optionaldata.nf index f352f541..8a7bf24b 100644 --- 
a/subworkflows/local/preprocess_optionaldata.nf +++ b/subworkflows/local/preprocess_optionaldata.nf @@ -1,6 +1,5 @@ -include { ADD_PDBHEADER } from '../../modules/local/add_pdbheader.nf' - +include { TCOFFEE_EXTRACTFROMPDB } from '../../modules/nf-core/tcoffee/extractfrompdb/main' workflow PREPROCESS_OPTIONALDATA { take: ch_optional_data //channel: [ meta, [file1, ] ] @@ -13,8 +12,8 @@ workflow PREPROCESS_OPTIONALDATA { if(params.templates_suffix == ".pdb"){ // If the optional data is a pdb file, we can preprocess them to make // them compatible with all the alignment tools - ADD_PDBHEADER(ch_optional_data.transpose()) - ADD_PDBHEADER.out.pdb + TCOFFEE_EXTRACTFROMPDB(ch_optional_data.transpose()) + TCOFFEE_EXTRACTFROMPDB.out.formatted_pdb .groupTuple() .set { ch_preprocessed_data } } diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index 7411b556..75bd4c47 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -170,13 +170,13 @@ workflow MULTIPLESEQUENCEALIGN { } // - // PREPROCESS INPUT FILES + // VALIDATE AND PREPROCESS INPUT FILES // - if(!params.skip_preprocessing){ - FASTAVALIDATOR(ch_seqs) - ch_versions = ch_versions.mix(FASTAVALIDATOR.out.versions) + FASTAVALIDATOR(ch_seqs) + ch_versions = ch_versions.mix(FASTAVALIDATOR.out.versions) + if(!params.skip_preprocessing){ PREPROCESS_OPTIONALDATA(ch_optional_data) ch_optional_data = PREPROCESS_OPTIONALDATA.out.preprocessed_optionaldata ch_versions = ch_versions.mix(PREPROCESS_OPTIONALDATA.out.versions) From fffd09e11a16c1206bd0435554cd574f0fb7be20 Mon Sep 17 00:00:00 2001 From: luisas Date: Mon, 9 Dec 2024 14:24:02 +0100 Subject: [PATCH 3/8] add visualization --- assets/toolsheet.csv | 2 +- conf/modules.config | 12 ++++ modules.json | 3 +- modules/local/foldmason_createdb.nf | 45 ++++++++++++ modules/local/foldmason_msa2lddtreport.nf | 51 ++++++++++++++ .../tcoffee-extractfrompdb.diff | 43 ++++++++++++ nextflow.config | 3 + 
nextflow_schema.json | 5 ++ subworkflows/local/align.nf | 1 + ...eprocess_optionaldata.nf => preprocess.nf} | 3 +- subworkflows/local/visualization.nf | 69 +++++++++++++++++++ workflows/multiplesequencealign.nf | 19 +++-- 12 files changed, 249 insertions(+), 7 deletions(-) create mode 100644 modules/local/foldmason_createdb.nf create mode 100644 modules/local/foldmason_msa2lddtreport.nf create mode 100644 modules/nf-core/tcoffee/extractfrompdb/tcoffee-extractfrompdb.diff rename subworkflows/local/{preprocess_optionaldata.nf => preprocess.nf} (95%) create mode 100644 subworkflows/local/visualization.nf diff --git a/assets/toolsheet.csv b/assets/toolsheet.csv index 2a779362..2979035d 100644 --- a/assets/toolsheet.csv +++ b/assets/toolsheet.csv @@ -1,2 +1,2 @@ tree,args_tree,aligner,args_aligner -FAMSA,,FOLDMASON, \ No newline at end of file +FAMSA,,FAMSA, \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index daf1df3d..6a7c1433 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -269,4 +269,16 @@ saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + + // + // Visualization + // + withName: 'FOLDMASON_MSA2LDDTREPORT' { + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } + publishDir = [ + path: { "${params.outdir}/visualization" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } } diff --git a/modules.json b/modules.json index b1c145da..fd8091ae 100644 --- a/modules.json +++ b/modules.json @@ -166,7 +166,8 @@ "git_sha": "1f94c91de2b9e9c6b42fca53e823cada9a8b8465", "installed_by": [ "modules" - ] + ], + "patch": "modules/nf-core/tcoffee/extractfrompdb/tcoffee-extractfrompdb.diff" }, "tcoffee/irmsd": { "branch": "master", diff --git a/modules/local/foldmason_createdb.nf b/modules/local/foldmason_createdb.nf new file mode 100644 index 00000000..dee2d4c8 --- /dev/null +++ b/modules/local/foldmason_createdb.nf @@ -0,0 +1,45 @@ +process FOLDMASON_CREATEDB { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "community.wave.seqera.io/library/foldmason:512dd7b3e2453a75" + + input: + tuple val(meta) , path(pdbs) + + output: + tuple val(meta), path("${prefix}*"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + foldmason createdb \\ + ${pdbs} \\ + ${prefix} \\ + $args \\ + --threads $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + foldmason: \$(foldmason | grep "foldmason Version:" | cut -d":" -f 2 | awk '{\$1=\$1;print}') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + foldmason: \$(foldmason | grep "foldmason Version:" | cut -d":" -f 2 | awk '{\$1=\$1;print}') + END_VERSIONS + """ +} diff --git a/modules/local/foldmason_msa2lddtreport.nf b/modules/local/foldmason_msa2lddtreport.nf new file mode 100644 index 00000000..51b2a175 --- /dev/null +++ b/modules/local/foldmason_msa2lddtreport.nf @@ -0,0 +1,51 @@ +process FOLDMASON_MSA2LDDTREPORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container 
"community.wave.seqera.io/library/foldmason:512dd7b3e2453a75" + + input: + tuple val(meta) , path(msa) + tuple val(meta2) , path(db) + tuple val(meta3) , path(pdbs) + tuple val(meta4) , path(tree) + + output: + tuple val(meta), path("${prefix}.html"), emit: html + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def options_tree = tree ? "--guide-tree $tree" : "" + """ + foldmason msa2lddtreport \\ + ${meta.id} \\ + ${msa} \\ + ${prefix}.html \\ + $args \\ + ${options_tree} \\ + --threads $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + foldmason: \$(foldmason | grep "foldmason Version:" | cut -d":" -f 2 | awk '{\$1=\$1;print}') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + foldmason: \$(foldmason | grep "foldmason Version:" | cut -d":" -f 2 | awk '{\$1=\$1;print}') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tcoffee/extractfrompdb/tcoffee-extractfrompdb.diff b/modules/nf-core/tcoffee/extractfrompdb/tcoffee-extractfrompdb.diff new file mode 100644 index 00000000..302657ef --- /dev/null +++ b/modules/nf-core/tcoffee/extractfrompdb/tcoffee-extractfrompdb.diff @@ -0,0 +1,43 @@ +Changes in module 'nf-core/tcoffee/extractfrompdb' +Changes in 'tcoffee/extractfrompdb/main.nf': +--- modules/nf-core/tcoffee/extractfrompdb/main.nf ++++ modules/nf-core/tcoffee/extractfrompdb/main.nf +@@ -11,7 +11,7 @@ + tuple val(meta), path(pdb) + + output: +- tuple val(meta), path("${prefix}.pdb"), emit: formatted_pdb ++ tuple val(meta), path("${pdb.baseName}.pdb"), emit: formatted_pdb + path "versions.yml" , emit: versions + + +@@ -23,10 +23,12 @@ + prefix = task.ext.prefix ?: "${meta.id}" + """ + export TEMP='./' ++ mkdir old ++ mv ${pdb} old/ + t_coffee -other_pg extract_from_pdb 
\ +- -infile ${pdb} \ ++ -infile old/${pdb} \ + $args \ +- > "${prefix}.pdb" ++ > "${pdb.baseName}.pdb" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": +@@ -40,7 +42,7 @@ + """ + # Otherwise, tcoffee will crash when calling its version + export TEMP='./' +- touch "${prefix}.pdb" ++ touch "${pdb.baseName}.pdb" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +'modules/nf-core/tcoffee/extractfrompdb/meta.yml' is unchanged +'modules/nf-core/tcoffee/extractfrompdb/environment.yml' is unchanged +'modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test.snap' is unchanged +'modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test' is unchanged +************************************************************ diff --git a/nextflow.config b/nextflow.config index 04ef12ff..9858e9b9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -64,6 +64,9 @@ params { skip_shiny = false shiny_trace_mode = "latest" // all, latest + // Visualisation options + skip_visualisation = false + // Config options config_profile_name = null config_profile_description = null diff --git a/nextflow_schema.json b/nextflow_schema.json index e74d0f99..801909a5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -209,6 +209,11 @@ "shiny_trace_mode": { "type": "string", "description": "variable containing the shiny_trace mode to be used." + }, + "skip_visualisation": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip the visualization generation." 
} } }, diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 9107f6eb..c10042e1 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -384,5 +384,6 @@ workflow ALIGN { emit: msa = ch_msa // channel: [ val(meta), path(msa) ] + trees = trees // channel: [ val(meta), path(tree) ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/preprocess_optionaldata.nf b/subworkflows/local/preprocess.nf similarity index 95% rename from subworkflows/local/preprocess_optionaldata.nf rename to subworkflows/local/preprocess.nf index 8a7bf24b..ef17950f 100644 --- a/subworkflows/local/preprocess_optionaldata.nf +++ b/subworkflows/local/preprocess.nf @@ -1,6 +1,7 @@ include { TCOFFEE_EXTRACTFROMPDB } from '../../modules/nf-core/tcoffee/extractfrompdb/main' -workflow PREPROCESS_OPTIONALDATA { + +workflow PREPROCESS { take: ch_optional_data //channel: [ meta, [file1, ] ] diff --git a/subworkflows/local/visualization.nf b/subworkflows/local/visualization.nf new file mode 100644 index 00000000..51e9fe0b --- /dev/null +++ b/subworkflows/local/visualization.nf @@ -0,0 +1,69 @@ +include {FOLDMASON_CREATEDB } from '../../modules/local/foldmason_createdb.nf' +include {FOLDMASON_MSA2LDDTREPORT } from '../../modules/local/foldmason_msa2lddtreport.nf' + +workflow VISUALIZATION { + + take: + ch_msa // channel: [ meta, /path/to/file.* ] + ch_trees // channel: [ meta, /path/to/file.* ] + ch_optional_data // channel: [ meta, /path/to/file.* ] + + main: + + ch_versions = Channel.empty() + ch_html = Channel.empty() + + + // Merge the msa and tree + // split the msa meta to be able to merge with the tree meta + ch_msa + .map{ + meta, file -> [meta.subMap(["id", "tree", "args_tree", "args_tree_clean"]), meta, file] + } + .join( ch_trees, by:0, remainder: true) + .filter { it.size() == 4 } + .map{ + tree_meta, meta, msa, tree -> [meta.subMap(["id"]), meta, msa, tree] + } + .join( ch_optional_data, by:0) + .set { 
ch_msa_tree_data } + + + ch_optional_data.view() + // + // FOLDMASON VISUALISATION + // + + FOLDMASON_CREATEDB( + ch_optional_data + ) + + + ch_msa_tree_data + .combine(FOLDMASON_CREATEDB.out.db, by:0) + .multiMap{ + id, meta, msafile, treefile, pdb, dbfiles -> + msa: [meta, msafile] + db: [id , dbfiles] + pdbs: [id , pdb] + tree: [meta, treefile == null ? [] : treefile] + }.set{ + ch_msa_db_tree + } + + ch_msa_db_tree.tree.view() + + FOLDMASON_MSA2LDDTREPORT( + ch_msa_db_tree.msa, + ch_msa_db_tree.db, + ch_msa_db_tree.pdbs, + ch_msa_db_tree.tree + ) + + ch_html = FOLDMASON_MSA2LDDTREPORT.out.html + + emit: + html = ch_html + versions = ch_versions + +} diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index 75bd4c47..af8fb240 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -22,7 +22,9 @@ include { STATS } from '../subworkflows/local/stats' include { ALIGN } from '../subworkflows/local/align' include { EVALUATE } from '../subworkflows/local/evaluate' include { TEMPLATES } from '../subworkflows/local/templates' -include { PREPROCESS_OPTIONALDATA} from '../subworkflows/local/preprocess_optionaldata' +include { PREPROCESS } from '../subworkflows/local/preprocess' +include { VISUALIZATION } from '../subworkflows/local/visualization' + // // MODULE: local modules @@ -177,9 +179,9 @@ workflow MULTIPLESEQUENCEALIGN { ch_versions = ch_versions.mix(FASTAVALIDATOR.out.versions) if(!params.skip_preprocessing){ - PREPROCESS_OPTIONALDATA(ch_optional_data) - ch_optional_data = PREPROCESS_OPTIONALDATA.out.preprocessed_optionaldata - ch_versions = ch_versions.mix(PREPROCESS_OPTIONALDATA.out.versions) + PREPROCESS(ch_optional_data) + ch_optional_data = PREPROCESS.out.preprocessed_optionaldata + ch_versions = ch_versions.mix(PREPROCESS.out.versions) } @@ -259,6 +261,15 @@ workflow MULTIPLESEQUENCEALIGN { ch_versions = ch_versions.mix(PREPARE_SHINY.out.versions) } + + if (!params.skip_visualisation) { + 
VISUALIZATION ( + ALIGN.out.msa, + ALIGN.out.trees, + ch_optional_data + ) + } + softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info", From ff1d9697d9582cd448602da5d0a75d9828e55e30 Mon Sep 17 00:00:00 2001 From: luisas Date: Mon, 9 Dec 2024 17:05:10 +0100 Subject: [PATCH 4/8] update --- assets/toolsheet.csv | 2 +- modules.json | 3 +- modules/local/foldmason_createdb.nf | 5 +- modules/local/foldmason_msa2lddtreport.nf | 5 +- .../foldmason/easymsa/foldmason-easymsa.diff | 50 +++++++++++++++++++ modules/nf-core/foldmason/easymsa/main.nf | 6 +-- modules/nf-core/foldmason/easymsa/meta.yml | 2 +- subworkflows/local/align.nf | 9 +++- subworkflows/local/visualization.nf | 8 +-- 9 files changed, 73 insertions(+), 17 deletions(-) create mode 100644 modules/nf-core/foldmason/easymsa/foldmason-easymsa.diff diff --git a/assets/toolsheet.csv b/assets/toolsheet.csv index 2979035d..2a779362 100644 --- a/assets/toolsheet.csv +++ b/assets/toolsheet.csv @@ -1,2 +1,2 @@ tree,args_tree,aligner,args_aligner -FAMSA,,FAMSA, \ No newline at end of file +FAMSA,,FOLDMASON, \ No newline at end of file diff --git a/modules.json b/modules.json index fd8091ae..204e7eff 100644 --- a/modules.json +++ b/modules.json @@ -60,7 +60,8 @@ "git_sha": "8541ec46706d6610b032748fa51acf4b3094ced8", "installed_by": [ "modules" - ] + ], + "patch": "modules/nf-core/foldmason/easymsa/foldmason-easymsa.diff" }, "kalign/align": { "branch": "master", diff --git a/modules/local/foldmason_createdb.nf b/modules/local/foldmason_createdb.nf index dee2d4c8..f527ae64 100644 --- a/modules/local/foldmason_createdb.nf +++ b/modules/local/foldmason_createdb.nf @@ -3,8 +3,9 @@ process FOLDMASON_CREATEDB { label 'process_medium' conda "${moduleDir}/environment.yml" - container "community.wave.seqera.io/library/foldmason:512dd7b3e2453a75" - + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'oras://community.wave.seqera.io/library/foldmason:2.7bd21ed--a45f76ed12b391e6': + 'community.wave.seqera.io/library/foldmason:2.7bd21ed--e7f739473ad6578d' }" input: tuple val(meta) , path(pdbs) diff --git a/modules/local/foldmason_msa2lddtreport.nf b/modules/local/foldmason_msa2lddtreport.nf index 51b2a175..38315582 100644 --- a/modules/local/foldmason_msa2lddtreport.nf +++ b/modules/local/foldmason_msa2lddtreport.nf @@ -3,8 +3,9 @@ process FOLDMASON_MSA2LDDTREPORT { label 'process_medium' conda "${moduleDir}/environment.yml" - container "community.wave.seqera.io/library/foldmason:512dd7b3e2453a75" - + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://community.wave.seqera.io/library/foldmason:2.7bd21ed--a45f76ed12b391e6': + 'community.wave.seqera.io/library/foldmason:2.7bd21ed--e7f739473ad6578d' }" input: tuple val(meta) , path(msa) tuple val(meta2) , path(db) diff --git a/modules/nf-core/foldmason/easymsa/foldmason-easymsa.diff b/modules/nf-core/foldmason/easymsa/foldmason-easymsa.diff new file mode 100644 index 00000000..eed347f6 --- /dev/null +++ b/modules/nf-core/foldmason/easymsa/foldmason-easymsa.diff @@ -0,0 +1,50 @@ +Changes in module 'nf-core/foldmason/easymsa' +Changes in 'foldmason/easymsa/main.nf': +--- modules/nf-core/foldmason/easymsa/main.nf ++++ modules/nf-core/foldmason/easymsa/main.nf +@@ -12,7 +12,7 @@ + + output: + tuple val(meta), path("${prefix}_3di.fa${compress ? '.gz' : ''}"), emit: msa_3di +- tuple val(meta), path("${prefix}_aa.fa${compress ? '.gz' : ''}") , emit: msa_aa ++ tuple val(meta), path("${prefix}.fa${compress ? 
'.gz' : ''}") , emit: msa_aa + path "versions.yml" , emit: versions + + when: +@@ -33,7 +33,7 @@ + + if ${compress}; then + pigz -p ${task.cpus} ${prefix}_3di.fa +- pigz -p ${task.cpus} ${prefix}_aa.fa ++ pigz -p ${task.cpus} ${prefix}.fa + fi + + cat <<-END_VERSIONS > versions.yml +@@ -48,7 +48,7 @@ + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" ${compress ? '| gzip' : ''} > ${prefix}_3di.fa${compress ? '.gz' : ''} +- echo "" ${compress ? '| gzip' : ''} > ${prefix}_aa.fa${compress ? '.gz' : ''} ++ echo "" ${compress ? '| gzip' : ''} > ${prefix}.fa${compress ? '.gz' : ''} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +Changes in 'foldmason/easymsa/meta.yml': +--- modules/nf-core/foldmason/easymsa/meta.yml ++++ modules/nf-core/foldmason/easymsa/meta.yml +@@ -58,7 +58,7 @@ + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` +- - "${prefix}_aa.fa${compress ? '.gz' : ''}": ++ - "${prefix}.fa${compress ? '.gz' : ''}": + type: file + description: Fasta file containing the multiple sequence alignment with Amino + Acid alphabet + +'modules/nf-core/foldmason/easymsa/environment.yml' is unchanged +'modules/nf-core/foldmason/easymsa/tests/main.nf.test.snap' is unchanged +'modules/nf-core/foldmason/easymsa/tests/tags.yml' is unchanged +'modules/nf-core/foldmason/easymsa/tests/main.nf.test' is unchanged +************************************************************ diff --git a/modules/nf-core/foldmason/easymsa/main.nf b/modules/nf-core/foldmason/easymsa/main.nf index bb1737f0..9712b96b 100644 --- a/modules/nf-core/foldmason/easymsa/main.nf +++ b/modules/nf-core/foldmason/easymsa/main.nf @@ -12,7 +12,7 @@ process FOLDMASON_EASYMSA { output: tuple val(meta), path("${prefix}_3di.fa${compress ? '.gz' : ''}"), emit: msa_3di - tuple val(meta), path("${prefix}_aa.fa${compress ? '.gz' : ''}") , emit: msa_aa + tuple val(meta), path("${prefix}.fa${compress ? 
'.gz' : ''}") , emit: msa_aa path "versions.yml" , emit: versions when: @@ -33,7 +33,7 @@ process FOLDMASON_EASYMSA { if ${compress}; then pigz -p ${task.cpus} ${prefix}_3di.fa - pigz -p ${task.cpus} ${prefix}_aa.fa + pigz -p ${task.cpus} ${prefix}.fa fi cat <<-END_VERSIONS > versions.yml @@ -48,7 +48,7 @@ process FOLDMASON_EASYMSA { prefix = task.ext.prefix ?: "${meta.id}" """ echo "" ${compress ? '| gzip' : ''} > ${prefix}_3di.fa${compress ? '.gz' : ''} - echo "" ${compress ? '| gzip' : ''} > ${prefix}_aa.fa${compress ? '.gz' : ''} + echo "" ${compress ? '| gzip' : ''} > ${prefix}.fa${compress ? '.gz' : ''} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/foldmason/easymsa/meta.yml b/modules/nf-core/foldmason/easymsa/meta.yml index 140eb81e..90322559 100644 --- a/modules/nf-core/foldmason/easymsa/meta.yml +++ b/modules/nf-core/foldmason/easymsa/meta.yml @@ -58,7 +58,7 @@ output: description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - "${prefix}_aa.fa${compress ? '.gz' : ''}": + - "${prefix}.fa${compress ? 
'.gz' : ''}": type: file description: Fasta file containing the multiple sequence alignment with Amino Acid alphabet diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index c10042e1..1fb5bcef 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -121,7 +121,7 @@ workflow ALIGN { metadependency, template, dependency, metatree, metaalign -> [ metadependency+metatree , metaalign, template, dependency ] } - .combine(trees, by: 0) + .join(trees, by: 0) .map { metratreeanddep, metaalign, template, dependency, tree -> [ metratreeanddep+metaalign, tree, template, dependency ] @@ -130,6 +130,8 @@ workflow ALIGN { foldmason: it[0]["aligner"] == "FOLDMASON" } .set { ch_optional_data_tools_tree } + + // ------------------------------------------------ // Compute the alignments @@ -369,12 +371,17 @@ workflow ALIGN { ch_versions = ch_versions.mix(FOLDMASON_EASYMSA.out.versions.first()) } + + ch_msa.view() + // ----------------- CONSENSUS ------------------ if(params.build_consensus){ ch_msa.map{ meta, msa -> [ meta["id"], msa]} .groupTuple() .map{ id_meta, msas -> [ ["id": id_meta, "tree":"", "args_tree":"", "args_tree_clean":null, "aligner":"CONSENSUS", "args_aligner":"", "args_aligner_clean":null ], msas ]} .set{ ch_msa_consensus } + + ch_msa_consensus.view() CONSENSUS(ch_msa_consensus, [[:],[]], compress) ch_msa = ch_msa.mix(CONSENSUS.out.alignment) diff --git a/subworkflows/local/visualization.nf b/subworkflows/local/visualization.nf index 51e9fe0b..c11d5faf 100644 --- a/subworkflows/local/visualization.nf +++ b/subworkflows/local/visualization.nf @@ -13,7 +13,6 @@ workflow VISUALIZATION { ch_versions = Channel.empty() ch_html = Channel.empty() - // Merge the msa and tree // split the msa meta to be able to merge with the tree meta ch_msa @@ -29,7 +28,6 @@ workflow VISUALIZATION { .set { ch_msa_tree_data } - ch_optional_data.view() // // FOLDMASON VISUALISATION // @@ -38,7 +36,6 @@ workflow VISUALIZATION { ch_optional_data ) - 
ch_msa_tree_data .combine(FOLDMASON_CREATEDB.out.db, by:0) .multiMap{ @@ -51,13 +48,12 @@ workflow VISUALIZATION { ch_msa_db_tree } - ch_msa_db_tree.tree.view() - + ch_msa_db_tree.msa.view() FOLDMASON_MSA2LDDTREPORT( ch_msa_db_tree.msa, ch_msa_db_tree.db, ch_msa_db_tree.pdbs, - ch_msa_db_tree.tree + [[:],[]] ) ch_html = FOLDMASON_MSA2LDDTREPORT.out.html From 0a354f4d4877e9f8ee7d2def60eef830f893ae65 Mon Sep 17 00:00:00 2001 From: luisas Date: Mon, 9 Dec 2024 17:20:28 +0100 Subject: [PATCH 5/8] upd --- modules/nf-core/foldmason/easymsa/main.nf | 2 ++ subworkflows/local/align.nf | 4 +--- .../local/utils_nfcore_multiplesequencealign_pipeline/main.nf | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/nf-core/foldmason/easymsa/main.nf b/modules/nf-core/foldmason/easymsa/main.nf index 9712b96b..b3357364 100644 --- a/modules/nf-core/foldmason/easymsa/main.nf +++ b/modules/nf-core/foldmason/easymsa/main.nf @@ -31,6 +31,8 @@ process FOLDMASON_EASYMSA { $args \\ --threads $task.cpus + mv ${prefix}_aa.fa ${prefix}.fa + if ${compress}; then pigz -p ${task.cpus} ${prefix}_3di.fa pigz -p ${task.cpus} ${prefix}.fa diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 1fb5bcef..f9e1086b 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -372,17 +372,15 @@ workflow ALIGN { } - ch_msa.view() // ----------------- CONSENSUS ------------------ if(params.build_consensus){ ch_msa.map{ meta, msa -> [ meta["id"], msa]} .groupTuple() + .filter{ it[1].size() > 1 } .map{ id_meta, msas -> [ ["id": id_meta, "tree":"", "args_tree":"", "args_tree_clean":null, "aligner":"CONSENSUS", "args_aligner":"", "args_aligner_clean":null ], msas ]} .set{ ch_msa_consensus } - ch_msa_consensus.view() - CONSENSUS(ch_msa_consensus, [[:],[]], compress) ch_msa = ch_msa.mix(CONSENSUS.out.alignment) ch_versions = ch_versions.mix(CONSENSUS.out.versions.first()) diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf 
b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index d3dd64e4..12af446c 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -81,7 +81,7 @@ workflow PIPELINE_INITIALISATION { align_map["aligner"] = meta_clone["aligner"] align_map["args_aligner"] = Utils.check_required_args(meta_clone["aligner"], meta_clone["args_aligner"]) - align_map["args_aligner_clean"] = Utils.cleanArgs(align_map["args_aligner"]) + align_map["args_aligner_clean"] = Utils.cleanArgs(meta_clone.args_aligner) [ tree_map, align_map ] }.unique() From 37763b74c1165cd730e23d259a17ced0d9263bb3 Mon Sep 17 00:00:00 2001 From: luisas Date: Tue, 10 Dec 2024 17:53:49 +0100 Subject: [PATCH 6/8] pd --- assets/samplesheet.csv | 3 +- assets/toolsheet.csv | 3 +- modules.json | 14 ++ .../foldmason/createdb/environment.yml | 5 + .../foldmason/createdb/main.nf} | 11 +- modules/nf-core/foldmason/createdb/meta.yml | 48 +++++++ .../foldmason/createdb/tests/main.nf.test | 66 +++++++++ .../createdb/tests/main.nf.test.snap | 128 ++++++++++++++++++ .../foldmason/msa2lddtreport/environment.yml | 5 + .../foldmason/msa2lddtreport/main.nf} | 10 +- .../nf-core/foldmason/msa2lddtreport/meta.yml | 74 ++++++++++ .../msa2lddtreport/tests/main.nf.test | 101 ++++++++++++++ .../msa2lddtreport/tests/main.nf.test.snap | 68 ++++++++++ subworkflows/local/align.nf | 7 +- subworkflows/local/visualization.nf | 14 +- 15 files changed, 534 insertions(+), 23 deletions(-) create mode 100644 modules/nf-core/foldmason/createdb/environment.yml rename modules/{local/foldmason_createdb.nf => nf-core/foldmason/createdb/main.nf} (78%) create mode 100644 modules/nf-core/foldmason/createdb/meta.yml create mode 100644 modules/nf-core/foldmason/createdb/tests/main.nf.test create mode 100644 modules/nf-core/foldmason/createdb/tests/main.nf.test.snap create mode 100644 
modules/nf-core/foldmason/msa2lddtreport/environment.yml rename modules/{local/foldmason_msa2lddtreport.nf => nf-core/foldmason/msa2lddtreport/main.nf} (82%) create mode 100644 modules/nf-core/foldmason/msa2lddtreport/meta.yml create mode 100644 modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test create mode 100644 modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test.snap diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 34b76618..dda13367 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,2 @@ id,fasta,reference,optional_data -seatoxin-ref,,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz -toxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin.ref, \ No newline at end of file +seatoxin-ref,,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz \ No newline at end of file diff --git a/assets/toolsheet.csv b/assets/toolsheet.csv index 2a779362..a5f1b6e7 100644 --- a/assets/toolsheet.csv +++ b/assets/toolsheet.csv @@ -1,2 +1,3 @@ tree,args_tree,aligner,args_aligner -FAMSA,,FOLDMASON, \ No newline at end of file +,,FOLDMASON, +FAMSA,,FAMSA, \ No newline at end of file diff --git a/modules.json b/modules.json index 204e7eff..7a9db7e2 100644 --- a/modules.json +++ b/modules.json @@ -55,6 +55,13 @@ "modules" ] }, + "foldmason/createdb": { + "branch": "master", + "git_sha": "0270c0fbbbb09456d7823605e4285c4a2c5bbf40", + "installed_by": [ + "modules" + ] + }, "foldmason/easymsa": { "branch": "master", "git_sha": 
"8541ec46706d6610b032748fa51acf4b3094ced8", @@ -63,6 +70,13 @@ ], "patch": "modules/nf-core/foldmason/easymsa/foldmason-easymsa.diff" }, + "foldmason/msa2lddtreport": { + "branch": "master", + "git_sha": "d3555a4a33ae94269b65f79f7066ac2fcb836005", + "installed_by": [ + "modules" + ] + }, "kalign/align": { "branch": "master", "git_sha": "cadb9bbfe56001ac421e0ee87808b0ccc754593a", diff --git a/modules/nf-core/foldmason/createdb/environment.yml b/modules/nf-core/foldmason/createdb/environment.yml new file mode 100644 index 00000000..80d4dd37 --- /dev/null +++ b/modules/nf-core/foldmason/createdb/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::foldmason=2.7bd21ed diff --git a/modules/local/foldmason_createdb.nf b/modules/nf-core/foldmason/createdb/main.nf similarity index 78% rename from modules/local/foldmason_createdb.nf rename to modules/nf-core/foldmason/createdb/main.nf index f527ae64..c54b45f3 100644 --- a/modules/local/foldmason_createdb.nf +++ b/modules/nf-core/foldmason/createdb/main.nf @@ -1,17 +1,18 @@ process FOLDMASON_CREATEDB { tag "$meta.id" - label 'process_medium' + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'oras://community.wave.seqera.io/library/foldmason:2.7bd21ed--a45f76ed12b391e6': + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a8/a88d162c3f39a1518d48c3faec235e6fcde750586da868b62fc5f0a08a89aa9d/data' : 'community.wave.seqera.io/library/foldmason:2.7bd21ed--e7f739473ad6578d' }" + input: - tuple val(meta) , path(pdbs) + tuple val(meta) , path(structures) output: tuple val(meta), path("${prefix}*"), emit: db - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -21,7 +22,7 @@ process FOLDMASON_CREATEDB { prefix = task.ext.prefix ?: "${meta.id}" """ foldmason createdb \\ - ${pdbs} \\ + ${structures} \\ ${prefix} \\ $args \\ --threads $task.cpus diff --git a/modules/nf-core/foldmason/createdb/meta.yml b/modules/nf-core/foldmason/createdb/meta.yml new file mode 100644 index 00000000..fd47efe2 --- /dev/null +++ b/modules/nf-core/foldmason/createdb/meta.yml @@ -0,0 +1,48 @@ +name: "foldmason_createdb" +description: Creates a database for Foldmason. +keywords: + - alignment + - MSA + - genomics + - structure +tools: + - "foldmason": + description: "Multiple Protein Structure Alignment at Scale with FoldMason" + homepage: "https://github.com/steineggerlab/foldmason" + documentation: "https://github.com/steineggerlab/foldmason" + tool_dev_url: "https://github.com/steineggerlab/foldmason" + doi: "10.1101/2024.08.01.606130" + licence: ["GPL v3"] + identifier: biotools:foldmason + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - structures: + type: file + description: Input protein structures in `PDB` or `mmCIF` format. + pattern: "*.{pdb,mmcif}" + +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "${prefix}*": + type: file + description: All database files created by Foldmason + pattern: "*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" +maintainers: + - "@luisas" diff --git a/modules/nf-core/foldmason/createdb/tests/main.nf.test b/modules/nf-core/foldmason/createdb/tests/main.nf.test new file mode 100644 index 00000000..9ac567a7 --- /dev/null +++ b/modules/nf-core/foldmason/createdb/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + + name "Test Process FOLDMASON_CREATEDB" + script "../main.nf" + process "FOLDMASON_CREATEDB" + + tag "modules" + tag "modules_nfcore" + tag "foldmason" + tag "foldmason/createdb" + tag "untar" + + setup { + + run("UNTAR") { + script "../../../../../modules/nf-core/untar/main.nf" + process { + """ + archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/af2_structures/seatoxin-ref.tar.gz", checkIfExists: true) + input[0] = Channel.of(tuple([id:'test'], archive)) + """ + } + } + } + + test("seatoxin") { + + when { + + process { + """ + input[0] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("seatoxin - stub ") { + + when { + + process { + """ + input[0] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/foldmason/createdb/tests/main.nf.test.snap b/modules/nf-core/foldmason/createdb/tests/main.nf.test.snap new file mode 100644 index 00000000..dce5175d --- /dev/null +++ b/modules/nf-core/foldmason/createdb/tests/main.nf.test.snap @@ -0,0 +1,128 @@ +{ + "seatoxin - stub ": { + "content": [ + { + 
"0": [ + [ + { + "id": "test" + }, + [ + "test:md5,941321067eae439b4a8ccf6425d84751", + "test.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015", + "test.lookup:md5,83047fddf9b6fbaa99f00cdf5a7dd274", + "test.source:md5,7968a6cb6577ead230c31910380af948", + "test_ca:md5,2738c516c1a15238a5c62c4ba6dee0b9", + "test_ca.dbtype:md5,3fd85f9ee7ca8882c8caa747d0eef0b3", + "test_ca.index:md5,cfdf544c3aa6d7e2034e4a01dac1d0ba", + "test_h:md5,ab9ce99a99fc6ba6a98c4460410b6a16", + "test_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2", + "test_h.index:md5,dc7c33ddb6a3dc54ad033120ef4c9af4", + "test_ss:md5,75d329b63c0383c3e43090ba89238e14", + "test_ss.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test_ss.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015" + ] + ] + ], + "1": [ + "versions.yml:md5,6ebe56979a45b356d374cfc65c8a2b45" + ], + "db": [ + [ + { + "id": "test" + }, + [ + "test:md5,941321067eae439b4a8ccf6425d84751", + "test.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015", + "test.lookup:md5,83047fddf9b6fbaa99f00cdf5a7dd274", + "test.source:md5,7968a6cb6577ead230c31910380af948", + "test_ca:md5,2738c516c1a15238a5c62c4ba6dee0b9", + "test_ca.dbtype:md5,3fd85f9ee7ca8882c8caa747d0eef0b3", + "test_ca.index:md5,cfdf544c3aa6d7e2034e4a01dac1d0ba", + "test_h:md5,ab9ce99a99fc6ba6a98c4460410b6a16", + "test_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2", + "test_h.index:md5,dc7c33ddb6a3dc54ad033120ef4c9af4", + "test_ss:md5,75d329b63c0383c3e43090ba89238e14", + "test_ss.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test_ss.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015" + ] + ] + ], + "versions": [ + "versions.yml:md5,6ebe56979a45b356d374cfc65c8a2b45" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-09T15:11:27.426024133" + }, + "seatoxin": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test:md5,941321067eae439b4a8ccf6425d84751", + 
"test.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015", + "test.lookup:md5,83047fddf9b6fbaa99f00cdf5a7dd274", + "test.source:md5,7968a6cb6577ead230c31910380af948", + "test_ca:md5,2738c516c1a15238a5c62c4ba6dee0b9", + "test_ca.dbtype:md5,3fd85f9ee7ca8882c8caa747d0eef0b3", + "test_ca.index:md5,cfdf544c3aa6d7e2034e4a01dac1d0ba", + "test_h:md5,ab9ce99a99fc6ba6a98c4460410b6a16", + "test_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2", + "test_h.index:md5,dc7c33ddb6a3dc54ad033120ef4c9af4", + "test_ss:md5,75d329b63c0383c3e43090ba89238e14", + "test_ss.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test_ss.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015" + ] + ] + ], + "1": [ + "versions.yml:md5,6ebe56979a45b356d374cfc65c8a2b45" + ], + "db": [ + [ + { + "id": "test" + }, + [ + "test:md5,941321067eae439b4a8ccf6425d84751", + "test.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015", + "test.lookup:md5,83047fddf9b6fbaa99f00cdf5a7dd274", + "test.source:md5,7968a6cb6577ead230c31910380af948", + "test_ca:md5,2738c516c1a15238a5c62c4ba6dee0b9", + "test_ca.dbtype:md5,3fd85f9ee7ca8882c8caa747d0eef0b3", + "test_ca.index:md5,cfdf544c3aa6d7e2034e4a01dac1d0ba", + "test_h:md5,ab9ce99a99fc6ba6a98c4460410b6a16", + "test_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2", + "test_h.index:md5,dc7c33ddb6a3dc54ad033120ef4c9af4", + "test_ss:md5,75d329b63c0383c3e43090ba89238e14", + "test_ss.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test_ss.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015" + ] + ] + ], + "versions": [ + "versions.yml:md5,6ebe56979a45b356d374cfc65c8a2b45" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-09T15:11:15.375341633" + } +} \ No newline at end of file diff --git a/modules/nf-core/foldmason/msa2lddtreport/environment.yml b/modules/nf-core/foldmason/msa2lddtreport/environment.yml new file mode 100644 index 00000000..80d4dd37 --- /dev/null 
+++ b/modules/nf-core/foldmason/msa2lddtreport/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::foldmason=2.7bd21ed diff --git a/modules/local/foldmason_msa2lddtreport.nf b/modules/nf-core/foldmason/msa2lddtreport/main.nf similarity index 82% rename from modules/local/foldmason_msa2lddtreport.nf rename to modules/nf-core/foldmason/msa2lddtreport/main.nf index 38315582..e07a7552 100644 --- a/modules/local/foldmason_msa2lddtreport.nf +++ b/modules/nf-core/foldmason/msa2lddtreport/main.nf @@ -4,13 +4,13 @@ process FOLDMASON_MSA2LDDTREPORT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://community.wave.seqera.io/library/foldmason:2.7bd21ed--a45f76ed12b391e6': + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a8/a88d162c3f39a1518d48c3faec235e6fcde750586da868b62fc5f0a08a89aa9d/data' : 'community.wave.seqera.io/library/foldmason:2.7bd21ed--e7f739473ad6578d' }" input: - tuple val(meta) , path(msa) - tuple val(meta2) , path(db) - tuple val(meta3) , path(pdbs) - tuple val(meta4) , path(tree) + tuple val(meta) , path(msa) + tuple val(meta2), path(db) + tuple val(meta3), path(pdbs) + tuple val(meta4), path(tree) output: tuple val(meta), path("${prefix}.html"), emit: html diff --git a/modules/nf-core/foldmason/msa2lddtreport/meta.yml b/modules/nf-core/foldmason/msa2lddtreport/meta.yml new file mode 100644 index 00000000..cf9749ac --- /dev/null +++ b/modules/nf-core/foldmason/msa2lddtreport/meta.yml @@ -0,0 +1,74 @@ +name: "foldmason_msa2lddtreport" +description: Renders a visualization report using foldmason +keywords: + - alignment + - MSA + - genomics + - structure +tools: + - "foldmason": + description: "Multiple Protein Structure Alignment at Scale with FoldMason" + homepage: "https://github.com/steineggerlab/foldmason" + documentation: "https://github.com/steineggerlab/foldmason" + tool_dev_url: 
"https://github.com/steineggerlab/foldmason" + doi: "10.1101/2024.08.01.606130" + licence: ["GPL v3"] + identifier: biotools:foldmason + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - msa: + type: file + description: Input alignment file. + pattern: "*.{fa,fasta,aln}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - db: + type: file + description: Input foldmason database. + pattern: "*" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - pdbs: + type: file + description: Protein structures used for the visualization. + pattern: "*.{pdb}" + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - tree: + type: file + description: Guide tree used for the visualization. + pattern: "*.{nwk,dnd}" +output: + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "${prefix}.html": + type: file + description: HTML file with the foldmason visualization + pattern: "*.{html}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" +maintainers: + - "@luisas" diff --git a/modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test b/modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test new file mode 100644 index 00000000..b92c05d0 --- /dev/null +++ b/modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test @@ -0,0 +1,101 @@ +nextflow_process { + + name "Test Process FOLDMASON_MSA2LDDTREPORT" + script "../main.nf" + process "FOLDMASON_MSA2LDDTREPORT" + + tag "modules" + tag "modules_nfcore" + tag "foldmason" + tag "foldmason/msa2lddtreport" + tag "foldmason/createdb" + tag "untar" + tag "famsa/guidetree" + + + setup{ + run("UNTAR") { + script "../../../../../modules/nf-core/untar/main.nf" + process { + """ + archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/af2_structures/seatoxin-ref.tar.gz", checkIfExists: true) + input[0] = Channel.of(tuple([id:'test'], archive)) + """ + } + } + + run("FAMSA_GUIDETREE") { + script "../../../../../modules/nf-core/famsa/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test_tree' ], // meta map + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + run("FOLDMASON_CREATEDB") { + script "../../../../../modules/nf-core/foldmason/createdb/main.nf" + process { + """ + input[0] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} + """ + } + } + + + } + + test("seatoxin") { + + + when { + process { + """ + input[0] =[ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + ] + input[1] = 
FOLDMASON_CREATEDB.out.db.collect{ meta, db -> db }.map{ db -> [[ id: 'test'], db]} + input[2] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} + input[3] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_tree'], tree]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("seatixin - stub") { + + options "-stub" + + when { + process { + """ + input[0] =[ [ id:'test_tree' ], // meta map + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + ] + input[1] = FOLDMASON_CREATEDB.out.db.collect{ meta, db -> db }.map{ db -> [[ id: 'test'], db]} + input[2] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} + input[3] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_tree'], tree]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test.snap b/modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test.snap new file mode 100644 index 00000000..da11cfad --- /dev/null +++ b/modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "seatixin - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_tree" + }, + "test_tree.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,ed922d0faa7a3e3c8171a05296939468" + ], + "html": [ + [ + { + "id": "test_tree" + }, + "test_tree.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ed922d0faa7a3e3c8171a05296939468" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T09:36:23.360594258" + }, + "seatoxin": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + 
"test.html:md5,7a90a8e674dc45ce4181498b8f53b519" + ] + ], + "1": [ + "versions.yml:md5,ed922d0faa7a3e3c8171a05296939468" + ], + "html": [ + [ + { + "id": "test" + }, + "test.html:md5,7a90a8e674dc45ce4181498b8f53b519" + ] + ], + "versions": [ + "versions.yml:md5,ed922d0faa7a3e3c8171a05296939468" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T09:54:03.711567262" + } +} \ No newline at end of file diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index f9e1086b..59538c5b 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -121,10 +121,13 @@ workflow ALIGN { metadependency, template, dependency, metatree, metaalign -> [ metadependency+metatree , metaalign, template, dependency ] } - .join(trees, by: 0) + .join(trees, by: 0, remainder: true) + .filter{ + it.size() == 5 + } .map { metratreeanddep, metaalign, template, dependency, tree -> - [ metratreeanddep+metaalign, tree, template, dependency ] + tree ? 
[ metratreeanddep+metaalign, tree, template, dependency ]:[ metratreeanddep+metaalign, [], template, dependency ] } .branch { foldmason: it[0]["aligner"] == "FOLDMASON" diff --git a/subworkflows/local/visualization.nf b/subworkflows/local/visualization.nf index c11d5faf..dec524a8 100644 --- a/subworkflows/local/visualization.nf +++ b/subworkflows/local/visualization.nf @@ -1,5 +1,5 @@ -include {FOLDMASON_CREATEDB } from '../../modules/local/foldmason_createdb.nf' -include {FOLDMASON_MSA2LDDTREPORT } from '../../modules/local/foldmason_msa2lddtreport.nf' +include {FOLDMASON_CREATEDB } from '../../modules/nf-core/foldmason/createdb/main' +include {FOLDMASON_MSA2LDDTREPORT } from '../../modules/nf-core/foldmason/msa2lddtreport/main' workflow VISUALIZATION { @@ -13,21 +13,20 @@ workflow VISUALIZATION { ch_versions = Channel.empty() ch_html = Channel.empty() + // Merge the msa and tree // split the msa meta to be able to merge with the tree meta ch_msa .map{ meta, file -> [meta.subMap(["id", "tree", "args_tree", "args_tree_clean"]), meta, file] } - .join( ch_trees, by:0, remainder: true) - .filter { it.size() == 4 } + .join(ch_trees, by: [0], remainder:true ).view() .map{ tree_meta, meta, msa, tree -> [meta.subMap(["id"]), meta, msa, tree] } - .join( ch_optional_data, by:0) + .cross( ch_optional_data) .set { ch_msa_tree_data } - // // FOLDMASON VISUALISATION // @@ -37,7 +36,7 @@ workflow VISUALIZATION { ) ch_msa_tree_data - .combine(FOLDMASON_CREATEDB.out.db, by:0) + .combine(FOLDMASON_CREATEDB.out.db.collect(), by:0) .multiMap{ id, meta, msafile, treefile, pdb, dbfiles -> msa: [meta, msafile] @@ -48,7 +47,6 @@ workflow VISUALIZATION { ch_msa_db_tree } - ch_msa_db_tree.msa.view() FOLDMASON_MSA2LDDTREPORT( ch_msa_db_tree.msa, ch_msa_db_tree.db, From 058ab34c418beae454d83ee3e2bebfb7a49fc8dd Mon Sep 17 00:00:00 2001 From: luisas Date: Wed, 11 Dec 2024 12:53:56 +0100 Subject: [PATCH 7/8] fix bug --- assets/samplesheet.csv | 2 +- subworkflows/local/visualization.nf | 7 
+++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index dda13367..cc478c4c 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,2 +1,2 @@ id,fasta,reference,optional_data -seatoxin-ref,,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz \ No newline at end of file +seatoxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz \ No newline at end of file diff --git a/subworkflows/local/visualization.nf b/subworkflows/local/visualization.nf index dec524a8..037a4e1b 100644 --- a/subworkflows/local/visualization.nf +++ b/subworkflows/local/visualization.nf @@ -13,19 +13,18 @@ workflow VISUALIZATION { ch_versions = Channel.empty() ch_html = Channel.empty() - // Merge the msa and tree // split the msa meta to be able to merge with the tree meta ch_msa .map{ meta, file -> [meta.subMap(["id", "tree", "args_tree", "args_tree_clean"]), meta, file] } - .join(ch_trees, by: [0], remainder:true ).view() + .join(ch_trees, by: [0], remainder:true ) .map{ tree_meta, meta, msa, tree -> [meta.subMap(["id"]), meta, msa, tree] } - .cross( ch_optional_data) - .set { ch_msa_tree_data } + .combine( ch_optional_data, by: [0]) + .set{ ch_msa_tree_data } // // FOLDMASON VISUALISATION From 3b01ff243310da28b8bd56cdf9a02e46bb2418b3 Mon Sep 17 00:00:00 2001 From: luisas Date: Wed, 11 Dec 2024 17:27:07 +0100 Subject: [PATCH 8/8] add --- assets/samplesheet.csv | 3 ++- assets/toolsheet.csv | 3 ++- bin/shiny_app/shiny_app.py | 6 +++--- conf/modules.config | 20 +++++++++---------- 
subworkflows/local/align.nf | 1 + .../main.nf | 11 +++++++++- subworkflows/local/visualization.nf | 2 ++ 7 files changed, 30 insertions(+), 16 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index cc478c4c..d8b90e2c 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,2 +1,3 @@ id,fasta,reference,optional_data -seatoxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz \ No newline at end of file +seatoxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz +toxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin.ref, \ No newline at end of file diff --git a/assets/toolsheet.csv b/assets/toolsheet.csv index a5f1b6e7..2ae15365 100644 --- a/assets/toolsheet.csv +++ b/assets/toolsheet.csv @@ -1,3 +1,4 @@ tree,args_tree,aligner,args_aligner ,,FOLDMASON, -FAMSA,,FAMSA, \ No newline at end of file +FAMSA,,FAMSA, +FAMSA,,FOLDMASON, \ No newline at end of file diff --git a/bin/shiny_app/shiny_app.py b/bin/shiny_app/shiny_app.py index 77c395ee..dd79529f 100644 --- a/bin/shiny_app/shiny_app.py +++ b/bin/shiny_app/shiny_app.py @@ -30,9 +30,9 @@ sys.exit(1) def merge_tree_args(row): - if str(row["tree"]) == "nan": + if str(row["tree"]) == "DEFAULT": return "None" - elif str(row["args_tree"]) == "nan": + elif str(row["args_tree"]) == "default": return 
str(row["tree"]) + " ()" else: return str(row["tree"]) + " (" + str(row["args_tree"]) + ")" @@ -42,7 +42,7 @@ def merge_tree_args(row): def merge_aligner_args(row): if str(row["aligner"]) == "nan": return "None" - elif str(row["args_aligner"]) == "nan": + elif str(row["args_aligner"]) == "default": return str(row["aligner"]) + " ()" else: return str(row["aligner"]) + " (" + str(row["args_aligner"]) + ")" diff --git a/conf/modules.config b/conf/modules.config index 6a7c1433..b026fd56 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -74,7 +74,7 @@ meta.args_tree ? "args: ${meta.args_tree}" : "" ].join(' ').trim() } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}" } ext.args = { "${meta.args_tree}" == "null" ? '' : "${meta.args_tree}" } publishDir = [ path: { "${params.outdir}/trees/${meta.id}" }, @@ -99,7 +99,7 @@ meta.args_aligner ? "args: ${meta.args_aligner}" : "" ].join(' ').trim() } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } ext.args = { "${meta.args_aligner}" == "null" ? '' : "${meta.args_aligner}" } if(params.skip_compression){ publishDir = [ @@ -119,7 +119,7 @@ meta.args_aligner ? "args: ${meta.args_aligner}" : "" ].join(' ').trim() } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } ext.args = { "${meta.args_aligner}" == "null" ? 
'' : "${meta.args_aligner}" } if(params.skip_compression){ publishDir = [ @@ -174,21 +174,21 @@ // withName: 'PARSE_IRMSD' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } } withName: 'TCOFFEE_ALNCOMPARE_SP' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_sp" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_sp" } ext.args = "-compare_mode sp" } withName: 'TCOFFEE_ALNCOMPARE_TC' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tc" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tc" } ext.args = "-compare_mode tc" } withName: 'TCOFFEE_IRMSD' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: params.publish_dir_mode, @@ -198,7 +198,7 @@ } withName: "CALC_GAPS" { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_gaps" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_gaps" } } withName: "CONCAT_IRMSD" { @@ -222,7 +222,7 @@ } withName: 'TCOFFEE_TCS' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tcs" } + ext.prefix = { 
"${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tcs" } publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: params.publish_dir_mode, @@ -274,7 +274,7 @@ // Visualization // withName: 'FOLDMASON_MSA2LDDTREPORT' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } publishDir = [ path: { "${params.outdir}/visualization" }, mode: params.publish_dir_mode, diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 59538c5b..c8520618 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -49,6 +49,7 @@ workflow ALIGN { } .set { ch_tools_split } + ch_tools.view() // ------------------------------------------------ // Compute the required trees // ------------------------------------------------ diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index 12af446c..4c41253f 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -357,12 +357,21 @@ class Utils { // if clearnArgs is empty, return "" if (cleanArgs == null || cleanArgs == "") { - return "" + return "default" }else{ return cleanArgs } } + public static clean_tree(argsTree){ + + def tree = argsTree.toString() + if(tree == null || tree == "" || tree == "null"){ + return "DEFAULT" + } + return tree + } + public static fix_args(tool,args,tool_to_be_checked, required_flag, default_value) { /* This function checks if the required_flag is present in the args string for the tool_to_be_checked. 
diff --git a/subworkflows/local/visualization.nf b/subworkflows/local/visualization.nf index 037a4e1b..c2281650 100644 --- a/subworkflows/local/visualization.nf +++ b/subworkflows/local/visualization.nf @@ -13,6 +13,8 @@ workflow VISUALIZATION { ch_versions = Channel.empty() ch_html = Channel.empty() + + ch_msa.view() // Merge the msa and tree // split the msa meta to be able to merge with the tree meta ch_msa