diff --git a/.params_2024-12-02_11-11-58.json b/.params_2024-12-02_11-11-58.json new file mode 100644 index 00000000..538e9479 --- /dev/null +++ b/.params_2024-12-02_11-11-58.json @@ -0,0 +1,50 @@ +{ + "input": "https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv", + "tools": "https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/toolsheet/v1.0/toolsheet_small.csv", + "templates_suffix": ".pdb", + "optional_data_dir": null, + "build_consensus": true, + "skip_stats": false, + "calc_sim": true, + "calc_seq_stats": true, + "extract_plddt": true, + "skip_eval": false, + "calc_sp": true, + "calc_tc": true, + "calc_irmsd": true, + "calc_gaps": true, + "calc_tcs": true, + "skip_compression": false, + "multiqc_config": null, + "multiqc_title": null, + "multiqc_logo": null, + "max_multiqc_email_size": "25.MB", + "multiqc_methods_description": null, + "skip_multiqc": false, + "outdir": "results", + "publish_dir_mode": "copy", + "email": null, + "email_on_fail": null, + "plaintext_email": false, + "monochrome_logs": false, + "hook_url": null, + "help": false, + "help_full": false, + "show_hidden": false, + "version": false, + "pipelines_testdata_base_path": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "shiny_app": "/home/luisasantus/Desktop/multiplesequencealign/bin/shiny_app", + "skip_shiny": false, + "shiny_trace_mode": "latest", + "config_profile_name": "Full test profile", + "config_profile_description": "Full test dataset to check pipeline function", + "custom_config_version": "master", + "custom_config_base": "https://raw.githubusercontent.com/nf-core/configs/master", + "config_profile_contact": null, + "config_profile_url": null, + "validate_params": true, + "igenomes_ignore": true, + "genomes": { + + } +} \ No newline at end of file diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv new file mode 100644 index 00000000..d8b90e2c --- /dev/null +++ b/assets/samplesheet.csv @@ -0,0 +1,3 @@ +id,fasta,reference,optional_data +seatoxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz +toxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin.ref, \ No newline at end of file diff --git a/assets/toolsheet.csv b/assets/toolsheet.csv new file mode 100644 index 00000000..2ae15365 --- /dev/null +++ b/assets/toolsheet.csv @@ -0,0 +1,4 @@ +tree,args_tree,aligner,args_aligner +,,FOLDMASON, +FAMSA,,FAMSA, +FAMSA,,FOLDMASON, \ No newline at end of file diff --git a/bin/pdbs_to_fasta.py b/bin/pdbs_to_fasta.py new file mode 100755 index 00000000..5ad66d23 --- /dev/null +++ b/bin/pdbs_to_fasta.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +# read in multiple pdb files, extract the sequence and write to a fasta file +import sys +from Bio import PDB +from Bio.SeqUtils import seq1 + + +def pdb_to_fasta(pdb_file): + """ + Extract the sequence from a PDB file and format it in FASTA. 
+ """ + parser = PDB.PDBParser(QUIET=True) + structure = parser.get_structure(pdb_file, pdb_file) + fasta_sequences = [] + file_id = pdb_file.rsplit(".", 1)[0] # Use the file name without extension as ID + + for model in structure: + for chain in model: + sequence = [] + for residue in chain: + if PDB.is_aa(residue, standard=True): + sequence.append(seq1(residue.resname)) + if sequence: + fasta_sequences.append(f">{file_id}\n{''.join(sequence)}") + return "\n".join(fasta_sequences) + +def main(): + pdb_files = sys.argv[1:] + for pdb_file in pdb_files: + fasta = pdb_to_fasta(pdb_file) + print(f"{fasta}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/bin/shiny_app/shiny_app.py b/bin/shiny_app/shiny_app.py index 77c395ee..dd79529f 100644 --- a/bin/shiny_app/shiny_app.py +++ b/bin/shiny_app/shiny_app.py @@ -30,9 +30,9 @@ sys.exit(1) def merge_tree_args(row): - if str(row["tree"]) == "nan": + if str(row["tree"]) == "DEFAULT": return "None" - elif str(row["args_tree"]) == "nan": + elif str(row["args_tree"]) == "default": return str(row["tree"]) + " ()" else: return str(row["tree"]) + " (" + str(row["args_tree"]) + ")" @@ -42,7 +42,7 @@ def merge_tree_args(row): def merge_aligner_args(row): if str(row["aligner"]) == "nan": return "None" - elif str(row["args_aligner"]) == "nan": + elif str(row["args_aligner"]) == "default": return str(row["aligner"]) + " ()" else: return str(row["aligner"]) + " (" + str(row["args_aligner"]) + ")" diff --git a/conf/modules.config b/conf/modules.config index daf1df3d..b026fd56 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -74,7 +74,7 @@ meta.args_tree ? "args: ${meta.args_tree}" : "" ].join(' ').trim() } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}" } ext.args = { "${meta.args_tree}" == "null" ? '' : "${meta.args_tree}" } publishDir = [ path: { "${params.outdir}/trees/${meta.id}" }, @@ -99,7 +99,7 @@ meta.args_aligner ? "args: ${meta.args_aligner}" : "" ].join(' ').trim() } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } ext.args = { "${meta.args_aligner}" == "null" ? '' : "${meta.args_aligner}" } if(params.skip_compression){ publishDir = [ @@ -119,7 +119,7 @@ meta.args_aligner ? "args: ${meta.args_aligner}" : "" ].join(' ').trim() } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } ext.args = { "${meta.args_aligner}" == "null" ? 
'' : "${meta.args_aligner}" } if(params.skip_compression){ publishDir = [ @@ -174,21 +174,21 @@ // withName: 'PARSE_IRMSD' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } } withName: 'TCOFFEE_ALNCOMPARE_SP' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_sp" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_sp" } ext.args = "-compare_mode sp" } withName: 'TCOFFEE_ALNCOMPARE_TC' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tc" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tc" } ext.args = "-compare_mode tc" } withName: 'TCOFFEE_IRMSD' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: params.publish_dir_mode, @@ -198,7 +198,7 @@ } withName: "CALC_GAPS" { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_gaps" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_gaps" } } withName: "CONCAT_IRMSD" { @@ -222,7 +222,7 @@ } withName: 'TCOFFEE_TCS' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tcs" } + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tcs" } publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: params.publish_dir_mode, @@ -269,4 +269,16 @@ saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + + // + // Visualization + // + withName: 'FOLDMASON_MSA2LDDTREPORT' { + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } + publishDir = [ + path: { "${params.outdir}/visualization" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } } diff --git a/conf/test.config b/conf/test.config index a9e1502c..ee9cf66c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -35,6 +35,6 @@ params { build_consensus = true // Input data - input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv' + input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.1/samplesheet_test_af2.csv' tools = params.pipelines_testdata_base_path + 'multiplesequencealign/toolsheet/v1.0/toolsheet_full.csv' } diff --git a/conf/test_full.config b/conf/test_full.config index 83154c7c..2ee5ec7e 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -13,7 +13,7 @@ process { resourceLimits = [ cpus: 4, memory: '15.GB', - time: 4.h' + time: '4.h' ] } @@ -36,6 +36,6 @@ params { build_consensus = true // Input data for full size test - input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_full.csv' + input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.1/samplesheet_full.csv' tools = params.pipelines_testdata_base_path + 'multiplesequencealign/toolsheet/v1.0/toolsheet_full.csv' } diff --git a/conf/test_parameters.config b/conf/test_parameters.config index 2eded35e..51e37ae0 100644 --- a/conf/test_parameters.config +++ b/conf/test_parameters.config @@ -25,6 +25,6 @@ params { skip_compression = false // Input data - input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv' + input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.1/samplesheet_test_af2.csv' tools = params.pipelines_testdata_base_path + 'multiplesequencealign/toolsheet/v1.0/toolsheet_full.csv' } diff --git a/conf/test_pdb.config b/conf/test_pdb.config index f0eb3fd6..f55ddf7f 100644 --- a/conf/test_pdb.config +++ b/conf/test_pdb.config @@ -24,14 +24,15 @@ params { config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' - skip_stats = true - calc_irmsd = true - calc_sp = false - calc_tc = false - calc_gaps = false - calc_tcs = false + skip_preprocessing = false + skip_stats = true + calc_irmsd = true + calc_sp = false + calc_tc = false + calc_gaps = false + calc_tcs = false // Input data - input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test.csv' + input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.1/samplesheet_test.csv' tools = params.pipelines_testdata_base_path + 'multiplesequencealign/toolsheet/v1.0/toolsheet_structural.csv' } diff --git a/conf/test_small.config b/conf/test_small.config index 7ee4fc78..956900a8 100644 --- a/conf/test_small.config +++ b/conf/test_small.config @@ -35,6 +35,6 @@ params { build_consensus = true // Input data for full size test - input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.0/samplesheet_test_af2.csv' + input = params.pipelines_testdata_base_path + 'multiplesequencealign/samplesheet/v1.1/samplesheet_test_af2.csv' tools = params.pipelines_testdata_base_path + 'multiplesequencealign/toolsheet/v1.0/toolsheet_small.csv' } diff --git a/modules.json b/modules.json index 3b7e8f53..7a9db7e2 100644 --- a/modules.json +++ b/modules.json @@ -8,139 +8,223 @@ "clustalo/align": { "branch": "master", "git_sha": "2a8530b890878747f5063a894bad9fb2abd5c071", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, 
"clustalo/guidetree": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "csvtk/concat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "csvtk/join": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/csvtk/join/csvtk-join.diff" }, "famsa/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "famsa/guidetree": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, - "foldmason/easymsa": { + "fastavalidator": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "foldmason/createdb": { + "branch": "master", + "git_sha": "0270c0fbbbb09456d7823605e4285c4a2c5bbf40", + "installed_by": [ + "modules" + ] + }, + "foldmason/easymsa": { + "branch": "master", + "git_sha": "8541ec46706d6610b032748fa51acf4b3094ced8", + "installed_by": [ + "modules" + ], + "patch": "modules/nf-core/foldmason/easymsa/foldmason-easymsa.diff" + }, + "foldmason/msa2lddtreport": { + "branch": "master", + "git_sha": "d3555a4a33ae94269b65f79f7066ac2fcb836005", + "installed_by": [ + "modules" + ] }, "kalign/align": { "branch": "master", "git_sha": "cadb9bbfe56001ac421e0ee87808b0ccc754593a", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/kalign/align/kalign-align.diff" }, "learnmsa/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mafft": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mafft/align": { "branch": "master", "git_sha": "868cb0d7fc4862991fb7c2b4cd7289806cd53f81", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mafft/guidetree": { "branch": "master", "git_sha": "968b494e20f439a9ed3d23c89274e6a4a625eb92", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "magus/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mtmalign/align": { "branch": "master", "git_sha": "4eecd9a0c06fa508ae314c06ac952c161c019679", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "muscle5/super5": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pigz/compress": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pigz/uncompress": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/align": { "branch": "master", "git_sha": "66b22564bc1bc0db7292f2073cdef954ead773e7", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "tcoffee/alncompare": { "branch": "master", "git_sha": "ffa000ab3c33df25a165b5f9a039c4cbb665a77b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/consensus": { "branch": "master", "git_sha": "023e51187884ea6cc7290767486f551565f1b77a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "tcoffee/extractfrompdb": { + "branch": "master", + "git_sha": "1f94c91de2b9e9c6b42fca53e823cada9a8b8465", + "installed_by": [ + "modules" + ], + "patch": "modules/nf-core/tcoffee/extractfrompdb/tcoffee-extractfrompdb.diff" }, "tcoffee/irmsd": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/regressive": { "branch": "master", "git_sha": "66b22564bc1bc0db7292f2073cdef954ead773e7", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/seqreformat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tcoffee/tcs": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "upp/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -149,20 +233,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "1b89f75f1aa2021ec3360d0deccd0f6e97240551", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/local/custom_pdbtofasta.nf b/modules/local/custom_pdbtofasta.nf new file mode 100644 index 00000000..17bb256d --- /dev/null +++ b/modules/local/custom_pdbtofasta.nf @@ -0,0 +1,42 @@ +process CUSTOM_PDBSTOFASTA { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::python=3.11.0 conda-forge::biopython=1.80 conda-forge::pandas=1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' : + 'biocontainers/mulled-v2-27978155697a3671f3ef9aead4b5c823a02cc0b7:548df772fe13c0232a7eab1bc1deb98b495a05ab-0' }" + + + input: + tuple val(meta), path(structures) + + output: + tuple val (meta), path("${prefix}.fa"), emit: fasta + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + """ + pdbs_to_fasta.py ${structures} > ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastavalidator/environment.yml b/modules/nf-core/fastavalidator/environment.yml new file mode 100644 index 00000000..44d55c1f --- /dev/null +++ b/modules/nf-core/fastavalidator/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::py_fasta_validator=0.6" diff --git a/modules/nf-core/fastavalidator/main.nf b/modules/nf-core/fastavalidator/main.nf new file mode 100644 index 00000000..ac5470fb --- /dev/null +++ b/modules/nf-core/fastavalidator/main.nf @@ -0,0 +1,62 @@ +process FASTAVALIDATOR { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/py_fasta_validator:0.6--py37h595c7a6_0': + 'biocontainers/py_fasta_validator:0.6--py37h595c7a6_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.success.log') , emit: success_log , optional: true + tuple val(meta), path('*.error.log') , emit: error_log , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + py_fasta_validator \\ + -f $fasta \\ + 2> "${prefix}.error.log" \\ + || echo "Errors from fasta_validate printed to ${prefix}.error.log" + + if [ \$(cat "${prefix}.error.log" | wc -l) -gt 0 ]; then + echo "Validation failed..." + + cat \\ + "${prefix}.error.log" + else + echo "Validation successful..." + + mv \\ + "${prefix}.error.log" \\ + fasta_validate.stderr + + echo "Validation successful..." \\ + > "${prefix}.success.log" + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + py_fasta_validator: \$(py_fasta_validator -v | sed 's/.* version //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "Validation successful..." 
\\ + > "${prefix}.success.log" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + py_fasta_validator: \$(py_fasta_validator -v | sed 's/.* version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastavalidator/meta.yml b/modules/nf-core/fastavalidator/meta.yml new file mode 100644 index 00000000..94198e62 --- /dev/null +++ b/modules/nf-core/fastavalidator/meta.yml @@ -0,0 +1,61 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "fastavalidator" +description: | + "Python C-extension for a simple validator for fasta files. The module emits the validated file or an + error log upon validation failure." +keywords: + - fasta + - validation + - genome +tools: + - fasta_validate: + description: | + "Python C-extension for a simple C code to validate a fasta file. It only checks a few things, + and by default only sets its response via the return code, + so you will need to check that!" + homepage: "https://github.com/linsalrob/py_fasta_validator" + documentation: "https://github.com/linsalrob/py_fasta_validator" + tool_dev_url: "https://github.com/linsalrob/py_fasta_validator" + doi: "10.5281/zenodo.5002710" + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing file information + e.g. [ id:'test' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.fasta" +output: + - success_log: + - meta: + type: map + description: | + Groovy Map containing file information + e.g. [ id:'test' ] + - "*.success.log": + type: file + description: Log file for successful validation + pattern: "*.success.log" + - error_log: + - meta: + type: map + description: | + Groovy Map containing file information + e.g. [ id:'test' ] + - "*.error.log": + type: file + description: Log file for failed validation + pattern: "*.error.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@gallvp" +maintainers: + - "@gallvp" diff --git a/modules/nf-core/fastavalidator/tests/main.nf.test b/modules/nf-core/fastavalidator/tests/main.nf.test new file mode 100644 index 00000000..39b00d8b --- /dev/null +++ b/modules/nf-core/fastavalidator/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process FASTAVALIDATOR" + script "../main.nf" + process "FASTAVALIDATOR" + + tag "modules" + tag "modules_nfcore" + tag "fastavalidator" + + test("sarscov2-fasta-valid") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.success_log != null }, + { assert process.out.error_log == [] }, + { assert path(process.out.success_log.get(0).get(1)).getText().contains("Validation successful...") } + ) + } + + } + + test("sarscov2-gff3-invalid") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.success_log == [] }, + { assert process.out.error_log != null }, + { assert 
path(process.out.error_log.get(0).get(1)).getText().contains("genome.gff3 does not start with a >") } + ) + } + + } +} diff --git a/modules/nf-core/fastavalidator/tests/main.nf.test.snap b/modules/nf-core/fastavalidator/tests/main.nf.test.snap new file mode 100644 index 00000000..382dee72 --- /dev/null +++ b/modules/nf-core/fastavalidator/tests/main.nf.test.snap @@ -0,0 +1,76 @@ +{ + "sarscov2-fasta-valid": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.success.log:md5,b0b859eda1db5cd43915846e00ebc22c" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ], + "error_log": [ + + ], + "success_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.success.log:md5,b0b859eda1db5cd43915846e00ebc22c" + ] + ], + "versions": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ] + } + ], + "timestamp": "2023-11-28T11:23:25.106872" + }, + "sarscov2-gff3-invalid": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.error.log:md5,531d520c0e7767176f743f197f1f87b3" + ] + ], + "2": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ], + "error_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.error.log:md5,531d520c0e7767176f743f197f1f87b3" + ] + ], + "success_log": [ + + ], + "versions": [ + "versions.yml:md5,05aa059840b3b4dd6d88bc1e4bf976d7" + ] + } + ], + "timestamp": "2023-11-28T11:23:29.40324" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastavalidator/tests/tags.yml b/modules/nf-core/fastavalidator/tests/tags.yml new file mode 100644 index 00000000..c3c77576 --- /dev/null +++ b/modules/nf-core/fastavalidator/tests/tags.yml @@ -0,0 +1,2 @@ +fastavalidator: + - "modules/nf-core/fastavalidator/**" diff --git a/modules/nf-core/foldmason/createdb/environment.yml b/modules/nf-core/foldmason/createdb/environment.yml new file mode 100644 index 00000000..80d4dd37 --- /dev/null +++ b/modules/nf-core/foldmason/createdb/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::foldmason=2.7bd21ed diff --git a/modules/nf-core/foldmason/createdb/main.nf b/modules/nf-core/foldmason/createdb/main.nf new file mode 100644 index 00000000..c54b45f3 --- /dev/null +++ b/modules/nf-core/foldmason/createdb/main.nf @@ -0,0 +1,47 @@ +process FOLDMASON_CREATEDB { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a8/a88d162c3f39a1518d48c3faec235e6fcde750586da868b62fc5f0a08a89aa9d/data' : + 'community.wave.seqera.io/library/foldmason:2.7bd21ed--e7f739473ad6578d' }" + + input: + tuple val(meta) , path(structures) + + output: + tuple val(meta), path("${prefix}*"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + foldmason createdb \\ + ${structures} \\ + ${prefix} \\ + $args \\ + --threads $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + foldmason: \$(foldmason | grep "foldmason Version:" | cut -d":" -f 2 | awk '{\$1=\$1;print}') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + foldmason: \$(foldmason | grep "foldmason Version:" | cut -d":" -f 2 | awk '{\$1=\$1;print}') + END_VERSIONS + """ +} diff --git a/modules/nf-core/foldmason/createdb/meta.yml b/modules/nf-core/foldmason/createdb/meta.yml new file mode 100644 index 00000000..fd47efe2 --- /dev/null +++ b/modules/nf-core/foldmason/createdb/meta.yml @@ -0,0 +1,48 @@ +name: "foldmason_createdb" +description: Creates a database for Foldmason. +keywords: + - alignment + - MSA + - genomics + - structure +tools: + - "foldmason": + description: "Multiple Protein Structure Alignment at Scale with FoldMason" + homepage: "https://github.com/steineggerlab/foldmason" + documentation: "https://github.com/steineggerlab/foldmason" + tool_dev_url: "https://github.com/steineggerlab/foldmason" + doi: "10.1101/2024.08.01.606130" + licence: ["GPL v3"] + identifier: biotools:foldmason + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - structures: + type: file + description: Input protein structures in `PDB` or `mmCIF` format. + pattern: "*.{pdb,mmcif}" + +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "${prefix}*": + type: file + description: All database files created by Foldmason + pattern: "*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" +maintainers: + - "@luisas" diff --git a/modules/nf-core/foldmason/createdb/tests/main.nf.test b/modules/nf-core/foldmason/createdb/tests/main.nf.test new file mode 100644 index 00000000..9ac567a7 --- /dev/null +++ b/modules/nf-core/foldmason/createdb/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + + name "Test Process FOLDMASON_CREATEDB" + script "../main.nf" + process "FOLDMASON_CREATEDB" + + tag "modules" + tag "modules_nfcore" + tag "foldmason" + tag "foldmason/createdb" + tag "untar" + + setup { + + run("UNTAR") { + script "../../../../../modules/nf-core/untar/main.nf" + process { + """ + archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/af2_structures/seatoxin-ref.tar.gz", checkIfExists: true) + input[0] = Channel.of(tuple([id:'test'], archive)) + """ + } + } + } + + test("seatoxin") { + + when { + + process { + """ + input[0] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("seatoxin - stub ") { + + when { + + process { + """ + input[0] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/foldmason/createdb/tests/main.nf.test.snap b/modules/nf-core/foldmason/createdb/tests/main.nf.test.snap new file mode 100644 index 00000000..dce5175d --- /dev/null +++ b/modules/nf-core/foldmason/createdb/tests/main.nf.test.snap @@ -0,0 +1,128 @@ +{ + "seatoxin - stub ": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test:md5,941321067eae439b4a8ccf6425d84751", + "test.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015", + "test.lookup:md5,83047fddf9b6fbaa99f00cdf5a7dd274", + "test.source:md5,7968a6cb6577ead230c31910380af948", + "test_ca:md5,2738c516c1a15238a5c62c4ba6dee0b9", + "test_ca.dbtype:md5,3fd85f9ee7ca8882c8caa747d0eef0b3", + "test_ca.index:md5,cfdf544c3aa6d7e2034e4a01dac1d0ba", + "test_h:md5,ab9ce99a99fc6ba6a98c4460410b6a16", + "test_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2", + "test_h.index:md5,dc7c33ddb6a3dc54ad033120ef4c9af4", + "test_ss:md5,75d329b63c0383c3e43090ba89238e14", + "test_ss.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test_ss.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015" + ] + ] + ], + "1": [ + "versions.yml:md5,6ebe56979a45b356d374cfc65c8a2b45" + ], + "db": [ + [ + { + "id": "test" + }, + [ + "test:md5,941321067eae439b4a8ccf6425d84751", + "test.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015", + "test.lookup:md5,83047fddf9b6fbaa99f00cdf5a7dd274", + "test.source:md5,7968a6cb6577ead230c31910380af948", + "test_ca:md5,2738c516c1a15238a5c62c4ba6dee0b9", + "test_ca.dbtype:md5,3fd85f9ee7ca8882c8caa747d0eef0b3", + "test_ca.index:md5,cfdf544c3aa6d7e2034e4a01dac1d0ba", + "test_h:md5,ab9ce99a99fc6ba6a98c4460410b6a16", + "test_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2", + "test_h.index:md5,dc7c33ddb6a3dc54ad033120ef4c9af4", + "test_ss:md5,75d329b63c0383c3e43090ba89238e14", + 
"test_ss.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test_ss.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015" + ] + ] + ], + "versions": [ + "versions.yml:md5,6ebe56979a45b356d374cfc65c8a2b45" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-09T15:11:27.426024133" + }, + "seatoxin": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test:md5,941321067eae439b4a8ccf6425d84751", + "test.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015", + "test.lookup:md5,83047fddf9b6fbaa99f00cdf5a7dd274", + "test.source:md5,7968a6cb6577ead230c31910380af948", + "test_ca:md5,2738c516c1a15238a5c62c4ba6dee0b9", + "test_ca.dbtype:md5,3fd85f9ee7ca8882c8caa747d0eef0b3", + "test_ca.index:md5,cfdf544c3aa6d7e2034e4a01dac1d0ba", + "test_h:md5,ab9ce99a99fc6ba6a98c4460410b6a16", + "test_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2", + "test_h.index:md5,dc7c33ddb6a3dc54ad033120ef4c9af4", + "test_ss:md5,75d329b63c0383c3e43090ba89238e14", + "test_ss.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test_ss.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015" + ] + ] + ], + "1": [ + "versions.yml:md5,6ebe56979a45b356d374cfc65c8a2b45" + ], + "db": [ + [ + { + "id": "test" + }, + [ + "test:md5,941321067eae439b4a8ccf6425d84751", + "test.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015", + "test.lookup:md5,83047fddf9b6fbaa99f00cdf5a7dd274", + "test.source:md5,7968a6cb6577ead230c31910380af948", + "test_ca:md5,2738c516c1a15238a5c62c4ba6dee0b9", + "test_ca.dbtype:md5,3fd85f9ee7ca8882c8caa747d0eef0b3", + "test_ca.index:md5,cfdf544c3aa6d7e2034e4a01dac1d0ba", + "test_h:md5,ab9ce99a99fc6ba6a98c4460410b6a16", + "test_h.dbtype:md5,740bab4f9ec8808aedb68d6b1281aeb2", + "test_h.index:md5,dc7c33ddb6a3dc54ad033120ef4c9af4", + "test_ss:md5,75d329b63c0383c3e43090ba89238e14", + "test_ss.dbtype:md5,f1d3ff8443297732862df21dc4e57262", + "test_ss.index:md5,8d3c65fcda8b216fff2f1eb2c4dcc015" + ] + ] + ], + "versions": [ + "versions.yml:md5,6ebe56979a45b356d374cfc65c8a2b45" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-09T15:11:15.375341633" + } +} \ No newline at end of file diff --git a/modules/nf-core/foldmason/easymsa/foldmason-easymsa.diff b/modules/nf-core/foldmason/easymsa/foldmason-easymsa.diff new file mode 100644 index 00000000..eed347f6 --- /dev/null +++ b/modules/nf-core/foldmason/easymsa/foldmason-easymsa.diff @@ -0,0 +1,50 @@ +Changes in module 'nf-core/foldmason/easymsa' +Changes in 'foldmason/easymsa/main.nf': +--- modules/nf-core/foldmason/easymsa/main.nf ++++ modules/nf-core/foldmason/easymsa/main.nf +@@ -12,7 +12,7 @@ + + output: + tuple val(meta), path("${prefix}_3di.fa${compress ? '.gz' : ''}"), emit: msa_3di +- tuple val(meta), path("${prefix}_aa.fa${compress ? '.gz' : ''}") , emit: msa_aa ++ tuple val(meta), path("${prefix}.fa${compress ? '.gz' : ''}") , emit: msa_aa + path "versions.yml" , emit: versions + + when: +@@ -33,7 +33,7 @@ + + if ${compress}; then + pigz -p ${task.cpus} ${prefix}_3di.fa +- pigz -p ${task.cpus} ${prefix}_aa.fa ++ pigz -p ${task.cpus} ${prefix}.fa + fi + + cat <<-END_VERSIONS > versions.yml +@@ -48,7 +48,7 @@ + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" ${compress ? '| gzip' : ''} > ${prefix}_3di.fa${compress ? '.gz' : ''} +- echo "" ${compress ? '| gzip' : ''} > ${prefix}_aa.fa${compress ? '.gz' : ''} ++ echo "" ${compress ? '| gzip' : ''} > ${prefix}.fa${compress ? 
'.gz' : ''} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +Changes in 'foldmason/easymsa/meta.yml': +--- modules/nf-core/foldmason/easymsa/meta.yml ++++ modules/nf-core/foldmason/easymsa/meta.yml +@@ -58,7 +58,7 @@ + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` +- - "${prefix}_aa.fa${compress ? '.gz' : ''}": ++ - "${prefix}.fa${compress ? '.gz' : ''}": + type: file + description: Fasta file containing the multiple sequence alignment with Amino + Acid alphabet + +'modules/nf-core/foldmason/easymsa/environment.yml' is unchanged +'modules/nf-core/foldmason/easymsa/tests/main.nf.test.snap' is unchanged +'modules/nf-core/foldmason/easymsa/tests/tags.yml' is unchanged +'modules/nf-core/foldmason/easymsa/tests/main.nf.test' is unchanged +************************************************************ diff --git a/modules/nf-core/foldmason/easymsa/main.nf b/modules/nf-core/foldmason/easymsa/main.nf index a1e4e910..b3357364 100644 --- a/modules/nf-core/foldmason/easymsa/main.nf +++ b/modules/nf-core/foldmason/easymsa/main.nf @@ -3,15 +3,16 @@ process FOLDMASON_EASYMSA { label 'process_medium' conda "${moduleDir}/environment.yml" - container "community.wave.seqera.io/library/foldmason_pigz:97b3311addb0f4a7" + container "community.wave.seqera.io/library/foldmason_pigz:54849036d93c89ed" input: - tuple val(meta), path(pdbs) + tuple val(meta) , path(pdbs) + tuple val(meta2), path(tree) val(compress) output: tuple val(meta), path("${prefix}_3di.fa${compress ? '.gz' : ''}"), emit: msa_3di - tuple val(meta), path("${prefix}_aa.fa${compress ? '.gz' : ''}") , emit: msa_aa + tuple val(meta), path("${prefix}.fa${compress ? '.gz' : ''}") , emit: msa_aa path "versions.yml" , emit: versions when: @@ -20,17 +21,21 @@ process FOLDMASON_EASYMSA { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" + def options_tree = tree ? "--guide-tree $tree" : "" """ foldmason easy-msa \\ - $args \\ - --threads $task.cpus \\ ${pdbs} \\ ${prefix} \\ - tmp + tmp \\ + ${options_tree} \\ + $args \\ + --threads $task.cpus + + mv ${prefix}_aa.fa ${prefix}.fa if ${compress}; then pigz -p ${task.cpus} ${prefix}_3di.fa - pigz -p ${task.cpus} ${prefix}_aa.fa + pigz -p ${task.cpus} ${prefix}.fa fi cat <<-END_VERSIONS > versions.yml @@ -45,7 +50,7 @@ process FOLDMASON_EASYMSA { prefix = task.ext.prefix ?: "${meta.id}" """ echo "" ${compress ? '| gzip' : ''} > ${prefix}_3di.fa${compress ? '.gz' : ''} - echo "" ${compress ? '| gzip' : ''} > ${prefix}_aa.fa${compress ? '.gz' : ''} + echo "" ${compress ? '| gzip' : ''} > ${prefix}.fa${compress ? '.gz' : ''} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/foldmason/easymsa/meta.yml b/modules/nf-core/foldmason/easymsa/meta.yml index cc00f44c..90322559 100644 --- a/modules/nf-core/foldmason/easymsa/meta.yml +++ b/modules/nf-core/foldmason/easymsa/meta.yml @@ -26,6 +26,15 @@ input: type: file description: Input protein structures in PDB format. pattern: "*.{pdb,mmcif}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - tree: + type: file + description: Input guide tree in Newick format. + pattern: "*.{dnd,nwk}" - - compress: type: boolean description: Flag representing whether the output MSA should be compressed. @@ -49,7 +58,7 @@ output: description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - "${prefix}_aa.fa${compress ? 
'.gz' : ''}": + - "${prefix}.fa${compress ? '.gz' : ''}": type: file description: Fasta file containing the multiple sequence alignment with Amino Acid alphabet diff --git a/modules/nf-core/foldmason/easymsa/tests/main.nf.test b/modules/nf-core/foldmason/easymsa/tests/main.nf.test index 663c6975..d6db9bca 100644 --- a/modules/nf-core/foldmason/easymsa/tests/main.nf.test +++ b/modules/nf-core/foldmason/easymsa/tests/main.nf.test @@ -9,20 +9,35 @@ nextflow_process { tag "foldmason" tag "foldmason/easymsa" tag "untar" + tag "famsa" + tag "famsa/guidetree" + + setup { + + run("UNTAR") { + script "../../../../../modules/nf-core/untar/main.nf" + process { + """ + archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/af2_structures/seatoxin-ref.tar.gz", checkIfExists: true) + input[0] = Channel.of(tuple([id:'test'], archive)) + """ + } + } - test("Test on seatoxin dataset - uncompressed") { - setup { - - run("UNTAR") { - script "../../../../../modules/nf-core/untar/main.nf" - process { - """ - archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) - input[0] = Channel.of(tuple([id:'test'], archive)) - """ - } + run("FAMSA_GUIDETREE") { + script "../../../../../modules/nf-core/famsa/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test_tree' ], // meta map + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ } } + } + + + test("Test on seatoxin dataset - uncompressed") { when { params { @@ -30,7 +45,8 @@ nextflow_process { process { """ input[0] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} - input[1] = false + input[1] = [[:],[]] + input[2] = false """ } } @@ -44,26 +60,37 @@ nextflow_process { } test("Test on seatoxin dataset - compressed") { - setup { - - run("UNTAR") { - script "../../../../../modules/nf-core/untar/main.nf" - process { - """ - archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) - input[0] = Channel.of(tuple([id:'test'], archive)) - """ - } + + when { + params { + } + process { + """ + input[0] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file(dir).listFiles().collect()]} + input[1] = [[:],[]] + input[2] = true + """ } } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Test on seatoxin dataset - guide_tree") { + when { params { } process { """ input[0] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file(dir).listFiles().collect()]} - input[1] = true + input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_tree'], tree]} + input[2] = false """ } } @@ -79,26 +106,14 @@ nextflow_process { test("Stub run") { options "-stub" - setup { - - run("UNTAR") { - script "../../../../../modules/nf-core/untar/main.nf" - process { - """ - archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) - input[0] = Channel.of(tuple([id:'test'], archive)) - """ - } - } - } - when { params { } process { """ input[0] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} - input[1] = false + input[1] = [[:],[]] + input[2] = false """ } } diff --git 
a/modules/nf-core/foldmason/easymsa/tests/main.nf.test.snap b/modules/nf-core/foldmason/easymsa/tests/main.nf.test.snap index 50af9b7f..384d2021 100644 --- a/modules/nf-core/foldmason/easymsa/tests/main.nf.test.snap +++ b/modules/nf-core/foldmason/easymsa/tests/main.nf.test.snap @@ -42,7 +42,11 @@ ] } ], - "timestamp": "2024-08-28T09:10:12.591561643" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T10:56:12.836231763" }, "Test on seatoxin dataset - uncompressed": { "content": [ @@ -52,7 +56,7 @@ { "id": "test" }, - "test_3di.fa:md5,ba1b6c8c5df11bdebfea12070bd9cb17" + "test_3di.fa:md5,e7da437c6ddb5ced60ff7c49b147a65d" ] ], "1": [ @@ -60,7 +64,7 @@ { "id": "test" }, - "test_aa.fa:md5,33e93479603115b46ef76af3f6a20cf1" + "test_aa.fa:md5,2d90cd080424db8024e6c404c1cfc0b4" ] ], "2": [ @@ -71,7 +75,7 @@ { "id": "test" }, - "test_3di.fa:md5,ba1b6c8c5df11bdebfea12070bd9cb17" + "test_3di.fa:md5,e7da437c6ddb5ced60ff7c49b147a65d" ] ], "msa_aa": [ @@ -79,7 +83,7 @@ { "id": "test" }, - "test_aa.fa:md5,33e93479603115b46ef76af3f6a20cf1" + "test_aa.fa:md5,2d90cd080424db8024e6c404c1cfc0b4" ] ], "versions": [ @@ -87,7 +91,11 @@ ] } ], - "timestamp": "2024-08-28T08:58:09.52040475" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T10:55:41.89060384" }, "Test on seatoxin dataset - compressed": { "content": [ @@ -97,7 +105,7 @@ { "id": "test" }, - "test_3di.fa.gz:md5,ba1b6c8c5df11bdebfea12070bd9cb17" + "test_3di.fa.gz:md5,e7da437c6ddb5ced60ff7c49b147a65d" ] ], "1": [ @@ -105,7 +113,7 @@ { "id": "test" }, - "test_aa.fa.gz:md5,33e93479603115b46ef76af3f6a20cf1" + "test_aa.fa.gz:md5,2d90cd080424db8024e6c404c1cfc0b4" ] ], "2": [ @@ -116,7 +124,7 @@ { "id": "test" }, - "test_3di.fa.gz:md5,ba1b6c8c5df11bdebfea12070bd9cb17" + "test_3di.fa.gz:md5,e7da437c6ddb5ced60ff7c49b147a65d" ] ], "msa_aa": [ @@ -124,7 +132,7 @@ { "id": "test" }, - "test_aa.fa.gz:md5,33e93479603115b46ef76af3f6a20cf1" + "test_aa.fa.gz:md5,2d90cd080424db8024e6c404c1cfc0b4" ] ], "versions": [ @@ -132,6 +140,59 @@ ] } ], - "timestamp": "2024-08-29T13:44:02.750191" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T10:55:52.135344443" + }, + "Test on seatoxin dataset - guide_tree": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_3di.fa:md5,46fa911158bb736c054dfad0378832b4" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_aa.fa:md5,7ada48f0152342787a46505b9e8a2fae" + ] + ], + "2": [ + "versions.yml:md5,da4694171d1b0bb9559f7049334126ed" + ], + "msa_3di": [ + [ + { + "id": "test" + }, + "test_3di.fa:md5,46fa911158bb736c054dfad0378832b4" + ] + ], + "msa_aa": [ + [ + { + "id": "test" + }, + "test_aa.fa:md5,7ada48f0152342787a46505b9e8a2fae" + ] + ], + "versions": [ + "versions.yml:md5,da4694171d1b0bb9559f7049334126ed" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-04T10:56:02.473496089" } } \ No newline at end of file diff --git a/modules/nf-core/foldmason/msa2lddtreport/environment.yml b/modules/nf-core/foldmason/msa2lddtreport/environment.yml new file mode 100644 index 00000000..80d4dd37 --- /dev/null +++ b/modules/nf-core/foldmason/msa2lddtreport/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::foldmason=2.7bd21ed diff --git a/modules/nf-core/foldmason/msa2lddtreport/main.nf b/modules/nf-core/foldmason/msa2lddtreport/main.nf new file mode 100644 index 00000000..e07a7552 --- /dev/null +++ 
b/modules/nf-core/foldmason/msa2lddtreport/main.nf @@ -0,0 +1,52 @@ +process FOLDMASON_MSA2LDDTREPORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a8/a88d162c3f39a1518d48c3faec235e6fcde750586da868b62fc5f0a08a89aa9d/data' : + 'community.wave.seqera.io/library/foldmason:2.7bd21ed--e7f739473ad6578d' }" + input: + tuple val(meta) , path(msa) + tuple val(meta2), path(db) + tuple val(meta3), path(pdbs) + tuple val(meta4), path(tree) + + output: + tuple val(meta), path("${prefix}.html"), emit: html + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def options_tree = tree ? "--guide-tree $tree" : "" + """ + foldmason msa2lddtreport \\ + ${meta.id} \\ + ${msa} \\ + ${prefix}.html \\ + $args \\ + ${options_tree} \\ + --threads $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + foldmason: \$(foldmason | grep "foldmason Version:" | cut -d":" -f 2 | awk '{\$1=\$1;print}') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + foldmason: \$(foldmason | grep "foldmason Version:" | cut -d":" -f 2 | awk '{\$1=\$1;print}') + END_VERSIONS + """ +} diff --git a/modules/nf-core/foldmason/msa2lddtreport/meta.yml b/modules/nf-core/foldmason/msa2lddtreport/meta.yml new file mode 100644 index 00000000..cf9749ac --- /dev/null +++ b/modules/nf-core/foldmason/msa2lddtreport/meta.yml @@ -0,0 +1,74 @@ +name: "foldmason_msa2lddtreport" +description: Renders a visualization report using foldmason +keywords: + - alignment + - MSA + - genomics + - structure +tools: + - "foldmason": + description: "Multiple Protein Structure Alignment at Scale with FoldMason" + homepage: "https://github.com/steineggerlab/foldmason" + documentation: "https://github.com/steineggerlab/foldmason" + tool_dev_url: "https://github.com/steineggerlab/foldmason" + doi: "10.1101/2024.08.01.606130" + licence: ["GPL v3"] + identifier: biotools:foldmason + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - msa: + type: file + description: Input alignment file. + pattern: "*.{fa,fasta,aln}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - db: + type: file + description: Input foldmason database. + pattern: "*" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - pdbs: + type: file + description: Protein structures used for the visualization. + pattern: "*.{pdb}" + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - tree: + type: file + description: Guide tree used for the visualization . + pattern: "*.{nwk,dnd}" +output: + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "${prefix}.html": + type: file + description: HTML file with the foldmason visualization + pattern: "*.{html}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" +maintainers: + - "@luisas" diff --git a/modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test b/modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test new file mode 100644 index 00000000..b92c05d0 --- /dev/null +++ b/modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test @@ -0,0 +1,101 @@ +nextflow_process { + + name "Test Process FOLDMASON_MSA2LDDTREPORT" + script "../main.nf" + process "FOLDMASON_MSA2LDDTREPORT" + + tag "modules" + tag "modules_nfcore" + tag "foldmason" + tag "foldmason/msa2lddtreport" + tag "foldmason/createdb" + tag "untar" + tag "famsa/guidetree" + + + setup{ + run("UNTAR") { + script "../../../../../modules/nf-core/untar/main.nf" + process { + """ + archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/af2_structures/seatoxin-ref.tar.gz", checkIfExists: true) + input[0] = Channel.of(tuple([id:'test'], archive)) + """ + } + } + + run("FAMSA_GUIDETREE") { + script "../../../../../modules/nf-core/famsa/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test_tree' ], // meta map + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + run("FOLDMASON_CREATEDB") { + script "../../../../../modules/nf-core/foldmason/createdb/main.nf" + process { + """ + input[0] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} + """ + } + } + + + } + + test("seatoxin") { + + + when { + process { + """ + input[0] =[ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + ] + input[1] = FOLDMASON_CREATEDB.out.db.collect{ meta, db -> db }.map{ db -> [[ id: 'test'], db]} + input[2] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} + input[3] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_tree'], tree]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("seatixin - stub") { + + options "-stub" + + when { + process { + """ + input[0] =[ [ id:'test_tree' ], // meta map + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + ] + input[1] = FOLDMASON_CREATEDB.out.db.collect{ meta, db -> db }.map{ db -> [[ id: 'test'], db]} + input[2] = UNTAR.out.untar.map { meta,dir -> [meta, file(dir).listFiles().collect()]} + input[3] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_tree'], tree]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test.snap b/modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test.snap new file mode 100644 index 00000000..da11cfad --- /dev/null +++ b/modules/nf-core/foldmason/msa2lddtreport/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "seatixin - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_tree" + }, + "test_tree.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + 
"versions.yml:md5,ed922d0faa7a3e3c8171a05296939468" + ], + "html": [ + [ + { + "id": "test_tree" + }, + "test_tree.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ed922d0faa7a3e3c8171a05296939468" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T09:36:23.360594258" + }, + "seatoxin": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.html:md5,7a90a8e674dc45ce4181498b8f53b519" + ] + ], + "1": [ + "versions.yml:md5,ed922d0faa7a3e3c8171a05296939468" + ], + "html": [ + [ + { + "id": "test" + }, + "test.html:md5,7a90a8e674dc45ce4181498b8f53b519" + ] + ], + "versions": [ + "versions.yml:md5,ed922d0faa7a3e3c8171a05296939468" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T09:54:03.711567262" + } +} \ No newline at end of file diff --git a/modules/nf-core/tcoffee/extractfrompdb/environment.yml b/modules/nf-core/tcoffee/extractfrompdb/environment.yml new file mode 100644 index 00000000..7cc504fc --- /dev/null +++ b/modules/nf-core/tcoffee/extractfrompdb/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::t-coffee=13.46.0.919e8c6b diff --git a/modules/nf-core/tcoffee/extractfrompdb/main.nf b/modules/nf-core/tcoffee/extractfrompdb/main.nf new file mode 100644 index 00000000..919ba381 --- /dev/null +++ b/modules/nf-core/tcoffee/extractfrompdb/main.nf @@ -0,0 +1,52 @@ +process TCOFFEE_EXTRACTFROMPDB { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/t-coffee:13.46.0.919e8c6b--hfc96bf3_0': + 'biocontainers/t-coffee:13.46.0.919e8c6b--hfc96bf3_0' }" + + input: + tuple val(meta), path(pdb) + + output: + tuple val(meta), path("${pdb.baseName}.pdb"), emit: formatted_pdb + path "versions.yml" , emit: versions + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + export TEMP='./' + mkdir old + mv ${pdb} old/ + t_coffee -other_pg extract_from_pdb \ + -infile old/${pdb} \ + $args \ + > "${pdb.baseName}.pdb" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + # Otherwise, tcoffee will crash when calling its version + export TEMP='./' + touch "${pdb.baseName}.pdb" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tcoffee/extractfrompdb/meta.yml b/modules/nf-core/tcoffee/extractfrompdb/meta.yml new file mode 100644 index 00000000..107eabe1 --- /dev/null +++ b/modules/nf-core/tcoffee/extractfrompdb/meta.yml @@ -0,0 +1,43 @@ +name: "tcoffee_extractfrompdb" +description: Reformats the header of PDB files with t-coffee +keywords: + - reformatting + - pdb + - genomics +tools: + - "tcoffee": + description: "A collection of tools for Computing, Evaluating and Manipulating + Multiple Alignments of DNA, RNA, Protein Sequences and Structures." 
+ homepage: "http://www.tcoffee.org/Projects/tcoffee/" + documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" + tool_dev_url: "https://github.com/cbcrg/tcoffee" + doi: "10.1006/jmbi.2000.4042" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - pdb: + type: file + description: Input pdb to be reformatted +output: + - formatted_pdb: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - ${prefix}.pdb: + type: file + description: Formatted pdb file + pattern: "*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@luisas" diff --git a/modules/nf-core/tcoffee/extractfrompdb/tcoffee-extractfrompdb.diff b/modules/nf-core/tcoffee/extractfrompdb/tcoffee-extractfrompdb.diff new file mode 100644 index 00000000..302657ef --- /dev/null +++ b/modules/nf-core/tcoffee/extractfrompdb/tcoffee-extractfrompdb.diff @@ -0,0 +1,43 @@ +Changes in module 'nf-core/tcoffee/extractfrompdb' +Changes in 'tcoffee/extractfrompdb/main.nf': +--- modules/nf-core/tcoffee/extractfrompdb/main.nf ++++ modules/nf-core/tcoffee/extractfrompdb/main.nf +@@ -11,7 +11,7 @@ + tuple val(meta), path(pdb) + + output: +- tuple val(meta), path("${prefix}.pdb"), emit: formatted_pdb ++ tuple val(meta), path("${pdb.baseName}.pdb"), emit: formatted_pdb + path "versions.yml" , emit: versions + + +@@ -23,10 +23,12 @@ + prefix = task.ext.prefix ?: "${meta.id}" + """ + export TEMP='./' ++ mkdir old ++ mv ${pdb} old/ + t_coffee -other_pg extract_from_pdb \ +- -infile ${pdb} \ ++ -infile old/${pdb} \ + $args \ +- > "${prefix}.pdb" ++ > "${pdb.baseName}.pdb" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": +@@ -40,7 +42,7 @@ + """ + # Otherwise, tcoffee will crash when calling its version + export TEMP='./' +- touch "${prefix}.pdb" ++ touch "${pdb.baseName}.pdb" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +'modules/nf-core/tcoffee/extractfrompdb/meta.yml' is unchanged +'modules/nf-core/tcoffee/extractfrompdb/environment.yml' is unchanged +'modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test.snap' is unchanged +'modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test' is unchanged +************************************************************ diff --git a/modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test b/modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test new file mode 100644 index 00000000..939ff19a --- /dev/null +++ b/modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process TCOFFEE_EXTRACTFROMPDB" + script "../main.nf" + process "TCOFFEE_EXTRACTFROMPDB" + + tag "modules" + tag "modules_nfcore" + tag "tcoffee" + tag "tcoffee/extractfrompdb" + tag "untar" + + setup { + + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) + ] + + """ + } + } + } + + test("seatoxin ") { + + when { + process { + """ + input[0] = UNTAR.out.untar.collect{ meta, dir -> file(dir).listFiles().collect().first() }.map{ pdb -> [[ id: 'test'], pdb]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("seatoxin -stub ") { + + options 
"-stub" + + when { + process { + """ + input[0] = Channel.fromPath('empty_file.pdb').map{ pdb -> [[ id: 'test'], pdb]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + +} diff --git a/modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test.snap b/modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test.snap new file mode 100644 index 00000000..5c6ae6dc --- /dev/null +++ b/modules/nf-core/tcoffee/extractfrompdb/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "seatoxin -stub ": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.pdb:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,3ee943561db268aeeb8ebae110debd83" + ], + "formatted_pdb": [ + [ + { + "id": "test" + }, + "test.pdb:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3ee943561db268aeeb8ebae110debd83" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-09T09:23:26.228225523" + }, + "seatoxin ": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.pdb:md5,f4d68827f3a77d8439a6f82036a0bda2" + ] + ], + "1": [ + "versions.yml:md5,3ee943561db268aeeb8ebae110debd83" + ], + "formatted_pdb": [ + [ + { + "id": "test" + }, + "test.pdb:md5,f4d68827f3a77d8439a6f82036a0bda2" + ] + ], + "versions": [ + "versions.yml:md5,3ee943561db268aeeb8ebae110debd83" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-09T09:23:07.991961475" + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index ddeae18f..9858e9b9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,6 +15,9 @@ params { templates_suffix = ".pdb" optional_data_dir = null + // Preprocessing + skip_preprocessing = false + // Alignment build_consensus = false @@ -61,6 +64,9 @@ params { skip_shiny = false shiny_trace_mode = "latest" // all, latest + // Visualisation options + skip_visualisation = false + // Config options config_profile_name = null config_profile_description = null diff --git a/nextflow_schema.json b/nextflow_schema.json index a64a9b96..801909a5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -48,6 +48,12 @@ "help_text": "This is a folder with all the optional data files to be used in the pipeline.", "fa_icon": "fas fa-folder-open" }, + "skip_preprocessing": { + "type": "boolean", + "description": "Skip the preprocessing step for the input files.", + "fa_icon": "fas fa-fast-forward", + "help_text": "Skip the preprocessing step and use the files directly." + }, "outdir": { "type": "string", "format": "directory-path", @@ -203,6 +209,11 @@ "shiny_trace_mode": { "type": "string", "description": "variable containing the shiny_trace mode to be used." + }, + "skip_visualisation": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip the visualization generation." 
} } }, diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 9a98fb46..c8520618 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -49,11 +49,13 @@ workflow ALIGN { } .set { ch_tools_split } + ch_tools.view() // ------------------------------------------------ // Compute the required trees // ------------------------------------------------ COMPUTE_TREES ( ch_fastas, + ch_optional_data, ch_tools_split.tree.unique() ) trees = COMPUTE_TREES.out.trees @@ -68,9 +70,15 @@ workflow ALIGN { // ------------------------------------------------ // Add back trees to the fasta channel + // And prepare the inout channels for the aligners // ------------------------------------------------ + + // Tools that accept sequence and tree ch_fasta_tools .join(trees, by: [0], remainder:true ) + .filter{ + it[1] != null + } .map { metafasta_tree, metaalign, fasta, tree -> [ metafasta_tree + metaalign, fasta, tree ] @@ -95,6 +103,8 @@ workflow ALIGN { } .set { ch_fasta_trees } + + // tools that accept only optional data ch_optional_data.combine(ch_tools) .map { metadependency, template, dependency, metatree, metaalign -> @@ -102,10 +112,31 @@ workflow ALIGN { } .branch { mtmalign: it[0]["aligner"] == "MTMALIGN" - foldmason: it[0]["aligner"] == "FOLDMASON" } .set { ch_optional_data_tools } + + // tools that accept optional data and tree + ch_optional_data.combine(ch_tools) + .map { + metadependency, template, dependency, metatree, metaalign -> + [ metadependency+metatree , metaalign, template, dependency ] + } + .join(trees, by: 0, remainder: true) + .filter{ + it.size() == 5 + } + .map { + metratreeanddep, metaalign, template, dependency, tree -> + tree ? [ metratreeanddep+metaalign, tree, template, dependency ]:[ metratreeanddep+metaalign, [], template, dependency ] + } + .branch { + foldmason: it[0]["aligner"] == "FOLDMASON" + } + .set { ch_optional_data_tools_tree } + + + // ------------------------------------------------ // Compute the alignments // ------------------------------------------------ @@ -283,7 +314,7 @@ workflow ALIGN { ch_msa = ch_msa.mix(UPP_ALIGN.out.alignment) ch_versions = ch_versions.mix(UPP_ALIGN.out.versions.first()) - // 2. SEQUENCE + STRUCTURE BASED + // // 2. 
SEQUENCE + STRUCTURE BASED if(params.templates_suffix == ".pdb"){ // ----------------- 3DCOFFEE ------------------ @@ -324,28 +355,36 @@ workflow ALIGN { ch_msa = ch_msa.mix(MTMALIGN_ALIGN.out.alignment) ch_versions = ch_versions.mix(MTMALIGN_ALIGN.out.versions.first()) - ch_optional_data_tools.foldmason + + // ----------------- FOLDMASON ------------------ + + ch_optional_data_tools_tree.foldmason .multiMap { - meta, template, dependency -> - pdbs: [ meta, dependency ] + meta, tree, template, dependency -> + pdbs: [ meta, dependency ] + trees: [ meta, tree ] } .set { ch_pdb_foldmason } FOLDMASON_EASYMSA ( ch_pdb_foldmason.pdbs, + ch_pdb_foldmason.trees, compress ) ch_msa = ch_msa.mix(FOLDMASON_EASYMSA.out.msa_aa) ch_versions = ch_versions.mix(FOLDMASON_EASYMSA.out.versions.first()) } + + // ----------------- CONSENSUS ------------------ if(params.build_consensus){ ch_msa.map{ meta, msa -> [ meta["id"], msa]} .groupTuple() + .filter{ it[1].size() > 1 } .map{ id_meta, msas -> [ ["id": id_meta, "tree":"", "args_tree":"", "args_tree_clean":null, "aligner":"CONSENSUS", "args_aligner":"", "args_aligner_clean":null ], msas ]} .set{ ch_msa_consensus } - + CONSENSUS(ch_msa_consensus, [[:],[]], compress) ch_msa = ch_msa.mix(CONSENSUS.out.alignment) ch_versions = ch_versions.mix(CONSENSUS.out.versions.first()) @@ -354,5 +393,6 @@ workflow ALIGN { emit: msa = ch_msa // channel: [ val(meta), path(msa) ] + trees = trees // channel: [ val(meta), path(tree) ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/compute_trees.nf b/subworkflows/local/compute_trees.nf index edd497a8..406b91c4 100644 --- a/subworkflows/local/compute_trees.nf +++ b/subworkflows/local/compute_trees.nf @@ -6,19 +6,47 @@ include { FAMSA_GUIDETREE } from '../../modules/nf-core/famsa/guidetree/main' include { CLUSTALO_GUIDETREE } from '../../modules/nf-core/clustalo/guidetree/main' include { MAFFT_GUIDETREE } from '../../modules/nf-core/mafft/guidetree/main' +include { CUSTOM_PDBSTOFASTA } from '../../modules/local/custom_pdbtofasta.nf' +include { FASTAVALIDATOR } from '../../modules/nf-core/fastavalidator/main' + workflow COMPUTE_TREES { take: - ch_fastas //channel: [ meta, /path/to/file.fasta ] - tree_tools //channel: [ meta ] ( tools to be run: meta.tree, meta.args_tree ) + ch_fastas //channel: [ meta, /path/to/file.fasta ] + ch_optional_data //channel: [ meta, template, [ /path/to/file1, /path/to/file2, ... 
] ] + tree_tools //channel: [ meta ] ( tools to be run: meta.tree, meta.args_tree ) main: ch_versions = Channel.empty() + ch_trees = Channel.empty() + + // + // For the inputs that only have optional data but not a fasta + // we need to generate the fasta file + // + ch_optional_data + .join(ch_fastas, remainder:true) + .filter{ + it[-1] == null + } + .map{ + it -> [it[0], it[2]] + }.set { ch_optional_data_no_fasta } + + CUSTOM_PDBSTOFASTA(ch_optional_data_no_fasta) + + if(!params.skip_preprocessing){ + FASTAVALIDATOR(CUSTOM_PDBSTOFASTA.out.fasta) + ch_versions = ch_versions.mix(FASTAVALIDATOR.out.versions) + } + ch_fastas_all = ch_fastas.mix(CUSTOM_PDBSTOFASTA.out.fasta) + + // // Render the required guide trees // - ch_fastas + ch_fastas_all .combine(tree_tools) .map { metafasta, fasta, metatree -> @@ -30,6 +58,7 @@ workflow COMPUTE_TREES { mafft: it[0]["tree"] == "MAFFT" } .set { ch_fastas_fortrees } + FAMSA_GUIDETREE (ch_fastas_fortrees.famsa) ch_trees = FAMSA_GUIDETREE.out.tree diff --git a/subworkflows/local/preprocess.nf b/subworkflows/local/preprocess.nf new file mode 100644 index 00000000..ef17950f --- /dev/null +++ b/subworkflows/local/preprocess.nf @@ -0,0 +1,26 @@ + +include { TCOFFEE_EXTRACTFROMPDB } from '../../modules/nf-core/tcoffee/extractfrompdb/main' + +workflow PREPROCESS { + take: + ch_optional_data //channel: [ meta, [file1, ] ] + + main: + + ch_versions = Channel.empty() + ch_preprocessed_data = Channel.empty() + + if(params.templates_suffix == ".pdb"){ + // If the optional data is a pdb file, we can preprocess them to make + // them compatible with all the alignment tools + TCOFFEE_EXTRACTFROMPDB(ch_optional_data.transpose()) + TCOFFEE_EXTRACTFROMPDB.out.formatted_pdb + .groupTuple() + .set { ch_preprocessed_data } + } + + emit: + preprocessed_optionaldata = ch_preprocessed_data + versions = ch_versions + +} \ No newline at end of file diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index d3dd64e4..4c41253f 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -81,7 +81,7 @@ workflow PIPELINE_INITIALISATION { align_map["aligner"] = meta_clone["aligner"] align_map["args_aligner"] = Utils.check_required_args(meta_clone["aligner"], meta_clone["args_aligner"]) - align_map["args_aligner_clean"] = Utils.cleanArgs(align_map["args_aligner"]) + align_map["args_aligner_clean"] = Utils.cleanArgs(meta_clone.args_aligner) [ tree_map, align_map ] }.unique() @@ -357,12 +357,21 @@ class Utils { // if clearnArgs is empty, return "" if (cleanArgs == null || cleanArgs == "") { - return "" + return "default" }else{ return cleanArgs } } + public static clean_tree(argsTree){ + + def tree = argsTree.toString() + if(tree == null || tree == "" || tree == "null"){ + return "DEFAULT" + } + return tree + } + public static fix_args(tool,args,tool_to_be_checked, required_flag, default_value) { /* This function checks if the required_flag is present in the args string for the tool_to_be_checked. 
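The Utils helpers above replace the empty-string fallback with explicit sentinels: cleanArgs now returns "default" instead of "" when no tool arguments are given, and the new clean_tree helper returns "DEFAULT" when no guide tree is set. A minimal, standalone Groovy sketch of that sentinel behaviour follows; the class name and the whitespace-to-underscore cleaning rule are illustrative assumptions, and only the null/empty fallbacks come from the code above.

// Standalone sketch (not the pipeline's Utils class): null/empty tool arguments
// map to "default" and an unset guide tree maps to "DEFAULT".
class ArgsSentinelSketch {
    static String cleanArgs(args) {
        // Assumed cleaning rule for illustration: collapse whitespace to underscores.
        def cleaned = args?.toString()?.trim()?.replaceAll(/\s+/, '_')
        return (cleaned == null || cleaned == '' || cleaned == 'null') ? 'default' : cleaned
    }
    static String cleanTree(argsTree) {
        def tree = argsTree?.toString()
        return (tree == null || tree == '' || tree == 'null') ? 'DEFAULT' : tree
    }
}

assert ArgsSentinelSketch.cleanArgs(null) == 'default'
assert ArgsSentinelSketch.cleanArgs('-gt upgma -medoidtree') == '-gt_upgma_-medoidtree'
assert ArgsSentinelSketch.cleanTree(null) == 'DEFAULT'
assert ArgsSentinelSketch.cleanTree('FAMSA') == 'FAMSA'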
diff --git a/subworkflows/local/visualization.nf b/subworkflows/local/visualization.nf new file mode 100644 index 00000000..c2281650 --- /dev/null +++ b/subworkflows/local/visualization.nf @@ -0,0 +1,64 @@ +include {FOLDMASON_CREATEDB } from '../../modules/nf-core/foldmason/createdb/main' +include {FOLDMASON_MSA2LDDTREPORT } from '../../modules/nf-core/foldmason/msa2lddtreport/main' + +workflow VISUALIZATION { + + take: + ch_msa // channel: [ meta, /path/to/file.* ] + ch_trees // channel: [ meta, /path/to/file.* ] + ch_optional_data // channel: [ meta, /path/to/file.* ] + + main: + + ch_versions = Channel.empty() + ch_html = Channel.empty() + + + ch_msa.view() + // Merge the msa and tree + // split the msa meta to be able to merge with the tree meta + ch_msa + .map{ + meta, file -> [meta.subMap(["id", "tree", "args_tree", "args_tree_clean"]), meta, file] + } + .join(ch_trees, by: [0], remainder:true ) + .map{ + tree_meta, meta, msa, tree -> [meta.subMap(["id"]), meta, msa, tree] + } + .combine( ch_optional_data, by: [0]) + .set{ ch_msa_tree_data } + + // + // FOLDMASON VISUALISATION + // + + FOLDMASON_CREATEDB( + ch_optional_data + ) + + ch_msa_tree_data + .combine(FOLDMASON_CREATEDB.out.db.collect(), by:0) + .multiMap{ + id, meta, msafile, treefile, pdb, dbfiles -> + msa: [meta, msafile] + db: [id , dbfiles] + pdbs: [id , pdb] + tree: [meta, treefile == null ? [] : treefile] + }.set{ + ch_msa_db_tree + } + + FOLDMASON_MSA2LDDTREPORT( + ch_msa_db_tree.msa, + ch_msa_db_tree.db, + ch_msa_db_tree.pdbs, + [[:],[]] + ) + + ch_html = FOLDMASON_MSA2LDDTREPORT.out.html + + emit: + html = ch_html + versions = ch_versions + +} diff --git a/test.csv b/test.csv new file mode 100644 index 00000000..32b52aab --- /dev/null +++ b/test.csv @@ -0,0 +1,3 @@ +id,fasta,length +1,seq1,100 +2,seq2,200 diff --git a/test_merged.csv b/test_merged.csv new file mode 100644 index 00000000..5859d151 --- /dev/null +++ b/test_merged.csv @@ -0,0 +1 @@ +[id,seqlength_mean,seqlength_median,seqlength_max,n_sequences,perc_sim,tree,args_tree,args_tree_clean,aligner,args_aligner,args_aligner_clean,total_gaps,avg_gaps,tc,sp,TCS, seatoxin-ref,47.0,48.0,49,5,46.20,CLUSTALO,,,REGRESSIVE,-reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln,-reg_-reg_method_famsa_msa_-reg_nseq_1000_-output_fasta_aln,20,4,46.9,81.0,835, seatoxin-ref,47.0,48.0,49,5,46.20,,,,MAFFT,,,35,7,59.2,86.3,811, seatoxin-ref,47.0,48.0,49,5,46.20,FAMSA,-gt upgma -medoidtree,-gt_upgma_-medoidtree,FAMSA,,,20,4,46.9,81.0,835, seatoxin-ref,47.0,48.0,49,5,46.20,,,,FAMSA,,,20,4,46.9,81.0,835, seatoxin-ref,47.0,48.0,49,5,46.20,,,,TCOFFEE,-output fasta_aln,-output_fasta_aln,20,4,51.0,81.9,827, seatoxin-ref,47.0,48.0,49,5,46.20,,,,CLUSTALO,,,20,4,51.0,81.9,835, seatoxin-ref,47.0,48.0,49,5,46.20,,,,MAGUS,,,35,7,55.1,85.4,813, seatoxin-ref,47.0,48.0,49,5,46.20,,,,REGRESSIVE,-reg_nseq 3 -reg -reg_method famsa_msa -output fasta_aln,-reg_nseq_3_-reg_-reg_method_famsa_msa_-output_fasta_aln,20,4,46.9,81.7,834, seatoxin-ref,47.0,48.0,49,5,46.20,,,null,CONSENSUS,,null,35,7,59.2,85.4,819, toxin-ref,63.5,61.0,74,20,44.45,CLUSTALO,,,REGRESSIVE,-reg -reg_method famsa_msa -reg_nseq 1000 -output fasta_aln,-reg_-reg_method_famsa_msa_-reg_nseq_1000_-output_fasta_aln,330,16.5,61.0,89.3,802, toxin-ref,63.5,61.0,74,20,44.45,FAMSA,-gt upgma -medoidtree,-gt_upgma_-medoidtree,FAMSA,,,310,15.5,51.9,89.2,810, toxin-ref,63.5,61.0,74,20,44.45,,,,CLUSTALO,,,290,14.5,64.9,92.2,801, toxin-ref,63.5,61.0,74,20,44.45,,,,MAGUS,,,310,15.5,61.0,94.8,814, 
toxin-ref,63.5,61.0,74,20,44.45,,,,MAFFT,,,310,15.5,54.5,89.4,796, toxin-ref,63.5,61.0,74,20,44.45,,,,FAMSA,,,330,16.5,61.0,89.3,802, toxin-ref,63.5,61.0,74,20,44.45,,,,TCOFFEE,-output fasta_aln,-output_fasta_aln,310,15.5,54.5,92.5,826, toxin-ref,63.5,61.0,74,20,44.45,,,,REGRESSIVE,-reg_nseq 3 -reg -reg_method famsa_msa -output fasta_aln,-reg_nseq_3_-reg_-reg_method_famsa_msa_-output_fasta_aln,290,14.5,54.5,92.5,801, toxin-ref,63.5,61.0,74,20,44.45,,,,MUSCLE5,,,310,15.5,63.6,93.8,815, toxin-ref,63.5,61.0,74,20,44.45,,,null,CONSENSUS,,null,290,14.5,62.3,95.0,815] \ No newline at end of file diff --git a/test_merging.groovy b/test_merging.groovy new file mode 100644 index 00000000..5066bb6d --- /dev/null +++ b/test_merging.groovy @@ -0,0 +1,129 @@ +@Grab('com.xlson.groovycsv:groovycsv:1.3') +import static com.xlson.groovycsv.CsvParser.parseCsv + +def cleanTrace(trace) { + // Convert each row into a mutable map for dynamic property addition + def cleanedTrace = trace.collect { row -> + def mutableRow = row.toMap() + + // Extract the tag from the 'name' column using a regex pattern + def tagMatch = (mutableRow.name =~ /\((.*)\)/) + mutableRow.tag = tagMatch ? tagMatch[0][1] : null + + // Extract 'id' and 'args' from the tag safely + mutableRow.id = mutableRow.tag?.tokenize(' ')?.first() + mutableRow.args = mutableRow.tag?.split("args:")?.with { it.size() > 1 ? it[1].trim() : null } + + // Process the 'full_name' to extract workflow and process details + mutableRow.full_name = mutableRow.name.split(/\(/)?.first()?.trim() + def nameParts = mutableRow.full_name?.tokenize(':') ?: [] + mutableRow.process = nameParts ? nameParts.last() : null + mutableRow.subworkflow = nameParts.size() > 1 ? nameParts[-2] : null + + // Replace "null" strings with actual null values + mutableRow.each { key, value -> + if (value == 'null') { + mutableRow[key] = null + } + } + + return mutableRow + } + + // Return the cleaned trace + return cleanedTrace.findAll { it != null } +} + +// Utility function to convert time strings to minutes +def convertTime(String timeStr) { + def pattern = /((?<hours>\d+(\.\d+)?)h)?\s*((?<minutes>\d+(\.\d+)?)m)?\s*((?<seconds>\d+(\.\d+)?)s)?\s*((?<milliseconds>\d+(\.\d+)?)ms)?/ + def matcher = timeStr.trim() =~ pattern + + if (!matcher.matches()) { + throw new IllegalArgumentException("Time string is not in the correct format: $timeStr") + } + + def hours = matcher.group('hours')?.toDouble() ?: 0.0 + def minutes = matcher.group('minutes')?.toDouble() ?: 0.0 + def seconds = matcher.group('seconds')?.toDouble() ?: 0.0 + def milliseconds = matcher.group('milliseconds')?.toDouble() ?: 0.0 + + return (hours * 60) + minutes + (seconds / 60) + (milliseconds / 60000) +} + +// Utility function to convert memory to GB +def convertMemory(String memory) { + if (!memory) return null + + if (memory.contains("GB")) { + return memory.replace("GB", "").toDouble() + } else if (memory.contains("MB")) { + return memory.replace("MB", "").toDouble() / 1000 + } else if (memory.contains("KB")) { + return memory.replace("KB", "").toDouble() / 1000000 + } + return null +} + +// Prepare trace trees +def prepTreeTrace(trace) { + def traceTrees = trace.findAll { it.subworkflow == "COMPUTE_TREES" } + traceTrees.each { row -> + row.args_tree = row.args + row.tree = row.process.replace("_GUIDETREE", "") + row.time_tree = convertTime(row.realtime) + row.memory_tree = convertMemory(row.rss) + row.cpus_tree = row.cpus + } + return traceTrees +} + +// Prepare align traces +def prepAlignTrace(trace) { + def traceAlign = trace.findAll { it.subworkflow == "ALIGN" }
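+ // Derive aligner-specific columns from the generic trace fields: the tool name (process name without the _ALIGN suffix), its arguments, wall-clock time in minutes, peak RSS in GB and the CPU count, mirroring prepTreeTrace above.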
+ traceAlign.each { row -> + row.args_aligner = row.args + row.aligner = row.process.replace("_ALIGN", "") + row.time_align = convertTime(row.realtime) + row.memory_align = convertMemory(row.rss) + row.cpus_align = row.cpus + } + return traceAlign +} + +def merge_summary_and_traces(summary_file, trace_dir_path, outFileName){ + + // Read the summary file with the scientific evaluation + def data = new File(summary_file).readLines() + + // Identify and parse the latest trace file + def trace_file = new File("${trace_dir_path}").listFiles().findAll { it.name.startsWith("execution_trace") }.sort { -it.lastModified() }.take(1)[0] + + // Keep only the lines that report running times related to evaluation + def header = trace_file.readLines()[0].replaceAll("\t", ",") + def trace_file_align = trace_file.readLines().findAll { it.contains("CACHED") && it.contains("MULTIPLESEQUENCEALIGN:ALIGN") }.collect { it.replaceAll("\t", ",") }.join("\n") + def trace = header + "\n" + trace_file_align + def trace_csv = parseCsv(trace) + + def cleanTraceData = cleanTrace(trace_csv) + def traceTrees = prepTreeTrace(cleanTraceData) + def traceAlign = prepAlignTrace(cleanTraceData) + + def mergedData = [] + data.each { row -> + def treeMatch = traceTrees.find { it.id == row.id && it.tree == row.tree && it.args_tree == row.args_tree } + def alignMatch = traceAlign.find { it.id == row.id && it.aligner == row.aligner && it.args_aligner == row.args_aligner } + def mergedRow = row + (treeMatch ?: [:]) + (alignMatch ?: [:]) + mergedData << mergedRow + } + new File(outFileName).withWriter { writer -> writer.write(mergedData as String) } + +} + +outdir = "/home/luisasantus/Desktop/multiplesequencealign/results" + +def summary_file = "${outdir}/summary/complete_summary_stats_eval.csv" +def outFileName = "${outdir}/../test_merged.csv" +def trace_dir_path = "${outdir}/pipeline_info/" + +merge_summary_and_traces(summary_file, trace_dir_path, outFileName) \ No newline at end of file diff --git a/udo systemctl enable docker b/udo systemctl enable docker new file mode 100644 index 00000000..476b9dc8 --- /dev/null +++ b/udo systemctl enable docker @@ -0,0 +1,21 @@ +● docker.service - Docker Application Container Engine + Loaded: loaded (/lib/systemd/system/docker.service; enabled; vendor preset: enabled) + Active: active (running) since Thu 2024-12-05 10:54:31 CET; 50s ago +TriggeredBy: ● docker.socket + Docs: https://docs.docker.com + Main PID: 547336 (dockerd) + Tasks: 12 + Memory: 29.6M + CGroup: /system.slice/docker.service + └─547336 /usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock + +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR systemd[1]: Starting Docker Application Container Engine... +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.616479842+01:00" level=info msg="Starting up" +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.617217460+01:00" level=info msg="detected 127.0.0.53 nameserver, assuming systemd-resolved, so using resolv.conf: /run/systemd/resolve/resolv.conf" +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.688746687+01:00" level=info msg="Loading containers: start." +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.755640983+01:00" level=info msg="Default bridge (docker0) is assigned with an IP address 10.220.0.0/24. 
Daemon option --bip can be used to set a preferred IP address" +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.788831417+01:00" level=info msg="Loading containers: done." +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.811014156+01:00" level=info msg="Docker daemon" commit=41ca978 containerd-snapshotter=false storage-driver=overlay2 version=27.3.1 +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.811239366+01:00" level=info msg="Daemon has completed initialization" +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR dockerd[547336]: time="2024-12-05T10:54:31.859048276+01:00" level=info msg="API listen on /run/docker.sock" +dic 05 10:54:31 luisasantus-HP-EliteDesk-800-G5-TWR systemd[1]: Started Docker Application Container Engine. diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index 3c42ec16..af8fb240 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -22,6 +22,9 @@ include { STATS } from '../subworkflows/local/stats' include { ALIGN } from '../subworkflows/local/align' include { EVALUATE } from '../subworkflows/local/evaluate' include { TEMPLATES } from '../subworkflows/local/templates' +include { PREPROCESS } from '../subworkflows/local/preprocess' +include { VISUALIZATION } from '../subworkflows/local/visualization' + // // MODULE: local modules @@ -41,6 +44,7 @@ include { PREPARE_SHINY } from '../modules/local/prepare_shiny' include { UNTAR } from '../modules/nf-core/untar/main' include { CSVTK_JOIN as MERGE_STATS_EVAL } from '../modules/nf-core/csvtk/join/main.nf' include { PIGZ_COMPRESS } from '../modules/nf-core/pigz/compress/main' +include { FASTAVALIDATOR } from '../modules/nf-core/fastavalidator/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -63,15 +67,16 @@ workflow MULTIPLESEQUENCEALIGN { ch_shiny_stats = Channel.empty() ch_refs = Channel.empty() ch_templates = Channel.empty() - ch_optional_data = Channel.empty() + ch_optional_data = Channel.empty() ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() ch_input + .filter { it[1].size() > 0} .map { meta, fasta, ref, str, template -> - [ meta, file(fasta) ] + [ meta, file(fasta) ] } .set { ch_seqs } @@ -166,6 +171,20 @@ workflow MULTIPLESEQUENCEALIGN { ch_versions = ch_versions.mix(UNTAR.out.versions) } + // + // VALIDATE AND PREPROCESS INPUT FILES + // + + FASTAVALIDATOR(ch_seqs) + ch_versions = ch_versions.mix(FASTAVALIDATOR.out.versions) + + if(!params.skip_preprocessing){ + PREPROCESS(ch_optional_data) + ch_optional_data = PREPROCESS.out.preprocessed_optionaldata + ch_versions = ch_versions.mix(PREPROCESS.out.versions) + } + + // // TEMPLATES // @@ -242,6 +261,15 @@ workflow MULTIPLESEQUENCEALIGN { ch_versions = ch_versions.mix(PREPARE_SHINY.out.versions) } + + if (!params.skip_visualisation) { + VISUALIZATION ( + ALIGN.out.msa, + ALIGN.out.trees, + ch_optional_data + ) + } + softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info",
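The new skip_preprocessing and skip_visualisation parameters introduced above both default to false, so the optional-data preprocessing (TCOFFEE_EXTRACTFROMPDB on .pdb inputs) and the FoldMason report (FOLDMASON_CREATEDB followed by FOLDMASON_MSA2LDDTREPORT) run unless switched off. Below is a minimal sketch of a user-supplied config that opts out of both; the file name and the example command are illustrative, not part of the pipeline.

// skip_extra_steps.config -- illustrative opt-out config (parameter names as
// declared in nextflow.config above; both default to false).
params {
    skip_preprocessing  = true   // skip PDB header reformatting via TCOFFEE_EXTRACTFROMPDB
    skip_visualisation  = true   // skip the FoldMason HTML report (FOLDMASON_MSA2LDDTREPORT)
}
// Example invocation (illustrative):
//   nextflow run . -profile docker -c skip_extra_steps.config --input samplesheet.csv --outdir results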