From 5408656706c8747f4af31f7b7fdaf62a4483f12a Mon Sep 17 00:00:00 2001 From: luisas Date: Thu, 14 Sep 2023 15:45:40 +0200 Subject: [PATCH] Integrate nf-validation plugin --- assets/samplesheet.csv | 2 +- assets/schema_input.json | 1 + assets/schema_tools.json | 10 +- bin/check_samplesheet.py | 231 -------------------------- bin/check_toolsheet.py | 251 ----------------------------- conf/modules.config | 14 +- main.nf | 2 +- modules.json | 42 ++--- modules/local/samplesheet_check.nf | 34 ---- modules/local/toolsheet_check.nf | 35 ---- nextflow.config | 2 +- subworkflows/local/align.nf | 5 +- subworkflows/local/input_check.nf | 117 -------------- workflows/multiplesequencealign.nf | 54 ++++--- 14 files changed, 63 insertions(+), 737 deletions(-) delete mode 100755 bin/check_samplesheet.py delete mode 100755 bin/check_toolsheet.py delete mode 100644 modules/local/samplesheet_check.nf delete mode 100644 modules/local/toolsheet_check.nf delete mode 100644 subworkflows/local/input_check.nf diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 1b7a2dd9..206caf6e 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ id,fasta,reference,structures -seatoxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref, +seatoxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref,test-dataset/structures/setoxin-ref/ toxin-ref,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin-ref.fa,https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/toxin.ref, diff --git a/assets/schema_input.json b/assets/schema_input.json index a17eca13..3ebb3c25 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,6 +10,7 @@ "id": { "type": "string", "pattern": "^\\S+$", + "meta": ["id"], "errorMessage": "id/sample name must be provided and cannot contain spaces" }, "fasta": { diff --git a/assets/schema_tools.json b/assets/schema_tools.json index 6c85028f..cd33e7fa 100644 --- a/assets/schema_tools.json +++ b/assets/schema_tools.json @@ -9,17 +9,21 @@ "properties": { "tree": { "type": "string", - "errorMessage": "tree name cannot contain spaces" + "errorMessage": "tree name cannot contain spaces", + "meta": ["tree"] }, "args_tree": { - "type": "string" + "type": "string", + "meta": ["args_tree"] }, "align": { "type": "string", + "meta": ["align"], "errorMessage": "align name must be provided and cannot contain spaces" }, "args_align": { - "type": "string" + "type": "string", + "meta": ["args_align"] } }, "required": ["align"] diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index ddea8011..00000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,231 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. 
The order of rows is maintained. - - """ - - VALID_FORMATS = (".fa", ".fasta") - - def __init__( - self, - id_col="id", - fasta_col="fasta", - reference_col="reference", - structures_col="structures", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - id_col (str): The name of the column that contains the id name - (default "id"). - fasta_col (str): The name of the column that contains the fasta file - path (default "fasta"). - - """ - super().__init__(**kwargs) - self._id_col = id_col - self._fasta_col = fasta_col - self._reference_col = reference_col - self._structures_col = structures_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_id(row) - self._validate_fasta(row) - self._validate_structures(row) - self._seen.add((row[self._id_col], row[self._fasta_col])) - self.modified.append(row) - - def _validate_id(self, row): - """Assert that the id name exists and convert spaces to underscores.""" - if len(row[self._id_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._id_col] = row[self._id_col].replace(" ", "_") - - def _validate_fasta(self, row): - """Assert that the fasta entry is non-empty and has the right format.""" - if len(row[self._fasta_col]) <= 0: - raise AssertionError("At least the first FASTQ file is required.") - self._validate_fasta_format(row[self._fasta_col]) - - def _validate_fasta_format(self, filename): - """Assert that a given filename has one of the expected fasta extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The fasta file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def _validate_structures(self, row): - """Assert that the structures entry is non-empty and has the right format.""" - if len(row[self._structures_col]) <= 0: - row[self._structures_col] = "none" - - def validate_unique_samples(self): - """ - Assert that the combination of id name and fasta filename is unique. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and fasta must be unique.") - seen = Counter() - for row in self.modified: - id = row[self._id_col] - seen[id] += 1 - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. 
- - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - id,fasta - seatoxin-ref,./testdata/seatoxin-ref.fa - toxin-ref,./testdata/toxin-ref.fa - - - """ - required_columns = {"id", "fasta"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/bin/check_toolsheet.py b/bin/check_toolsheet.py deleted file mode 100755 index 8336e253..00000000 --- a/bin/check_toolsheet.py +++ /dev/null @@ -1,251 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -def cleanargs(argstring): - cleanargs = argstring.strip().replace("-", "").replace(" ", "_").replace("==", "_").replace("\s+", "") - - return cleanargs - - -class RowChecker: - """ - Define a service that can validate and transform each given row. 
- - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - def __init__( - self, - tree_col="tree", - argstree_col="args_tree", - argstree_clean_col="argstree_clean", - align_col="align", - argsalign_col="args_align", - argsalign_clean_col="argsalign_clean", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - family_col (str): The name of the column that contains the family name - (default "family"). - fasta_col (str): The name of the column that contains the fasta file - path (default "fasta"). - - """ - super().__init__(**kwargs) - self._tree_col = tree_col - self._argstree_col = argstree_col - self._argstree_clean_col = argstree_clean_col - self._align_col = align_col - self._argsalign_col = argsalign_col - self._argsalign_clean_col = argsalign_clean_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_align(row) - self._validate_tree(row) - self._validate_argstree(row) - self._validate_argsalign(row) - self._seen.add( - ( - row[self._tree_col], - row[self._argstree_col], - row[self._align_col], - row[self._argsalign_col], - row[self._argstree_clean_col], - ) - ) - print(row) - self.modified.append(row) - - def _validate_tree(self, row): - """Assert that the family name exists and convert spaces to underscores.""" - if len(row[self._tree_col]) <= 0: - row[self._tree_col] = "none" - # Sanitize samples slightly. - row[self._tree_col] = row[self._tree_col] - - def _validate_argstree(self, row): - if len(row[self._argstree_col]) <= 0: - row[self._argstree_col] = "none" - row[self._argstree_clean_col] = "none" - # Sanitize samples slightly. - row[self._argstree_col] = row[self._argstree_col] - row[self._argstree_clean_col] = cleanargs(row[self._argstree_col]) - - def _validate_align(self, row): - if len(row[self._align_col]) <= 0: - raise AssertionError("alignment tool is required.") - # Sanitize samples slightly. - row[self._align_col] = row[self._align_col] - row[self._argsalign_clean_col] = cleanargs(row[self._argsalign_col]) - - def _validate_argsalign(self, row): - if len(row[self._argsalign_col]) <= 0: - row[self._argsalign_col] = "none" - row[self._argsalign_clean_col] = "none" - # Sanitize samples slightly. - row[self._argsalign_col] = row[self._argsalign_col] - - def validate_unique_samples(self): - """ - Assert that the combination of family name and fasta filename is unique. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and fasta must be unique.") - seen = Counter() - for row in self.modified: - entry = row[self._tree_col] + row[self._argstree_col] + row[self._align_col] + row[self._argsalign_col] - seen[entry] += 1 - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. 
_text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - - - - """ - required_columns = {"align"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Remove white spaces from whole file, even after commas. - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.append("argstree_clean") - header.append("argsalign_clean") - - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/conf/modules.config b/conf/modules.config index d3b935ce..1a8c5ec3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,17 +18,9 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] - withName: SAMPLESHEET_CHECK { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: "FAMSA_GUIDETREE"{ ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}" } - ext.args = { "${meta.args_tree}" == 'none' ? '' : "${meta.args_tree}" } + ext.args = { "${meta.args_tree}" == "null" ? '' : "${meta.args_tree}" } publishDir = [ path: { "${params.outdir}/trees/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, @@ -38,7 +30,7 @@ process { withName: "CLUSTALO_GUIDETREE"{ ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}" } - ext.args = { "${meta.args_tree}" == 'none' ? '' : "${meta.args_tree}" } + ext.args = { "${meta.args_tree}" == "null" ? '' : "${meta.args_tree}" } publishDir = [ path: { "${params.outdir}/trees/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, @@ -48,7 +40,7 @@ process { withName: ".*ALIGN"{ ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.align}-args-${meta.argsalign_clean}" } - ext.args = { "${meta.args_align}" == 'none' ? '' : "${meta.args_align}" } + ext.args = { "${meta.args_align}" == "null" ? '' : "${meta.args_align}" } publishDir = [ path: { "${params.outdir}/alignment/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, diff --git a/main.nf b/main.nf index 65949d47..3bdf7b6d 100644 --- a/main.nf +++ b/main.nf @@ -17,7 +17,7 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' +include { validateParameters; paramsHelp; paramsSummaryLog; fromSamplesheet } from 'plugin/nf-validation' // Print help message if needed if (params.help) { diff --git a/modules.json b/modules.json index 0be4793d..51ce8069 100644 --- a/modules.json +++ b/modules.json @@ -8,75 +8,55 @@ "clustalo/align": { "branch": "master", "git_sha": "5c73153097b0f906fa3fe91eb94faaee394d5704", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "clustalo/guidetree": { "branch": "master", "git_sha": "9a884757b561688e0b3ff8b55ff7eb4da25eef33", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "famsa/align": { "branch": "master", "git_sha": "db6245923c85e43df3fbc3a3a6c5150c9f374136", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "famsa/guidetree": { "branch": "master", "git_sha": "2fe424b685150dbcfae708ea42f521aa137ea21e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastqc": { "branch": "master", "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "kalign/align": { "branch": "master", "git_sha": "c4328fea9d972088482f163052be0f51950eb91d", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mafft": { "branch": "master", "git_sha": "feb29be775d9e41750180539e9a3bdce801d0609", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": 
["modules"] }, "untar": { "branch": "master", "git_sha": "d0b4fc03af52a1cc8c6fb4493b921b57352b1dd8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } } } } -} \ No newline at end of file +} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 9b55bec8..00000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,34 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/multiplesequencealign/bin/ - def args = task.ext.args ?: '' - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} - - diff --git a/modules/local/toolsheet_check.nf b/modules/local/toolsheet_check.nf deleted file mode 100644 index f7a625a6..00000000 --- a/modules/local/toolsheet_check.nf +++ /dev/null @@ -1,35 +0,0 @@ - - - -process TOOLSHEET_CHECK { - tag "$toolsheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path toolsheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/msa/bin/ - def args = task.ext.args ?: '' - """ - check_toolsheet.py \\ - $toolsheet \\ - toolsheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/nextflow.config b/nextflow.config index bdac53e8..daebc3eb 100644 --- a/nextflow.config +++ b/nextflow.config @@ -7,7 +7,7 @@ */ plugins { - id 'nf-validation@0.2.1' + id 'nf-validation@0.3.1' } // Global default params, used in configs diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 4bd4d7f2..9d0c46aa 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -41,12 +41,11 @@ workflow ALIGN { ch_fastas.combine(ch_tools) .map{ it -> [it[0] + it[2] , it[3], it[1]] } .branch { - with_tree: it[0]["tree"] != "none" - without_tree: it[0]["tree"] == "none" + with_tree: it[0]["tree"] != null + without_tree: it[0]["tree"] == null } .set { ch_fasta_tools } - // Here is all the combinations we need to compute ch_fasta_tools .with_tree diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index d59515b0..00000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,117 +0,0 @@ -// -// Check input samplesheet and get read channels -// -import java.util.zip.ZipFile -import java.util.zip.ZipEntry - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' -include { TOOLSHEET_CHECK } from '../../modules/local/toolsheet_check' -include { UNTAR as UNTAR_STRUCTURES } from 
'../../modules/nf-core/untar/main' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - toolsheet // file: /path/to/toolsheet.csv - - main: - - ch_versions = Channel.empty() - - samplesheet_ch = SAMPLESHEET_CHECK ( samplesheet) - .csv - .splitCsv ( header:true, sep:',' ) - - fasta = samplesheet_ch.map { create_fasta_channel(it) } - references = samplesheet_ch.map { create_references_channel(it) } - structures = samplesheet_ch.map { create_structures_channel(it) }.unique() - ch_versions = ch_versions.mix(SAMPLESHEET_CHECK.out.versions) - - - TOOLSHEET_CHECK ( toolsheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_tools_channel(it) } - .set { tools } - ch_versions = ch_versions.mix(TOOLSHEET_CHECK.out.versions) - - emit: - fasta - references - structures - tools // channel: [ val(meta), [ fasta ] ] - versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] -} - - -// Function to get list of [ meta, [ fasta ] ] -def create_fasta_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.id - - // add path(s) of the fastq file(s) to the meta map - def fasta_meta = [] - - if (!file(row.fasta).exists()) { - exit 1, "ERROR: Please check input samplesheet -> fasta file does not exist!\n${row.fasta}" - } - fasta_meta = [ meta, [ file(row.fasta) ] ] - - return fasta_meta -} - - -// Function to get list of [ meta, [ fasta ] ] -def create_references_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.id - - // add path(s) of the fastq file(s) to the meta map - def ref_meta = [] - ref_meta = [ meta, [ file(row.reference) ] ] - - return ref_meta -} - -import groovy.io.FileType - -// Function to get list of [ meta, [ fasta ] ] -def create_structures_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.id - - // add path(s) of the fastq file(s) to the meta map - if (row.structures != "none") { - def list = [] - def dir = new File(row.structures) - dir.eachFileRecurse (FileType.FILES) { it -> - list << file(it) - } - structures = [ meta, list ] - return structures - } else { - return [ meta, [:] ] - } - -} - -def create_tools_channel(LinkedHashMap row) { - // create meta map - def meta_tree = [:] - def meta_align = [:] - - meta_tree.tree = row.tree - meta_tree.args_tree = row.args_tree - meta_tree.argstree_clean = row.argstree_clean - meta_align.align = row.align - meta_align.args_align = row.args_align - meta_align.argsalign_clean = row.argsalign_clean - - // add path(s) of the fastq file(s) to the meta map - def tools_meta = [] - tools_meta = [ meta_tree, meta_align ] - - return tools_meta -} diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index 8d1a23f3..3fe7fdca 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -10,6 +10,15 @@ def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) def citation = '\n' + WorkflowMain.citation(workflow) + '\n' def summary_params = paramsSummaryMap(workflow) +def cleanArgs(argString) { + def cleanArgs = argString.toString().trim().replace("-", "").replace(" ", "_").replaceAll("==", "_").replaceAll("\\s+", "") + // if clearnArgs is empty, return "default" + if (cleanArgs == null || cleanArgs == "") { + return "" + }else{ + return cleanArgs + } +} // Print parameter summary log to screen log.info logo + paramsSummaryLog(workflow) + citation @@ -32,10 +41,9 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
-//
-// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
-//
-include { INPUT_CHECK } from '../subworkflows/local/input_check'
+include { STATS } from '../subworkflows/local/stats'
+include { ALIGN } from '../subworkflows/local/align'
+include { EVALUATE } from '../subworkflows/local/evaluate'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -49,9 +57,6 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
 include { FASTQC } from '../modules/nf-core/fastqc/main'
 include { MULTIQC } from '../modules/nf-core/multiqc/main'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
-include { STATS } from '../subworkflows/local/stats'
-include { ALIGN } from '../subworkflows/local/align'
-include { EVALUATE } from '../subworkflows/local/evaluate'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -67,21 +72,32 @@ workflow MULTIPLESEQUENCEALIGN {
 
     ch_versions = Channel.empty()
 
     //
-    // SUBWORKFLOW: Read in samplesheet, validate and stage input files
+    // Prepare input and metadata
     //
-    INPUT_CHECK (
-        file(params.input),
-        file(params.tools)
-    )
-    ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
+    ch_input = Channel.fromSamplesheet('input')
+    ch_tools = Channel.fromSamplesheet('tools').map {
+        meta ->
+            def meta_clone = meta[0].clone()
+            def treeMap = [:]
+            def alignMap = [:]
 
-    ch_seqs = INPUT_CHECK.out.fasta
-    ch_tools = INPUT_CHECK.out.tools
-    ch_refs = INPUT_CHECK.out.references
-    ch_structures = INPUT_CHECK.out.structures
+            treeMap["tree"] = meta_clone["tree"]
+            treeMap["args_tree"] = meta_clone["args_tree"]
+            treeMap["args_tree_clean"] = cleanArgs(meta_clone.args_tree)
+
+            alignMap["align"] = meta_clone["align"]
+            alignMap["args_align"] = meta_clone["args_align"]
+            alignMap["args_align_clean"] = cleanArgs(meta_clone.args_align)
+
+            [ treeMap, alignMap ]
+    }
+
+
+    ch_seqs = ch_input.map{ sample -> [ sample[0], file(sample[1]) ]}
+    ch_refs = ch_input.map{ sample -> [ sample[0], file(sample[2]) ]}
+    ch_structures = ch_input.map{ sample -> [ sample[0], sample[3] ]}
 
-    //
     // Compute summary statistics about the input sequences
     //
    if( !params.skip_stats ){
@@ -148,6 +164,8 @@ workflow.onComplete {
     }
 }
 
+
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     THE END
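
For reference, a minimal sketch (not part of the patch) of the channel shape that Channel.fromSamplesheet('input') emits under assets/schema_input.json as modified above: columns annotated with "meta" are collected into a meta map, while the remaining columns are emitted positionally. It assumes nf-validation 0.3.1 is enabled in nextflow.config, as done in this patch; the samplesheet values and the view() output are illustrative only.

    // Illustrative sketch, not pipeline code. Assumes params.input points at a
    // CSV matching assets/schema_input.json and that nf-validation 0.3.1 is enabled.
    include { fromSamplesheet } from 'plugin/nf-validation'

    workflow {
        // Each samplesheet row becomes [ [id: <id>], fasta, reference, structures ]:
        // only "id" carries a meta: ["id"] annotation in the schema, so it is the
        // sole key in the meta map; the other columns stay positional.
        Channel
            .fromSamplesheet('input')
            .map { meta, fasta, reference, structures ->
                [ meta, file(fasta) ]   // keep just the fields a given step needs
            }
            .view()   // e.g. [[id:seatoxin-ref], /path/to/setoxin-ref.fa]
    }

The toolsheet channel works the same way, except that every column in assets/schema_tools.json is meta-annotated, which is why the workflow above receives a single meta map per row (accessed as meta[0]) and splits it into treeMap and alignMap.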