nf-core · charles-plessy · Oct 9, 2024 · Sep 26, 2024 · Sep 27, 2024 · Sep 27, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,10 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## v1.1.0 "Nattou maki" - [September 27th, 2024]
+
+Added a new `softmask` parameter, to optionally keep original softmasking.
+
 ## v1.0.0 "Sweet potato" - [August 27th, 2024]
 
 Initial release of nf-core/pairgenomealign, created with the [nf-core](https://nf-co.re/) template.
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
@@ -1,7 +1,7 @@
 report_comment: >
-  This report has been generated by the <a href="https://github.com/nf-core/pairgenomealign/releases/tag/1.0.0" target="_blank">nf-core/pairgenomealign</a>
+  This report has been generated by the <a href="https://github.com/nf-core/pairgenomealign/releases/tag/1.1.0" target="_blank">nf-core/pairgenomealign</a>
   analysis pipeline. For information about how to interpret these results, please see the
-  <a href="https://nf-co.re/pairgenomealign/1.0.0/docs/output" target="_blank">documentation</a>.
+  <a href="https://nf-co.re/pairgenomealign/1.1.0/docs/output" target="_blank">documentation</a>.
 report_section_order:
   "nf-core-pairgenomealign-methods-description":
     order: -1000

diff --git a/conf/modules.config b/conf/modules.config
@@ -28,10 +28,11 @@ process {
 
     withName: 'ALIGNMENT_LASTDB' {
         // See https://gitlab.com/mcfrith/last/-/blob/main/doc/lastdb.rst for details
-        // -R01: uppercase all sequences and then lowercase simple repeats
+        // -R01: uppercase all sequences and then lowercase simple repeats with tantan
+        // -R11: keep original lowercase masking, and additionally lowercase simple repeats with tantan
         // -c: soft-mask lowercase letters
         // -S2: index both strands
-        ext.args = { "-R01 -c -u${params.seed} -S2" }
+        ext.args = { "${params.softmask=="tantan" ? '-R01' : '-R11'} -c -u${params.seed} -S2" }
     }
 
     withName: 'ALIGNMENT_SPLIT_O2M' {

diff --git a/docs/usage.md b/docs/usage.md
@@ -38,7 +38,10 @@ An [example samplesheet](../assets/samplesheet_full.csv) has been provided with
 
 ## Options
 
-Please see the [parameter documentation](https://nf-co.re/pairgenomealign/parameters) for details.
+The parameters are described in detail in the [online documentation](https://nf-co.re/pairgenomealign/parameters). Expert users can pass extra command line arguments to LAST commands. Apart from this, the following options are of special importance:
+
+- `--m2m` enables the computation of the _many-to-many_ alignment, which is the only one to be useful in the case of self-alignments, but which on the other hand can exhaust computing resources in the case of very large genomes.
+- Likewise, when comparing very similar and repetitive genomes (like two vertebrate genomes from the same species), any dotplot other than for the _one-to-one_ alignment will be heavy to compute and useless anyway, because the whole page will be filled with dots. The `--skip_dotplot_*` options are there to solve that problem.
 
 ## Fixed arguments (taken from the [LAST cookbook][] and the [LAST tuning][] manual)
 

diff --git a/nextflow.config b/nextflow.config
@@ -58,7 +58,10 @@ params {
     max_cpus                   = 16
     max_time                   = '240.h'
 
+    // Indexing options
     seed                       = 'YASS'
+    softmask                   = 'tantan'
+
     targetName                 = 'target'
     m2m                        = false
 
@@ -258,7 +261,7 @@ manifest {
     description     = """Pairwise alignment pipeline (genome to genome or reads to genome)"""
     mainScript      = 'main.nf'
     nextflowVersion = '!>=23.04.0'
-    version         = '1.0.0'
+    version         = '1.1.0'
     doi             = ''
 }
 

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -56,6 +56,31 @@
                 }
             }
         },
+        "indexing_options": {
+            "title": "Indexing options",
+            "type": "object",
+            "description": "",
+            "default": "",
+            "fa_icon": "fas fa-database",
+            "properties": {
+                "seed": {
+                    "type": "string",
+                    "enum": ["YASS", "NEAR", "MAM8", "RY128"],
+                    "help_text": "LAST creates a database of seed sequences in the _target_ genome, and provides different ways to generate these seeds. The default (`YASS`) searches for long-and-weak similarities that allow for mismatches but not gaps. Among alternatives, there are `NEAR` for short-and-strong (near-identical) similarities with many gaps (insertions and deletions), `MAM8` to find weak similarities with high sensitivity, but low speed and high memory usage, or `RY128` that reduces run time and memory use, by only seeking seeds at ~1/128 of positions in each sequence, which is useful when the purpose of running this pipeline is only to generate whole-genome dotplots, or when sensitivity for tiny fragments may be unnecessary or undesirable. See <https://gitlab.com/mcfrith/last/-/blob/main/doc/last-seeds.rst> for details.",
+                    "description": "Select the LAST seed to index the _target_ genome.",
+                    "default": "YASS",
+                    "fa_icon": "fas fa-seedling"
+                },
+                "softmask": {
+                    "type": "string",
+                    "enum": ["tantan", "original"],
+                    "help_text": "In this pipeline, letters soft-masked in lowercase are excluded from indexing (`lastdb -c`).  By default, the original mask is removed and a new one is made with an internal version of the “tantan” tool.  Set this option to `original` to keep the original soft-masking.  See <https://gitlab.com/mcfrith/last/-/blob/main/doc/lastdb.rst> for details.",
+                    "description": "Customise the way to mask the _target_ genome.",
+                    "default": "tantan",
+                    "fa_icon": "fas fa-theater-masks"
+                }
+            }
+        },
         "alignment_options": {
             "title": "Alignment options",
             "type": "object",
@@ -68,14 +93,6 @@
                     "help_text": "This adds time and can comsume considerable amount of space; use only if you need that data, for instance in the case of a self-alignment",
                     "fa_icon": "fas fa-arrows-alt"
                 },
-                "seed": {
-                    "type": "string",
-                    "enum": ["YASS", "NEAR", "MAM8", "RY128"],
-                    "help_text": "LAST creates a database of seed sequences in the _target_ genome, and provides different ways to generate these seeds. The default (`YASS`) searches for long-and-weak similarities that allow for mismatches but not gaps. Among alternatives, there are `NEAR` for short-and-strong (near-identical) similarities with many gaps (insertions and deletions), `MAM8` to find weak similarities with high sensitivity, but low speed and high memory usage, or `RY128` that reduces run time and memory use, by only seeking seeds at ~1/128 of positions in each sequence, which is useful when the purpose of running this pipeline is only to generate whole-genome dotplots, or when sensitivity for tiny fragments may be unnecessary or undesirable. See <https://gitlab.com/mcfrith/last/-/blob/main/doc/last-seeds.rst> for details.",
-                    "description": "Select the the LAST seed to index the _target_ genome.",
-                    "default": "YASS",
-                    "fa_icon": "fas fa-seedling"
-                },
                 "lastal_params": {
                     "type": "string",
                     "description": "Path to a file containing alignment parameters or a scoring matrix. If this option is used, `last-train` will be skipped and alignment parameters will be the same for each query.",
@@ -372,6 +389,9 @@
         {
             "$ref": "#/definitions/input_output_options"
         },
+        {
+            "$ref": "#/definitions/indexing_options"
+        },
         {
             "$ref": "#/definitions/alignment_options"
         },