diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7f5f0db..ec6ae79 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,10 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## v1.1.0 "Nattou maki" - [September 27th, 2024]
+
+Added a new `softmask` parameter, to optionally keep original softmasking.
+
## v1.0.0 "Sweet potato" - [August 27th, 2024]
Initial release of nf-core/pairgenomealign, created with the [nf-core](https://nf-co.re/) template.
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 646ff21..1ed94d1 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,7 +1,7 @@
report_comment: >
- This report has been generated by the nf-core/pairgenomealign
+ This report has been generated by the nf-core/pairgenomealign
analysis pipeline. For information about how to interpret these results, please see the
- documentation.
+ documentation.
report_section_order:
"nf-core-pairgenomealign-methods-description":
order: -1000
diff --git a/conf/modules.config b/conf/modules.config
index 3c1b725..883a68e 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -28,10 +28,11 @@ process {
withName: 'ALIGNMENT_LASTDB' {
// See https://gitlab.com/mcfrith/last/-/blob/main/doc/lastdb.rst for details
- // -R01: uppercase all sequences and then lowercase simple repeats
+ // -R01: uppercase all sequences and then lowercase simple repeats with tantan
+ // -R11: keep original lowercase masking, and also lowercase simple repeats with tantan
// -c: soft-mask lowercase letters
// -S2: index both strands
- ext.args = { "-R01 -c -u${params.seed} -S2" }
+ ext.args = { "${params.softmask=="tantan" ? '-R01' : '-R11'} -c -u${params.seed} -S2" }
}
withName: 'ALIGNMENT_SPLIT_O2M' {
diff --git a/nextflow.config b/nextflow.config
index c75282a..e8c4bd3 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -58,7 +58,10 @@ params {
max_cpus = 16
max_time = '240.h'
+ // Indexing options
seed = 'YASS'
+ softmask = 'tantan'
+
targetName = 'target'
m2m = false
@@ -258,7 +261,7 @@ manifest {
description = """Pairwise alignment pipeline (genome to genome or reads to genome)"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
- version = '1.0.0'
+ version = '1.1.0'
doi = ''
}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 312b1bd..d52e963 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -56,6 +56,31 @@
}
}
},
+ "indexing_options": {
+ "title": "Indexing options",
+ "type": "object",
+ "description": "",
+ "default": "",
+ "fa_icon": "fas fa-database",
+ "properties": {
+ "seed": {
+ "type": "string",
+ "enum": ["YASS", "NEAR", "MAM8", "RY128"],
+ "help_text": "LAST creates a database of seed sequences in the _target_ genome, and provides different ways to generate these seeds. The default (`YASS`) searches for long-and-weak similarities that allow for mismatches but not gaps. Among alternatives, there are `NEAR` for short-and-strong (near-identical) similarities with many gaps (insertions and deletions), `MAM8` to find weak similarities with high sensitivity, but low speed and high memory usage, or `RY128` that reduces run time and memory use, by only seeking seeds at ~1/128 of positions in each sequence, which is useful when the purpose of running this pipeline is only to generate whole-genome dotplots, or when sensitivity for tiny fragments may be unnecessary or undesirable. See the [LAST seeds documentation](https://gitlab.com/mcfrith/last/-/blob/main/doc/last-seeds.rst) for details.",
+ "description": "Select the LAST seed to index the _target_ genome.",
+ "default": "YASS",
+ "fa_icon": "fas fa-seedling"
+ },
+ "softmask": {
+ "type": "string",
+ "enum": ["tantan", "original"],
+ "help_text": "In this pipeline, letters soft-masked in lowercase are excluded from indexing (`lastdb -c`). By default, the original mask is removed and a new one is made with an internal version of the “tantan” tool. Set this option to `original` to keep the original soft-masking. See the [lastdb documentation](https://gitlab.com/mcfrith/last/-/blob/main/doc/lastdb.rst) for details.",
+ "description": "Customise the way to mask the _target_ genome.",
+ "default": "tantan",
+ "fa_icon": "fas fa-theater-masks"
+ }
+ }
+ },
"alignment_options": {
"title": "Alignment options",
"type": "object",
@@ -68,14 +93,6 @@
"help_text": "This adds time and can comsume considerable amount of space; use only if you need that data, for instance in the case of a self-alignment",
"fa_icon": "fas fa-arrows-alt"
},
- "seed": {
- "type": "string",
- "enum": ["YASS", "NEAR", "MAM8", "RY128"],
- "help_text": "LAST creates a database of seed sequences in the _target_ genome, and provides different ways to generate these seeds. The default (`YASS`) searches for long-and-weak similarities that allow for mismatches but not gaps. Among alternatives, there are `NEAR` for short-and-strong (near-identical) similarities with many gaps (insertions and deletions), `MAM8` to find weak similarities with high sensitivity, but low speed and high memory usage, or `RY128` that reduces run time and memory use, by only seeking seeds at ~1/128 of positions in each sequence, which is useful when the purpose of running this pipeline is only to generate whole-genome dotplots, or when sensitivity for tiny fragments may be unnecessary or undesirable. See for details.",
- "description": "Select the the LAST seed to index the _target_ genome.",
- "default": "YASS",
- "fa_icon": "fas fa-seedling"
- },
"lastal_params": {
"type": "string",
"description": "Path to a file containing alignment parameters or a scoring matrix. If this option is used, `last-train` will be skipped and alignment parameters will be the same for each query.",
@@ -372,6 +389,9 @@
{
"$ref": "#/definitions/input_output_options"
},
+ {
+ "$ref": "#/definitions/indexing_options"
+ },
{
"$ref": "#/definitions/alignment_options"
},