From 899e82a36aca2dd454bf766a00e13b73585a1a04 Mon Sep 17 00:00:00 2001 From: TernovojD Date: Tue, 30 Apr 2024 15:51:13 +0200 Subject: [PATCH 01/18] added sample_sheet feature --- eval.nf | 45 ++++++++++++++++++++++++++++++--------------- nextflow.config | 12 +++++++++--- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/eval.nf b/eval.nf index 00b4c07..9898119 100644 --- a/eval.nf +++ b/eval.nf @@ -11,22 +11,37 @@ workflow{ ch_ref = Channel.value("$baseDir/" + params.reference) ch_ref_idx = SAMTOOLS_FAIDX(ch_ref) - ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}") - ch_callsets - .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('.vcf', '').replaceFirst('.gz', '').toInteger(), file(it)) } - .set {ch_callsets} - //ch_callsets.view() - - ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf") - ch_truthsets - .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('.vcf', '').toInteger(), file(it)) } - .set {ch_truthsets} - //ch_truthsets.view() - - ch_truthsets.join(ch_callsets, by: 0) - .set {ch_variantsets_map} - //ch_variantsets_map.view() + if (params.callsets_dir != "") { + ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}") + ch_callsets + .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('.vcf', '').replaceFirst('.gz', '').toInteger(), file(it)) } + .set {ch_callsets} + ch_callsets.view() + + ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf") + ch_truthsets + .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('.vcf', '').toInteger(), file(it)) } + .set {ch_truthsets} + // ch_truthsets.view() + + ch_truthsets.join(ch_callsets, by: 0) + .set {ch_variantsets_map} + // ch_variantsets_map.view() + + } else if (params.sample_sheet != "") { + + ch_variantsets_map = Channel + .fromPath(params.sample_sheet, checkIfExists: true) + .splitCsv(header: true, sep: ",") + .map {row -> [row["index"] as Integer, row["callset"], row["truthset"]]} + .view() + + } else { + + exit 1, "ERROR: Either the sample_sheet or callsets_dir parameter has to be provided!\n" + + } // ------------------ // | Main processes | diff --git a/nextflow.config b/nextflow.config index c984d9d..ff76e99 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,12 +11,12 @@ manifest { params { // Individual parameters n = 3 - reference = 'reference/Sars-Cov-2/Wuhan-Hu-1/MN908947.3.fasta' + reference = 'ressources/MN908947.3.fasta' read_type = 'ngs' // General parameters seed = 479 - outdir = 'results' + outdir = 'hap_results' // NGS (WGS) - Read simulation parameters nb_frag = 3000 @@ -35,7 +35,8 @@ params { nb_reads = 180 // Evaluation parameters - callsets_dir = 'data' + callsets_dir = '' + sample_sheet = '' } // Enable execution report @@ -63,4 +64,9 @@ profiles { executor.name = "local" executor.cpus = 4 } + + slurm { + executor.name = "slurm" + executor.cpus = 4 + } } From 8b8f17ed9d7e732de9176693b67e46824f1f023d Mon Sep 17 00:00:00 2001 From: TernovojD Date: Thu, 2 May 2024 09:35:23 +0200 Subject: [PATCH 02/18] added requested changes, updated README --- README.md | 9 +++++---- eval.nf | 10 +++++----- nextflow.config | 4 ++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index c25d6de..88f686d 100644 --- a/README.md +++ b/README.md @@ -69,10 +69,11 @@ The minimal command to evaluate the accordance between a truthset 
(generated data) and a callset is
 ```
 nextflow run eval.nf -profile local,conda --callsets_dir <path/to/callsets>
 ```
 where `--callsets_dir` is the parameter to specify a folder containing the callset VCF files.
-Currently, a callset within this folder has to follow the naming convention `callset_<X>.vcf[.gz]` where _\<X\>_ is the integer of the corresponding truthset.
-Callsets can optionally be _gzip_ compressed.
-
-🚧 For convenience, the `eval.nf` will get an option to provide a sample sheet as an alternative input format in the future.
+Currently, a callset within this folder has to follow the naming convention `callset_<X>.vcf[.gz]` where _\<X\>_ is the integer of the corresponding truthset. Alternatively, one can provide a sample sheet (","-delimited) with the columns "index", "callset" and "truthset", where "index" is an iteration from 1 to n (number of samples) and "callset"/"truthset" are paths to the respectively matching VCF files. The command is
+```
+nextflow run eval.nf -profile local,conda --sample_sheet <path/to/sample_sheet>
+```
+Note: Callsets can optionally be _gzip_ compressed.
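+For illustration, a sample sheet for three samples could look as follows (the paths are placeholders):
+```
+index,callset,truthset
+1,/path/to/callset_1.vcf.gz,/path/to/simulated_hap1.vcf
+2,/path/to/callset_2.vcf.gz,/path/to/simulated_hap2.vcf
+3,/path/to/callset_3.vcf.gz,/path/to/simulated_hap3.vcf
+```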
⚠️ Run commands from the root directory Without further ado, please run the commands from a terminal at the top folder (root directory) of this repository. diff --git a/eval.nf b/eval.nf index 9898119..69d5622 100644 --- a/eval.nf +++ b/eval.nf @@ -11,13 +11,13 @@ workflow{ ch_ref = Channel.value("$baseDir/" + params.reference) ch_ref_idx = SAMTOOLS_FAIDX(ch_ref) - if (params.callsets_dir != "") { + if (params.callsets_dir != "" && params.sample_sheet == "") { ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}") ch_callsets .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('.vcf', '').replaceFirst('.gz', '').toInteger(), file(it)) } .set {ch_callsets} - ch_callsets.view() + // ch_callsets.view() ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf") ch_truthsets @@ -29,17 +29,17 @@ workflow{ .set {ch_variantsets_map} // ch_variantsets_map.view() - } else if (params.sample_sheet != "") { + } else if (params.sample_sheet != "" && params.callsets_dir == "") { ch_variantsets_map = Channel .fromPath(params.sample_sheet, checkIfExists: true) .splitCsv(header: true, sep: ",") .map {row -> [row["index"] as Integer, row["callset"], row["truthset"]]} - .view() + // .view() } else { - exit 1, "ERROR: Either the sample_sheet or callsets_dir parameter has to be provided!\n" + exit 1, "ERROR: Data input incorrect - please supply only one of the following parameters: sample_sheet, callsets_dir\n" } diff --git a/nextflow.config b/nextflow.config index ff76e99..f49f4c6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,12 +11,12 @@ manifest { params { // Individual parameters n = 3 - reference = 'ressources/MN908947.3.fasta' + reference = 'reference/Sars-Cov-2/Wuhan-Hu-1/MN908947.3.fasta' read_type = 'ngs' // General parameters seed = 479 - outdir = 'hap_results' + outdir = 'results' // NGS (WGS) - Read simulation parameters nb_frag = 3000 From da1b411ccf759cb106fde51299c41300efb7ae73 Mon Sep 17 00:00:00 2001 From: Thomas Krannich Date: Mon, 6 May 2024 14:41:20 +0200 Subject: [PATCH 03/18] Update tests.yml try sequential CI jobs --- .github/workflows/tests.yml | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 37a669c..53cbf87 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,4 @@ -name: CI +name: tests on: push: @@ -8,8 +8,8 @@ on: # designed as in: https://github.com/marketplace/actions/setup-miniconda jobs: - CI: - name: CI (Linux) + build: + name: build env runs-on: "ubuntu-latest" defaults: run: @@ -36,6 +36,14 @@ jobs: run: | nextflow -version + run-hap: + name: Run hap.nf + needs: build + runs-on: "ubuntu-latest" + defaults: + run: + shell: bash -el {0} + steps: - name : Download reference run: | wget https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3 @@ -47,6 +55,14 @@ jobs: run: | nextflow run hap.nf -profile local,conda + run-eval: + name: Run eval.nf + needs: build + runs-on: "ubuntu-latest" + defaults: + run: + shell: bash -el {0} + steps: - name: Test callset evaluation run: | nextflow run eval.nf -profile local,conda --callsets_dir aux/ci_data/ From e25d337a799d25297477c095b011a13c53a606fe Mon Sep 17 00:00:00 2001 From: Thomas Krannich Date: Mon, 6 May 2024 15:07:34 +0200 Subject: [PATCH 04/18] Update tests.yml duplicating environments to all jobs --- .github/workflows/tests.yml | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 53cbf87..6e1a3f1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -9,7 +9,7 @@ on: # designed as in: https://github.com/marketplace/actions/setup-miniconda jobs: build: - name: build env + name: build nf env runs-on: "ubuntu-latest" defaults: run: @@ -36,6 +36,7 @@ jobs: run: | nextflow -version + run-hap: name: Run hap.nf needs: build @@ -44,6 +45,16 @@ jobs: run: shell: bash -el {0} steps: + - uses: actions/checkout@v4 + - uses: conda-incubator/setup-miniconda@v3 + with: + miniconda-version: "latest" + activate-environment: nextflow + environment-file: env/conda_nxf.yml + channels: conda-forge,bioconda,defaults + channel-priority: true + auto-activate-base: false + - name : Download reference run: | wget https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3 @@ -63,6 +74,16 @@ jobs: run: shell: bash -el {0} steps: + - uses: actions/checkout@v4 + - uses: conda-incubator/setup-miniconda@v3 + with: + miniconda-version: "latest" + activate-environment: nextflow + environment-file: env/conda_nxf.yml + channels: conda-forge,bioconda,defaults + channel-priority: true + auto-activate-base: false + - name: Test callset evaluation run: | nextflow run eval.nf -profile local,conda --callsets_dir aux/ci_data/ From 6929bd63d9dd9b6e23d4a35a71d45344bd2fb457 Mon Sep 17 00:00:00 2001 From: Thomas Krannich Date: Mon, 6 May 2024 15:19:17 +0200 Subject: [PATCH 05/18] Update tests.yml download ref in eval job --- .github/workflows/tests.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6e1a3f1..73ce4ff 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -84,6 +84,13 @@ jobs: channel-priority: true auto-activate-base: false + - name : Download reference + run: | + wget https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3 + sed 's/>ENA|MN908947|MN908947.3 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome./>MN908947.3/g' MN908947.3 > MN908947.3.fasta + mkdir -p reference/Sars-Cov-2/Wuhan-Hu-1/ + mv MN908947.3.fasta reference/Sars-Cov-2/Wuhan-Hu-1/ + - name: Test callset evaluation run: | nextflow run eval.nf -profile local,conda --callsets_dir aux/ci_data/ From f3ca9cef6195cc1da9d8365d8cc3ee0c0af68c6a Mon Sep 17 00:00:00 2001 From: Thomas Krannich Date: Mon, 6 May 2024 16:19:24 +0200 Subject: [PATCH 06/18] Update tests.yml test sample sheet --- .github/workflows/tests.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 73ce4ff..e231800 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -91,7 +91,16 @@ jobs: mkdir -p reference/Sars-Cov-2/Wuhan-Hu-1/ mv MN908947.3.fasta reference/Sars-Cov-2/Wuhan-Hu-1/ - - name: Test callset evaluation + - name: Test callset evaluation with callset_dir run: | nextflow run eval.nf -profile local,conda --callsets_dir aux/ci_data/ + - name: Test callset evaluation with sample_sheet + run: | + echo "index,callset,truthset" > my_samples.csv + echo "1,aux/ci_data/callset_1.vcf.gz,results/simulated_hap1.vcf" >> my_samples.csv + echo "2,aux/ci_data/callset_2.vcf.gz,results/simulated_hap2.vcf" >> my_samples.csv + echo "3,aux/ci_data/callset_3.vcf.gz,results/simulated_hap3.vcf" >> my_samples.csv + + nextflow run eval.nf -profile local,conda --sample_sheet my_samples.csv + From 0e1e6235841ed3061906851924a9b59c3e81f7e0 Mon Sep 17 00:00:00 2001 
From: Thomas Krannich
Date: Mon, 6 May 2024 16:35:07 +0200
Subject: [PATCH 07/18] Update tests.yml

update eval.nf test's paths
---
 .github/workflows/tests.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index e231800..3e7af33 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -97,10 +97,11 @@
       - name: Test callset evaluation with sample_sheet
         run: |
+          cwd=$(pwd)
           echo "index,callset,truthset" > my_samples.csv
-          echo "1,aux/ci_data/callset_1.vcf.gz,results/simulated_hap1.vcf" >> my_samples.csv
-          echo "2,aux/ci_data/callset_2.vcf.gz,results/simulated_hap2.vcf" >> my_samples.csv
-          echo "3,aux/ci_data/callset_3.vcf.gz,results/simulated_hap3.vcf" >> my_samples.csv
+          echo "1,${cwd}/aux/ci_data/callset_1.vcf.gz,${cwd}/results/simulated_hap1.vcf" >> my_samples.csv
+          echo "2,${cwd}/aux/ci_data/callset_2.vcf.gz,${cwd}/results/simulated_hap2.vcf" >> my_samples.csv
+          echo "3,${cwd}/aux/ci_data/callset_3.vcf.gz,${cwd}/results/simulated_hap3.vcf" >> my_samples.csv
 
           nextflow run eval.nf -profile local,conda --sample_sheet my_samples.csv

From a147e6e9ede8b925e5f7a37f9293e6b126c00da3 Mon Sep 17 00:00:00 2001
From: Thomas Krannich
Date: Mon, 6 May 2024 16:44:59 +0200
Subject: [PATCH 08/18] Update eval.nf

swap truthset and callset in CSV sample sheet
---
 eval.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eval.nf b/eval.nf
index 69d5622..88a05b1 100644
--- a/eval.nf
+++ b/eval.nf
@@ -34,7 +34,7 @@ workflow{
         ch_variantsets_map = Channel
             .fromPath(params.sample_sheet, checkIfExists: true)
             .splitCsv(header: true, sep: ",")
-            .map {row -> [row["index"] as Integer, row["callset"], row["truthset"]]}
+            .map {row -> [row["index"] as Integer, row["truthset"], row["callset"]]}
             // .view()

     } else {

From ae9813595d59e8bae0d3336c4400ad2bf8c20709 Mon Sep 17 00:00:00 2001
From: Thomas Krannich
Date: Mon, 6 May 2024 16:52:06 +0200
Subject: [PATCH 09/18] Update README.md

---
 README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 88f686d..28c9708 100644
--- a/README.md
+++ b/README.md
@@ -69,11 +69,13 @@ The minimal command to evaluate the accordance between a truthset (generated data) and a callset is
 ```
 nextflow run eval.nf -profile local,conda --callsets_dir <path/to/callsets>
 ```
 where `--callsets_dir` is the parameter to specify a folder containing the callset VCF files.
-Currently, a callset within this folder has to follow the naming convention `callset_<X>.vcf[.gz]` where _\<X\>_ is the integer of the corresponding truthset. Alternatively, one can provide a sample sheet (","-delimited) with the columns "index", "callset" and "truthset", where "index" is an iteration from 1 to n (number of samples) and "callset"/"truthset" are paths to the respectively matching VCF files. The command is
+Currently, a callset within this folder has to follow the naming convention `callset_<X>.vcf[.gz]` where _\<X\>_ is the integer of the corresponding truthset.
+Alternatively, one can provide a sample sheet of comma-separated values (CSV file) with the columns "index", "truthset" and "callset", where "index" is an integer from 1 to n (number of samples) and "callset"/"truthset" are paths to the pairwise matching VCF files.
+Callsets can optionally be _gzip_ compressed.
+The command for the sample sheet input is
+```
+nextflow run eval.nf -profile local,conda --sample_sheet <path/to/sample_sheet>
+```
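+Note that the column order is swapped compared to the earlier revision; an illustrative sample sheet (with placeholder paths) would start:
+```
+index,truthset,callset
+1,/path/to/simulated_hap1.vcf,/path/to/callset_1.vcf.gz
+```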
⚠️ Run commands from the root directory Without further ado, please run the commands from a terminal at the top folder (root directory) of this repository. From 62aee034355899a11efec601a4c86bc84073bcff Mon Sep 17 00:00:00 2001 From: Thomas Krannich Date: Mon, 6 May 2024 16:54:11 +0200 Subject: [PATCH 10/18] Update tests.yml --- .github/workflows/tests.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3e7af33..23dcdc3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -98,10 +98,10 @@ jobs: - name: Test callset evaluation with sample_sheet run: | cwd=$(pwd) - echo "index,callset,truthset" > my_samples.csv - echo "1,${cwd}/aux/ci_data/callset_1.vcf.gz,${cwd}/results/simulated_hap1.vcf" >> my_samples.csv - echo "2,${cwd}/aux/ci_data/callset_2.vcf.gz,${cwd}/results/simulated_hap2.vcf" >> my_samples.csv - echo "3,${cwd}/aux/ci_data/callset_3.vcf.gz,${cwd}/results/simulated_hap3.vcf" >> my_samples.csv + echo "index,truthset,callset" > my_samples.csv + echo "1,${cwd}/results/simulated_hap1.vcf,${cwd}/aux/ci_data/callset_1.vcf.gz" >> my_samples.csv + echo "2,${cwd}/results/simulated_hap2.vcf,${cwd}/aux/ci_data/callset_2.vcf.gz" >> my_samples.csv + echo "3,${cwd}/results/simulated_hap3.vcf,${cwd}/aux/ci_data/callset_3.vcf.gz" >> my_samples.csv nextflow run eval.nf -profile local,conda --sample_sheet my_samples.csv From 91f6bd5dd6cf45414f083daee0561a2e414dfc54 Mon Sep 17 00:00:00 2001 From: Thomas Krannich Date: Mon, 6 May 2024 17:05:42 +0200 Subject: [PATCH 11/18] Update tests.yml --- .github/workflows/tests.yml | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 23dcdc3..3c322eb 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -37,8 +37,8 @@ jobs: nextflow -version - run-hap: - name: Run hap.nf + run-cievad: + name: Run cievad needs: build runs-on: "ubuntu-latest" defaults: @@ -66,31 +66,6 @@ jobs: run: | nextflow run hap.nf -profile local,conda - run-eval: - name: Run eval.nf - needs: build - runs-on: "ubuntu-latest" - defaults: - run: - shell: bash -el {0} - steps: - - uses: actions/checkout@v4 - - uses: conda-incubator/setup-miniconda@v3 - with: - miniconda-version: "latest" - activate-environment: nextflow - environment-file: env/conda_nxf.yml - channels: conda-forge,bioconda,defaults - channel-priority: true - auto-activate-base: false - - - name : Download reference - run: | - wget https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3 - sed 's/>ENA|MN908947|MN908947.3 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome./>MN908947.3/g' MN908947.3 > MN908947.3.fasta - mkdir -p reference/Sars-Cov-2/Wuhan-Hu-1/ - mv MN908947.3.fasta reference/Sars-Cov-2/Wuhan-Hu-1/ - - name: Test callset evaluation with callset_dir run: | nextflow run eval.nf -profile local,conda --callsets_dir aux/ci_data/ From caebc789c7fc740b2c5df9d8f7e96e67e04aea4e Mon Sep 17 00:00:00 2001 From: Thomas Krannich Date: Mon, 6 May 2024 17:26:16 +0200 Subject: [PATCH 12/18] Update eval.nf add file checks --- eval.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eval.nf b/eval.nf index 88a05b1..f5e45bd 100644 --- a/eval.nf +++ b/eval.nf @@ -13,13 +13,13 @@ workflow{ if (params.callsets_dir != "" && params.sample_sheet == "") { - ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}") + ch_callsets = 
Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}", checkIfExists: true) ch_callsets .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('.vcf', '').replaceFirst('.gz', '').toInteger(), file(it)) } .set {ch_callsets} // ch_callsets.view() - ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf") + ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf", checkIfExists: true) ch_truthsets .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('.vcf', '').toInteger(), file(it)) } .set {ch_truthsets} From fbed1fd22e041d718fdabe4ef4ce03221f63b696 Mon Sep 17 00:00:00 2001 From: TernovojD Date: Wed, 8 May 2024 12:20:09 +0200 Subject: [PATCH 13/18] Initial Commit Help Pages --- eval.nf | 8 +++++ hap.nf | 8 +++++ nextflow.config | 1 + src/data_class.groovy | 78 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 95 insertions(+) create mode 100644 src/data_class.groovy diff --git a/eval.nf b/eval.nf index f5e45bd..088c711 100644 --- a/eval.nf +++ b/eval.nf @@ -1,3 +1,10 @@ +//load in help function +File data_class_file = new File("./src/data_class.groovy"); +Class groovyClass = new GroovyClassLoader(getClass().getClassLoader()).parseClass(data_class_file); +GroovyObject data_class = (GroovyObject) groovyClass.newInstance(); + +if (params.help) { exit 0, data_class.helpEval(workflow.manifest.version) } + // include modules - here, modules are single processes include { SAMTOOLS_FAIDX } from './modules/samtools/faidx/main.nf' include { HAPPY } from './modules/happy/main.nf' @@ -50,3 +57,4 @@ workflow{ SOMPY_SUMMARY(ch_csv.collect()) } + diff --git a/hap.nf b/hap.nf index 1a363d4..f03230b 100644 --- a/hap.nf +++ b/hap.nf @@ -1,3 +1,11 @@ +//load in help function +File data_class_file = new File("./src/data_class.groovy"); +Class groovyClass = new GroovyClassLoader(getClass().getClassLoader()).parseClass(data_class_file); +GroovyObject data_class = (GroovyObject) groovyClass.newInstance(); + +if (params.help) { exit 0, data_class.helpHap(workflow.manifest.version, params.nb_frag, params.fragment_min_size, params.fragment_max_size, params.fragment_mean_size, + params.fragment_size_std_dev, params.illumina_read_length, params.dna_type, params.model_prefix, params.model_caller, params.median_length, params.sd_length, params.nb_reads) } + // include modules - here, modules are single processes //include { AMPLISIM } from './modules/amplisim/main.nf' include { MASON_SIMULATOR } from './modules/mason/simulator/main.nf' diff --git a/nextflow.config b/nextflow.config index f49f4c6..36acb03 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,6 +15,7 @@ params { read_type = 'ngs' // General parameters + help = false seed = 479 outdir = 'results' diff --git a/src/data_class.groovy b/src/data_class.groovy new file mode 100644 index 0000000..b958c6c --- /dev/null +++ b/src/data_class.groovy @@ -0,0 +1,78 @@ +class Helper { + def helpEval(version){ + String c_green = "\033[0;32m"; + String c_reset = "\033[0m"; + String c_yellow = "\033[0;33m"; + String c_blue = "\033[0;34m"; + String c_red = "\u001B[31m"; + String c_dim = "\033[2m"; + log.info """ + ____________________________________________________________________________________________ + + ${c_blue}Robert Koch Institute, MF1 Bioinformatics${c_reset} + + Workflow: cievad (${version}) + + ${c_yellow}Minimal Usage Examples:${c_reset} + + nextflow run eval.nf -profile local,conda --callsets_dir + or + nextflow run eval.nf 
-profile local,conda --sample_sheet + + ${c_yellow}Data Input, required:${c_reset} + + ${c_green} --callsets_dir ${c_reset} Directory containing variant callsets for evaluation (naming format: callset_.vcf[.gz]). + OR + ${c_green} --sample_sheet ${c_reset} Sample sheet (.csv) with the header ("index","truthset","callset"), mapping corresponding truth- and callsets. + """ + } + + def helpHap(version,nb_frag,fragment_min_size,fragment_max_size,fragment_mean_size,fragment_size_std_dev, + illumina_read_length, dna_type, model_prefix, model_caller, median_length, sd_length, nb_reads){ + String c_green = "\033[0;32m"; + String c_reset = "\033[0m"; + String c_yellow = "\033[0;33m"; + String c_blue = "\033[0;34m"; + String c_red = "\u001B[31m"; + String c_dim = "\033[2m"; + log.info """ + ____________________________________________________________________________________________ + + ${c_blue}Robert Koch Institute, MF1 Bioinformatics${c_reset} + + Workflow: cievad (${version}) + + ${c_yellow}Minimal Usage Example:${c_reset} + + nextflow run hap.nf -profile local,conda + + ${c_yellow}Individual Parameter, required:${c_reset} + + ${c_green} --n ${c_reset} number of synthetic samples to be generated + ${c_green} --reference ${c_reset} reference used for the generation of synthetic sequencing data + ${c_green} --read_type ${c_reset} type of resulting WGS synthetic reads (options: ngs, ont) + + ${c_yellow}Output Directory, required:${c_reset} + + ${c_green} --outdir ${c_reset} directory to save results in + + ${c_yellow}Next Generation Sequencing (WGS) Parameter, required if [--read_type ngs] supplied ${c_reset} + + ${c_green} --nb_frag ${c_reset} number of fragments per sample [default: ${nb_frag}] + ${c_green} --fragment_min_size ${c_reset} minimum size of fragments [default: ${fragment_min_size}] + ${c_green} --fragment_max_size ${c_reset} maximum size of fragments [default: ${fragment_max_size}] + ${c_green} --fragment_mean_size ${c_reset} mean size of fragments [default: ${fragment_mean_size}] + ${c_green} --fragment_size_std_dev ${c_reset} standard deviation for fragment size [default: ${fragment_size_std_dev}] + ${c_green} --illumina_read_length ${c_reset} read length of synthetic illumina reads [default: ${illumina_read_length}] + + ${c_yellow}Nanopore Sequencing (WGS) Parameter, required if [--read_type ont] supplied ${c_reset} + + ${c_green} --dna_type ${c_reset} used DNA type [default: ${dna_type}] + ${c_green} --model_prefix ${c_reset} path and prefix of the used model (e.g.: ${model_prefix}) + ${c_green} --model_caller ${c_reset} algorithm to conduct the basecalling [default: ${model_caller}] + ${c_green} --median_length ${c_reset} median length of the resulting synthetic reads [default: ${median_length}] + ${c_green} --sd_length ${c_reset} standard deviation length of the resulting synthetic reads [default: ${sd_length}] + ${c_green} --nb_reads ${c_reset} number of synthetic reads [default: ${nb_reads}] + """ + } +} \ No newline at end of file From 0b2a1f5d2dd6ee19da7e6cadc38e9a9c7896065f Mon Sep 17 00:00:00 2001 From: TernovojD Date: Wed, 8 May 2024 12:55:01 +0200 Subject: [PATCH 14/18] added VERSIONS file and improved the help pages call --- hap.nf | 3 +-- nextflow.config | 2 +- src/data_class.groovy | 27 +++++++++++++-------------- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/hap.nf b/hap.nf index f03230b..6a7547b 100644 --- a/hap.nf +++ b/hap.nf @@ -3,8 +3,7 @@ File data_class_file = new File("./src/data_class.groovy"); Class groovyClass = new 
GroovyClassLoader(getClass().getClassLoader()).parseClass(data_class_file); GroovyObject data_class = (GroovyObject) groovyClass.newInstance(); -if (params.help) { exit 0, data_class.helpHap(workflow.manifest.version, params.nb_frag, params.fragment_min_size, params.fragment_max_size, params.fragment_mean_size, - params.fragment_size_std_dev, params.illumina_read_length, params.dna_type, params.model_prefix, params.model_caller, params.median_length, params.sd_length, params.nb_reads) } +if (params.help) { exit 0, data_class.helpHap(workflow.manifest.version, params) } // include modules - here, modules are single processes //include { AMPLISIM } from './modules/amplisim/main.nf' diff --git a/nextflow.config b/nextflow.config index 36acb03..60e0aa4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -4,7 +4,7 @@ manifest { description = 'A workflow for a simple, streamlined and rapid evaluation of variant callsets ' author = 'Thomas Krannich' nextflowVersion = '>=20.04.0' - version = '0.2.0' + version = new File('VERSION').text.trim() } // Parameters that are accessible in the pipeline script diff --git a/src/data_class.groovy b/src/data_class.groovy index b958c6c..69f0566 100644 --- a/src/data_class.groovy +++ b/src/data_class.groovy @@ -27,8 +27,7 @@ class Helper { """ } - def helpHap(version,nb_frag,fragment_min_size,fragment_max_size,fragment_mean_size,fragment_size_std_dev, - illumina_read_length, dna_type, model_prefix, model_caller, median_length, sd_length, nb_reads){ + def helpHap(version,params){ String c_green = "\033[0;32m"; String c_reset = "\033[0m"; String c_yellow = "\033[0;33m"; @@ -58,21 +57,21 @@ class Helper { ${c_yellow}Next Generation Sequencing (WGS) Parameter, required if [--read_type ngs] supplied ${c_reset} - ${c_green} --nb_frag ${c_reset} number of fragments per sample [default: ${nb_frag}] - ${c_green} --fragment_min_size ${c_reset} minimum size of fragments [default: ${fragment_min_size}] - ${c_green} --fragment_max_size ${c_reset} maximum size of fragments [default: ${fragment_max_size}] - ${c_green} --fragment_mean_size ${c_reset} mean size of fragments [default: ${fragment_mean_size}] - ${c_green} --fragment_size_std_dev ${c_reset} standard deviation for fragment size [default: ${fragment_size_std_dev}] - ${c_green} --illumina_read_length ${c_reset} read length of synthetic illumina reads [default: ${illumina_read_length}] + ${c_green} --nb_frag ${c_reset} number of fragments per sample [default: ${params.nb_frag}] + ${c_green} --fragment_min_size ${c_reset} minimum size of fragments [default: ${params.fragment_min_size}] + ${c_green} --fragment_max_size ${c_reset} maximum size of fragments [default: ${params.fragment_max_size}] + ${c_green} --fragment_mean_size ${c_reset} mean size of fragments [default: ${params.fragment_mean_size}] + ${c_green} --fragment_size_std_dev ${c_reset} standard deviation for fragment size [default: ${params.fragment_size_std_dev}] + ${c_green} --illumina_read_length ${c_reset} read length of synthetic illumina reads [default: ${params.illumina_read_length}] ${c_yellow}Nanopore Sequencing (WGS) Parameter, required if [--read_type ont] supplied ${c_reset} - ${c_green} --dna_type ${c_reset} used DNA type [default: ${dna_type}] - ${c_green} --model_prefix ${c_reset} path and prefix of the used model (e.g.: ${model_prefix}) - ${c_green} --model_caller ${c_reset} algorithm to conduct the basecalling [default: ${model_caller}] - ${c_green} --median_length ${c_reset} median length of the resulting synthetic reads [default: 
${median_length}] - ${c_green} --sd_length ${c_reset} standard deviation length of the resulting synthetic reads [default: ${sd_length}] - ${c_green} --nb_reads ${c_reset} number of synthetic reads [default: ${nb_reads}] + ${c_green} --dna_type ${c_reset} used DNA type [default: ${params.dna_type}] + ${c_green} --model_prefix ${c_reset} path and prefix of the used model (e.g.: ${params.model_prefix}) + ${c_green} --model_caller ${c_reset} algorithm to conduct the basecalling [default: ${params.model_caller}] + ${c_green} --median_length ${c_reset} median length of the resulting synthetic reads [default: ${params.median_length}] + ${c_green} --sd_length ${c_reset} standard deviation length of the resulting synthetic reads [default: ${params.sd_length}] + ${c_green} --nb_reads ${c_reset} number of synthetic reads [default: ${params.nb_reads}] """ } } \ No newline at end of file From 7896f3a0476abc50559110f306d406096db8c3b6 Mon Sep 17 00:00:00 2001 From: TernovojD Date: Wed, 8 May 2024 13:11:11 +0200 Subject: [PATCH 15/18] adding VERSION file --- VERSION | 1 + nextflow.config | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 VERSION diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..12ca3c5 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +'0.2.0' \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 60e0aa4..efe1566 100644 --- a/nextflow.config +++ b/nextflow.config @@ -4,7 +4,7 @@ manifest { description = 'A workflow for a simple, streamlined and rapid evaluation of variant callsets ' author = 'Thomas Krannich' nextflowVersion = '>=20.04.0' - version = new File('VERSION').text.trim() + version = new File('./VERSION').text.trim() } // Parameters that are accessible in the pipeline script From 933559b27588f7ac9ef0d9795231b76f99dda7e2 Mon Sep 17 00:00:00 2001 From: Krannich479 Date: Tue, 14 May 2024 15:24:33 +0200 Subject: [PATCH 16/18] update help pages --- eval.nf | 2 +- src/data_class.groovy | 50 +++++++++++++++++++++++-------------------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/eval.nf b/eval.nf index 088c711..19a1006 100644 --- a/eval.nf +++ b/eval.nf @@ -3,7 +3,7 @@ File data_class_file = new File("./src/data_class.groovy"); Class groovyClass = new GroovyClassLoader(getClass().getClassLoader()).parseClass(data_class_file); GroovyObject data_class = (GroovyObject) groovyClass.newInstance(); -if (params.help) { exit 0, data_class.helpEval(workflow.manifest.version) } +if (params.help) { exit 0, data_class.helpEval(workflow.manifest.version, params) } // include modules - here, modules are single processes include { SAMTOOLS_FAIDX } from './modules/samtools/faidx/main.nf' diff --git a/src/data_class.groovy b/src/data_class.groovy index 69f0566..3420440 100644 --- a/src/data_class.groovy +++ b/src/data_class.groovy @@ -1,5 +1,5 @@ class Helper { - def helpEval(version){ + def helpEval(version,params){ String c_green = "\033[0;32m"; String c_reset = "\033[0m"; String c_yellow = "\033[0;33m"; @@ -9,9 +9,9 @@ class Helper { log.info """ ____________________________________________________________________________________________ - ${c_blue}Robert Koch Institute, MF1 Bioinformatics${c_reset} + ${c_blue}Robert Koch Institute, Genome Competence Center${c_reset} - Workflow: cievad (${version}) + Workflow: cievad (${version}) - evaluation of callsets ${c_yellow}Minimal Usage Examples:${c_reset} @@ -19,11 +19,15 @@ class Helper { or nextflow run eval.nf -profile local,conda --sample_sheet - ${c_yellow}Data Input, 
required:${c_reset} + ${c_yellow}Input parameter (required):${c_reset} - ${c_green} --callsets_dir ${c_reset} Directory containing variant callsets for evaluation (naming format: callset_.vcf[.gz]). + ${c_green} --callsets_dir ${c_reset} Directory containing variant callsets for evaluation (files of format: callset_.vcf[.gz]), where is the index of the corresponding truthset. OR - ${c_green} --sample_sheet ${c_reset} Sample sheet (.csv) with the header ("index","truthset","callset"), mapping corresponding truth- and callsets. + ${c_green} --sample_sheet ${c_reset} Sample sheet (.csv) with the header "index,truthset,callset". Every following line contains an index and matching truth- and callset. + + ${c_yellow}Other workflow parameter:${c_reset} + + ${c_green} --outdir ${c_reset} directory to save results in [default: ${params.outdir}] """ } @@ -37,25 +41,25 @@ class Helper { log.info """ ____________________________________________________________________________________________ - ${c_blue}Robert Koch Institute, MF1 Bioinformatics${c_reset} + ${c_blue}Robert Koch Institute, Genome Competence Center${c_reset} - Workflow: cievad (${version}) + Workflow: cievad (${version}) - haplotype generation ${c_yellow}Minimal Usage Example:${c_reset} - nextflow run hap.nf -profile local,conda + nextflow run hap.nf -profile local,conda --reference - ${c_yellow}Individual Parameter, required:${c_reset} + ${c_yellow}Input parameter (required):${c_reset} - ${c_green} --n ${c_reset} number of synthetic samples to be generated - ${c_green} --reference ${c_reset} reference used for the generation of synthetic sequencing data - ${c_green} --read_type ${c_reset} type of resulting WGS synthetic reads (options: ngs, ont) + ${c_green} --reference ${c_reset} reference genome (.fasta) used for the generation of synthetic sequencing data - ${c_yellow}Output Directory, required:${c_reset} + ${c_yellow}Other workflow parameter:${c_reset} - ${c_green} --outdir ${c_reset} directory to save results in + ${c_green} --n ${c_reset} number of synthetic samples to be generated [default: ${params.n}] + ${c_green} --read_type ${c_reset} type of synthetic reads to be generated (options: ngs, ont) [default: ${params.read_type}] + ${c_green} --outdir ${c_reset} directory to save results in [default: ${params.outdir}] - ${c_yellow}Next Generation Sequencing (WGS) Parameter, required if [--read_type ngs] supplied ${c_reset} + ${c_yellow}Next Generation Sequencing parameter, optional if [--read_type ngs] is supplied ${c_reset} ${c_green} --nb_frag ${c_reset} number of fragments per sample [default: ${params.nb_frag}] ${c_green} --fragment_min_size ${c_reset} minimum size of fragments [default: ${params.fragment_min_size}] @@ -64,14 +68,14 @@ class Helper { ${c_green} --fragment_size_std_dev ${c_reset} standard deviation for fragment size [default: ${params.fragment_size_std_dev}] ${c_green} --illumina_read_length ${c_reset} read length of synthetic illumina reads [default: ${params.illumina_read_length}] - ${c_yellow}Nanopore Sequencing (WGS) Parameter, required if [--read_type ont] supplied ${c_reset} + ${c_yellow}Nanopore Sequencing parameter, optional if [--read_type ont] is supplied ${c_reset} - ${c_green} --dna_type ${c_reset} used DNA type [default: ${params.dna_type}] - ${c_green} --model_prefix ${c_reset} path and prefix of the used model (e.g.: ${params.model_prefix}) + ${c_green} --dna_type ${c_reset} used DNA type (options: linear, circular) [default: ${params.dna_type}] + ${c_green} --model_prefix ${c_reset} path and 
prefix of a NanoSim model [default: ${params.model_prefix}] ${c_green} --model_caller ${c_reset} algorithm to conduct the basecalling [default: ${params.model_caller}] - ${c_green} --median_length ${c_reset} median length of the resulting synthetic reads [default: ${params.median_length}] - ${c_green} --sd_length ${c_reset} standard deviation length of the resulting synthetic reads [default: ${params.sd_length}] - ${c_green} --nb_reads ${c_reset} number of synthetic reads [default: ${params.nb_reads}] + ${c_green} --median_length ${c_reset} median length of the synthetic reads [default: ${params.median_length}] + ${c_green} --sd_length ${c_reset} standard deviation of the synthetic read lengths [default: ${params.sd_length}] + ${c_green} --nb_reads ${c_reset} number of synthetic reads per sample [default: ${params.nb_reads}] """ } -} \ No newline at end of file +} From ea97feec8ed00ccda45f1698502d96a237920c15 Mon Sep 17 00:00:00 2001 From: Thomas Krannich Date: Tue, 14 May 2024 15:28:51 +0200 Subject: [PATCH 17/18] Update VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 12ca3c5..92e23f0 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -'0.2.0' \ No newline at end of file +'0.3.0' From 5e6528585896fece825c181e704815b7d50cb554 Mon Sep 17 00:00:00 2001 From: TernovojD Date: Thu, 16 May 2024 11:01:51 +0200 Subject: [PATCH 18/18] name changes for HelppagesClass import --- eval.nf | 8 ++++---- hap.nf | 8 ++++---- src/{data_class.groovy => Helppages.groovy} | 0 3 files changed, 8 insertions(+), 8 deletions(-) rename src/{data_class.groovy => Helppages.groovy} (100%) diff --git a/eval.nf b/eval.nf index 19a1006..108d7da 100644 --- a/eval.nf +++ b/eval.nf @@ -1,9 +1,9 @@ //load in help function -File data_class_file = new File("./src/data_class.groovy"); -Class groovyClass = new GroovyClassLoader(getClass().getClassLoader()).parseClass(data_class_file); -GroovyObject data_class = (GroovyObject) groovyClass.newInstance(); +File helppages_class_file = new File("./src/Helppages.groovy"); +Class HelppagesClass = new GroovyClassLoader(getClass().getClassLoader()).parseClass(helppages_class_file); +GroovyObject help = (GroovyObject) HelppagesClass.newInstance(); -if (params.help) { exit 0, data_class.helpEval(workflow.manifest.version, params) } +if (params.help) { exit 0, help.helpEval(workflow.manifest.version, params) } // include modules - here, modules are single processes include { SAMTOOLS_FAIDX } from './modules/samtools/faidx/main.nf' diff --git a/hap.nf b/hap.nf index 6a7547b..fe630be 100644 --- a/hap.nf +++ b/hap.nf @@ -1,9 +1,9 @@ //load in help function -File data_class_file = new File("./src/data_class.groovy"); -Class groovyClass = new GroovyClassLoader(getClass().getClassLoader()).parseClass(data_class_file); -GroovyObject data_class = (GroovyObject) groovyClass.newInstance(); +File helppages_class_file = new File("./src/Helppages.groovy"); +Class HelppagesClass = new GroovyClassLoader(getClass().getClassLoader()).parseClass(helppages_class_file); +GroovyObject help = (GroovyObject) HelppagesClass.newInstance(); -if (params.help) { exit 0, data_class.helpHap(workflow.manifest.version, params) } +if (params.help) { exit 0, help.helpHap(workflow.manifest.version, params) } // include modules - here, modules are single processes //include { AMPLISIM } from './modules/amplisim/main.nf' diff --git a/src/data_class.groovy b/src/Helppages.groovy similarity index 100% rename from src/data_class.groovy rename to src/Helppages.groovy
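With the full series applied, both entry points expose the new help pages through the standard Nextflow parameter flag; assuming the `params.help` wiring shown above, they should be reachable as:

```
nextflow run hap.nf --help
nextflow run eval.nf --help
```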