Merge pull request #50 from rki-mf1/dev
Merge dev and push to version 0.3.0
Krannich479 authored May 16, 2024
2 parents 811b395 + 79bc341 commit 0d6b015
Showing 7 changed files with 172 additions and 21 deletions.
37 changes: 33 additions & 4 deletions .github/workflows/tests.yml
@@ -1,4 +1,4 @@
name: CI
name: tests

on:
push:
@@ -8,8 +8,8 @@ on:

# designed as in: https://github.com/marketplace/actions/setup-miniconda
jobs:
CI:
name: CI (Linux)
build:
name: build nf env
runs-on: "ubuntu-latest"
defaults:
run:
@@ -36,6 +36,25 @@ jobs:
run: |
nextflow -version
run-cievad:
name: Run cievad
needs: build
runs-on: "ubuntu-latest"
defaults:
run:
shell: bash -el {0}
steps:
- uses: actions/checkout@v4
- uses: conda-incubator/setup-miniconda@v3
with:
miniconda-version: "latest"
activate-environment: nextflow
environment-file: env/conda_nxf.yml
channels: conda-forge,bioconda,defaults
channel-priority: true
auto-activate-base: false

- name: Download reference
run: |
wget https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3
@@ -47,7 +66,17 @@ jobs:
run: |
nextflow run hap.nf -profile local,conda
- name: Test callset evaluation
- name: Test callset evaluation with callset_dir
run: |
nextflow run eval.nf -profile local,conda --callsets_dir aux/ci_data/
- name: Test callset evaluation with sample_sheet
run: |
cwd=$(pwd)
echo "index,truthset,callset" > my_samples.csv
echo "1,${cwd}/results/simulated_hap1.vcf,${cwd}/aux/ci_data/callset_1.vcf.gz" >> my_samples.csv
echo "2,${cwd}/results/simulated_hap2.vcf,${cwd}/aux/ci_data/callset_2.vcf.gz" >> my_samples.csv
echo "3,${cwd}/results/simulated_hap3.vcf,${cwd}/aux/ci_data/callset_3.vcf.gz" >> my_samples.csv
nextflow run eval.nf -profile local,conda --sample_sheet my_samples.csv
7 changes: 5 additions & 2 deletions README.md
@@ -70,9 +70,12 @@ nextflow run eval.nf -profile local,conda --callsets_dir <path/to/callsets>
```
where `--callsets_dir` is the parameter to specify a folder containing the callset VCF files.
Currently, a callset within this folder has to follow the naming convention `callset_<X>.vcf[.gz]`, where _\<X\>_ is the index of the corresponding truthset.
Alternatively, one can provide a sample sheet of comma-separated values (CSV file) with the columns "index", "truthset" and "callset", where "index" is an integer from 1 to n (the number of samples) and "truthset"/"callset" are paths to the pairwise matching VCF files (see the example below).
Callsets can optionally be _gzip_ compressed.

🚧 For convenience, the `eval.nf` will get an option to provide a sample sheet as an alternative input format in the future.
The command for the sample sheet input is
```
nextflow run eval.nf -profile local,conda --sample_sheet <path/to/sample_sheet>
```
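
For illustration, a sample sheet for three samples could look like the following (the relative paths here are hypothetical; the CI workflow above builds an equivalent sheet with absolute paths):
```
index,truthset,callset
1,results/simulated_hap1.vcf,aux/ci_data/callset_1.vcf.gz
2,results/simulated_hap2.vcf,aux/ci_data/callset_2.vcf.gz
3,results/simulated_hap3.vcf,aux/ci_data/callset_3.vcf.gz
```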

<details><summary>⚠️ Run commands from the root directory </summary>
Without further ado, please run the commands from a terminal at the top folder (root directory) of this repository.
1 change: 1 addition & 0 deletions VERSION
@@ -0,0 +1 @@
'0.3.0'
49 changes: 36 additions & 13 deletions eval.nf
@@ -1,3 +1,10 @@
//load in help function
File helppages_class_file = new File("./src/Helppages.groovy");
Class HelppagesClass = new GroovyClassLoader(getClass().getClassLoader()).parseClass(helppages_class_file);
GroovyObject help = (GroovyObject) HelppagesClass.newInstance();

if (params.help) { exit 0, help.helpEval(workflow.manifest.version, params) }

// include modules - here, modules are single processes
include { SAMTOOLS_FAIDX } from './modules/samtools/faidx/main.nf'
include { HAPPY } from './modules/happy/main.nf'
@@ -11,22 +11,37 @@ workflow{
ch_ref = Channel.value("$baseDir/" + params.reference)
ch_ref_idx = SAMTOOLS_FAIDX(ch_ref)

ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}")
ch_callsets
.map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('.vcf', '').replaceFirst('.gz', '').toInteger(), file(it)) }
.set {ch_callsets}
//ch_callsets.view()
if (params.callsets_dir != "" && params.sample_sheet == "") {

ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}", checkIfExists: true)
ch_callsets
.map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('.vcf', '').replaceFirst('.gz', '').toInteger(), file(it)) }
.set {ch_callsets}
// ch_callsets.view()

ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf", checkIfExists: true)
ch_truthsets
.map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('.vcf', '').toInteger(), file(it)) }
.set {ch_truthsets}
// ch_truthsets.view()

ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf")
ch_truthsets
.map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('.vcf', '').toInteger(), file(it)) }
.set {ch_truthsets}
//ch_truthsets.view()
ch_truthsets.join(ch_callsets, by: 0)
.set {ch_variantsets_map}
// ch_variantsets_map.view()

ch_truthsets.join(ch_callsets, by: 0)
.set {ch_variantsets_map}
//ch_variantsets_map.view()
} else if (params.sample_sheet != "" && params.callsets_dir == "") {

ch_variantsets_map = Channel
.fromPath(params.sample_sheet, checkIfExists: true)
.splitCsv(header: true, sep: ",")
.map {row -> [row["index"] as Integer, row["truthset"], row["callset"]]}
// .view()

} else {

exit 1, "ERROR: Data input incorrect - please supply only one of the following parameters: sample_sheet, callsets_dir\n"

}

// ------------------
// | Main processes |
@@ -35,3 +57,4 @@

SOMPY_SUMMARY(ch_csv.collect())
}

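As a side note on the diff above: the `callsets_dir` branch keys every callset by the integer embedded in its file name. A minimal standalone Groovy sketch of that mapping, assuming a hypothetical path:

```groovy
// ".../callset_3.vcf.gz" -> 3: take the file name, split on "_",
// strip the ".vcf" and ".gz" suffixes, and cast the remainder to an Integer.
def path = '/work/aux/ci_data/callset_3.vcf.gz' // hypothetical path
def key = path.split('/')[-1]   // "callset_3.vcf.gz"
    .tokenize('_')[1]           // "3.vcf.gz"
    .replaceFirst('.vcf', '')   // "3.gz"
    .replaceFirst('.gz', '')    // "3"
    .toInteger()                // 3
assert key == 3
```

The truthset channel applies the same idea to `simulated_hap<X>.vcf`, and `join(..., by: 0)` then pairs each truthset with its callset on this integer key.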
7 changes: 7 additions & 0 deletions hap.nf
@@ -1,3 +1,10 @@
//load in help function
File helppages_class_file = new File("./src/Helppages.groovy");
Class HelppagesClass = new GroovyClassLoader(getClass().getClassLoader()).parseClass(helppages_class_file);
GroovyObject help = (GroovyObject) HelppagesClass.newInstance();

if (params.help) { exit 0, help.helpHap(workflow.manifest.version, params) }

// include modules - here, modules are single processes
//include { AMPLISIM } from './modules/amplisim/main.nf'
include { MASON_SIMULATOR } from './modules/mason/simulator/main.nf'
11 changes: 9 additions & 2 deletions nextflow.config
@@ -4,7 +4,7 @@ manifest {
description = 'A workflow for a simple, streamlined and rapid evaluation of variant callsets '
author = 'Thomas Krannich'
nextflowVersion = '>=20.04.0'
version = '0.2.0'
version = new File('./VERSION').text.trim()
}

// Parameters that are accessible in the pipeline script
@@ -15,6 +15,7 @@ params {
read_type = 'ngs'

// General parameters
help = false
seed = 479
outdir = 'results'

@@ -35,7 +36,8 @@
nb_reads = 180

// Evaluation parameters
callsets_dir = 'data'
callsets_dir = ''
sample_sheet = ''
}

// Enable execution report
@@ -63,4 +65,9 @@ profiles {
executor.name = "local"
executor.cpus = 4
}

slurm {
executor.name = "slurm"
executor.cpus = 4
}
}
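The new `slurm` profile mirrors the `local` profile with a Slurm executor. Assuming a Slurm scheduler is available on the host, a run could presumably be dispatched as:
```
nextflow run eval.nf -profile slurm,conda --sample_sheet my_samples.csv
```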
81 changes: 81 additions & 0 deletions src/Helppages.groovy
@@ -0,0 +1,81 @@
class Helper {
def helpEval(version,params){
String c_green = "\033[0;32m";
String c_reset = "\033[0m";
String c_yellow = "\033[0;33m";
String c_blue = "\033[0;34m";
String c_red = "\u001B[31m";
String c_dim = "\033[2m";
log.info """
____________________________________________________________________________________________
${c_blue}Robert Koch Institute, Genome Competence Center${c_reset}
Workflow: cievad (${version}) - evaluation of callsets
${c_yellow}Minimal Usage Examples:${c_reset}
nextflow run eval.nf -profile local,conda --callsets_dir <path/to/callsets>
or
nextflow run eval.nf -profile local,conda --sample_sheet <path/to/sample_sheet>
${c_yellow}Input parameter (required):${c_reset}
${c_green} --callsets_dir ${c_reset} Directory containing variant callsets for evaluation (files of format: callset_<X>.vcf[.gz]), where <X> is the index of the corresponding truthset.
OR
${c_green} --sample_sheet ${c_reset} Sample sheet (.csv) with the header "index,truthset,callset". Every following line contains an index and matching truth- and callset.
${c_yellow}Other workflow parameter:${c_reset}
${c_green} --outdir ${c_reset} directory to save results in [default: ${params.outdir}]
"""
}

def helpHap(version,params){
String c_green = "\033[0;32m";
String c_reset = "\033[0m";
String c_yellow = "\033[0;33m";
String c_blue = "\033[0;34m";
String c_red = "\u001B[31m";
String c_dim = "\033[2m";
log.info """
____________________________________________________________________________________________
${c_blue}Robert Koch Institute, Genome Competence Center${c_reset}
Workflow: cievad (${version}) - haplotype generation
${c_yellow}Minimal Usage Example:${c_reset}
nextflow run hap.nf -profile local,conda --reference <cievad/path/to/ref>
${c_yellow}Input parameter (required):${c_reset}
${c_green} --reference ${c_reset} reference genome (.fasta) used for the generation of synthetic sequencing data
${c_yellow}Other workflow parameter:${c_reset}
${c_green} --n ${c_reset} number of synthetic samples to be generated [default: ${params.n}]
${c_green} --read_type ${c_reset} type of synthetic reads to be generated (options: ngs, ont) [default: ${params.read_type}]
${c_green} --outdir ${c_reset} directory to save results in [default: ${params.outdir}]
${c_yellow}Next Generation Sequencing parameter, optional if [--read_type ngs] is supplied ${c_reset}
${c_green} --nb_frag ${c_reset} number of fragments per sample [default: ${params.nb_frag}]
${c_green} --fragment_min_size ${c_reset} minimum size of fragments [default: ${params.fragment_min_size}]
${c_green} --fragment_max_size ${c_reset} maximum size of fragments [default: ${params.fragment_max_size}]
${c_green} --fragment_mean_size ${c_reset} mean size of fragments [default: ${params.fragment_mean_size}]
${c_green} --fragment_size_std_dev ${c_reset} standard deviation for fragment size [default: ${params.fragment_size_std_dev}]
${c_green} --illumina_read_length ${c_reset} read length of synthetic illumina reads [default: ${params.illumina_read_length}]
${c_yellow}Nanopore Sequencing parameter, optional if [--read_type ont] is supplied ${c_reset}
${c_green} --dna_type ${c_reset} used DNA type (options: linear, circular) [default: ${params.dna_type}]
${c_green} --model_prefix ${c_reset} path and prefix of a NanoSim model [default: ${params.model_prefix}]
${c_green} --model_caller ${c_reset} algorithm to conduct the basecalling [default: ${params.model_caller}]
${c_green} --median_length ${c_reset} median length of the synthetic reads [default: ${params.median_length}]
${c_green} --sd_length ${c_reset} standard deviation of the synthetic read lengths [default: ${params.sd_length}]
${c_green} --nb_reads ${c_reset} number of synthetic reads per sample [default: ${params.nb_reads}]
"""
}
}
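Given the new `help = false` default in `nextflow.config`, these pages should be reachable by setting the flag on the command line, e.g.:
```
nextflow run hap.nf --help
nextflow run eval.nf --help
```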
