diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 0d3be98..ff1b28b 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -9,21 +9,20 @@ on:
# designed as in: https://github.com/marketplace/actions/setup-miniconda
jobs:
CI:
- name: CI tests using linux
+ name: CI (Linux)
runs-on: "ubuntu-latest"
defaults:
run:
shell: bash -el {0}
steps:
- - uses: actions/checkout@v2
- - uses: conda-incubator/setup-miniconda@v2
+ - uses: actions/checkout@v4
+ - uses: conda-incubator/setup-miniconda@v3
with:
miniconda-version: "latest"
- python-version: "3.11.3"
- activate-environment: snakemake7
- environment-file: env/conda_snakemake7.yaml
+ activate-environment: nextflow
+ environment-file: env/conda_nxf.yml
channels: conda-forge,bioconda,defaults
- channel-priority: strict
+ channel-priority: true
auto-activate-base: false
- name: Test conda installation
@@ -33,29 +32,22 @@ jobs:
conda config --show-sources
conda config --show
- - name: Test snakemake installation
+ - name: Test nextflow installation
run: |
- snakemake --version
+ nextflow -version
- name : Download reference
run: |
wget https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3
sed 's/>ENA|MN908947|MN908947.3 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome./>MN908947.3/g' MN908947.3 > MN908947.3.fasta
-
- - name: Test CIEVaD principal functionality
- run: |
- python cievad.py --help
+ mkdir -p reference/Sars-Cov-2/Wuhan-Hu-1/
+ mv MN908947.3.fasta reference/Sars-Cov-2/Wuhan-Hu-1/
- name: Test haplotype simulation
run: |
- python cievad.py hap -n 3 -r MN908947.3.fasta
+ nextflow run hap.nf -profile local,conda
- - name: Test NGS simulation
+ - name: Test callset evaluation
run: |
- python cievad.py ngs -n 3 -f 1000
+ nextflow run eval.nf -profile local,conda --callsets_dir aux/ci_data/
- - name: Test Nanopore simulation
- run: |
- python cievad.py nano -n 3 -r 100
-
-
diff --git a/.gitignore b/.gitignore
index 157fdeb..823ec9f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,4 +18,8 @@ results/
aux/nanosim_model/human_NA12878_DNA_FAB49712_guppy.tar.gz
-*.pyc
\ No newline at end of file
+*.pyc
+
+.nextflow.log*
+.nextflow/
+work/
\ No newline at end of file
diff --git a/README.md b/README.md
index 704b2f0..219a94e 100644
--- a/README.md
+++ b/README.md
@@ -1,29 +1,31 @@
![Static Badge](https://img.shields.io/badge/requires-conda-blue)
-![Static Badge](https://img.shields.io/badge/requires-snakemake-blue)
+![Static Badge](https://img.shields.io/badge/requires-nextflow-blue)
# CIEVaD
-Continuous Integration and Evaluation for Variant Detection. This repository provides a tool suite for simple, streamlined and rapid creation and evaluation of genomic variant callsets. It is primarily designed for continuous integration of variant detection software and a plain containment check between sets of variants. The tools suite utilizes the _conda_ package management system and _Snakemake_ workflow language.
+Continuous Integration and Evaluation for Variant Detection. This repository provides a tool suite for simple, streamlined and rapid creation and evaluation of genomic variant callsets. It is primarily designed for continuous integration of variant detection software and a plain containment check between sets of variants. The tool suite utilizes the _conda_ package management system and the _nextflow_ workflow language.
## Contents:
1. [System requirements](#system-requirements)
2. [Installation](#installation)
3. [Usage](#usage)
-4. [Help](#help)
+4. [Output](#output)
+5. [Help](#help)
## System requirements:
-This tool suite was developed under Linux/UNIX and is the only officially supported operating system here.
-Having any derivative of the `conda` package management system installed is the only strict system requirement.
-Having a recent `snakemake` (≥6.0.0) and `python` (≥3.2) version installed is required too but both can be installed via conda (see [Installation](#installation)).
+This tool suite was developed for Linux, which is the only officially supported operating system.
+Having any derivative of the conda package management system installed is the only strict system requirement.
+A recent version (≥20.04.0) of nextflow is required to execute the workflows, but it can easily be installed via conda.
+For instructions on installing nextflow via conda, see [Installation](#installation).
-🛠️ See tested setups:
+🛠️ See list of tested setups:
| Requirement | Tested with |
| --- | --- |
-| 64 bits operating system | Ubuntu 20.04.5 LTS |
-| [Conda](https://docs.conda.io/en/latest/) | vers. 23.5.0 |
-| [Snakemake](https://snakemake.readthedocs.io/en/stable/) | vers. 7.25.3 |
+| 64-bit Linux operating system | Ubuntu 20.04.5 LTS |
+| [Conda](https://docs.conda.io/en/latest/) | vers. 23.5.0, 24.1.2 |
+| [Nextflow](https://nextflow.io/) | vers. 20.04.0, 23.10.1 |
@@ -32,43 +34,59 @@ Having a recent `snakemake` (≥6.0.0) and `python` (≥3.2) version installed i
1. Download the repository:
```
-git clone https://github.com/rki-mf1/imsmp-variant-calling-benchmark.git
+git clone https://github.com/rki-mf1/cievad.git
```
-2. [Optional] Install Snakemake if not yet on your system. You can use the conda environment description file provided in this repository:
+2. [Optional] Install nextflow if not yet on your system. As good practice, you should use a new conda environment:
```
conda deactivate
-conda env create -f env/conda_snakemake7.yaml
-conda activate snakemake7
+conda create -n cievad -c bioconda nextflow
+conda activate cievad
```
## Usage:
-This tool suite provides multiple workflows to generate synthetic sequencing data and evaluate sets of predicted variants (callsets).
-A full list of workflows, their respective modules in the python command line interface (CLI) and a detailed description of input and output files can be found in this [wiki](https://github.com/rki-mf1/imsmp-variant-calling-benchmark/wiki) page of the repository.
-The current list of principal functionality is:
-* Generating synthetic haplotypes from a given reference genome
-* Generating synthetic NGS reads from a given haplotype
-* Generating synthetic amplicon sequences from a given reference genome and generating synthetic NGS reads from the amplicons
-* Generating synthetic long-reads from a given haplotype
-* Evaluate compliance between sets of variants
-
-The repository provides a simple CLI for a convenient application-like user experience with the underlying Snakemake workflows.
-The CLI is started from the root directory via
+This tool suite provides multiple functional features to generate synthetic sequencing data, generate sets of ground truth variants (truthsets) and evaluate sets of predicted variants (callsets).
+There are two main workflows, `hap.nf` and `eval.nf`.
+Both workflows are executed via the nextflow command line interface (CLI).
+The current list and roadmap of principal functionality is:
+* [x] Generating synthetic haplotypes from a given reference genome. This returns a haplotype sequence (FASTA) and its set of variants (VCF) with respect to the reference.
+* [x] Generating synthetic NGS reads from a given haplotype
+* [ ] Generating synthetic amplicon sequences from a given reference genome and generating synthetic reads from those amplicons
+* [ ] Generating synthetic long-reads from a given haplotype
+* [x] Evaluate compliance between sets of variants
+
+### Generating haplotype data
+The minimal command to generate haplotype data is
```
-python cievad.py --help
+nextflow run hap.nf -profile local,conda
```
-and each individual module provides another help page via its sub-command
+
+### Evaluating variant calls
+The minimal command to evaluate the accordance between a truthset (generated data) and a callset is
```
-python cievad.py --help
+nextflow run eval.nf -profile local,conda --callsets_dir <path/to/callsets>
```
+where `--callsets_dir` is the parameter to specify a folder containing the callset VCF files.
+Currently, a callset within this folder has to follow the naming convention `callset_<X>.vcf[.gz]` where _\<X\>_ is the integer of the corresponding truthset.
+Callsets can optionally be _gzip_ compressed.
+
+🚧 For convenience, the `eval.nf` will get an option to provide a sample sheet as an alternative input format in the future.
⚠️ Run commands from the root directory
Without further ado, please run the commands from a terminal at the top folder (root directory) of this repository.
Otherwise relative paths within the workflows might be invalid.
+### Tuning the workflows via CLI parameters
+\<TODO\>
+
+### Tuning the workflows via the config file
+\<TODO\>
+
+## Output
+\<TODO\>
## Help:
diff --git a/aux/ci_data/README.md b/aux/ci_data/README.md
new file mode 100644
index 0000000..fe59605
--- /dev/null
+++ b/aux/ci_data/README.md
@@ -0,0 +1,3 @@
+# CI Data
+
+(10.04.2024) The `callset_{1,2,3}.vcf.gz` files are renamed but otherwise original `hap{1,2,3}.filtered.gt_adjust.filtered_indels.vcf.gz` VCF files, which contain variants from the CovPipe2 workflow run with default parameters.
diff --git a/aux/ci_data/callset_1.vcf.gz b/aux/ci_data/callset_1.vcf.gz
new file mode 100644
index 0000000..ed01b66
Binary files /dev/null and b/aux/ci_data/callset_1.vcf.gz differ
diff --git a/aux/ci_data/callset_2.vcf.gz b/aux/ci_data/callset_2.vcf.gz
new file mode 100644
index 0000000..cb77477
Binary files /dev/null and b/aux/ci_data/callset_2.vcf.gz differ
diff --git a/aux/ci_data/callset_3.vcf.gz b/aux/ci_data/callset_3.vcf.gz
new file mode 100644
index 0000000..f99a80d
Binary files /dev/null and b/aux/ci_data/callset_3.vcf.gz differ
diff --git a/bin/SURVIVOR b/bin/SURVIVOR
deleted file mode 100755
index 00725eb..0000000
Binary files a/bin/SURVIVOR and /dev/null differ
diff --git a/bin/amplisim-v0_1_0-ubuntu_20_04 b/bin/amplisim-v0_1_0-ubuntu_20_04
deleted file mode 100755
index 4e35900..0000000
Binary files a/bin/amplisim-v0_1_0-ubuntu_20_04 and /dev/null differ
diff --git a/bin/mason_simulator b/bin/mason_simulator
deleted file mode 100755
index 4de06ea..0000000
Binary files a/bin/mason_simulator and /dev/null differ
diff --git a/cievad.py b/cievad.py
deleted file mode 100644
index 859e287..0000000
--- a/cievad.py
+++ /dev/null
@@ -1,264 +0,0 @@
-# ----------------------------------------------------------------------------------------
-# SETUP
-# ----------------------------------------------------------------------------------------
-import os
-import sys
-import argparse
-import time
-from python.runWorkflows import run_hap, run_ngs, run_ampli, run_nanopore, run_eval
-
-if sys.version_info.major != 3:
- print("Error: Abort: This UI requires python3.")
- exit(1)
-
-
-# ----------------------------------------------------------------------------------------
-# PARSER
-# ----------------------------------------------------------------------------------------
-if __name__ == "__main__":
- __version_info__ = ('0','1','0')
- __version__ = '.'.join(__version_info__)
-
- parser = argparse.ArgumentParser(
- prog='cievad',
- description='CIEVaD - A tool suite to facilitate continuous integration and evaluation of variant detection.',
- epilog='For more help and bug reports please refer to the GitHub repository.')
- parser.add_argument('--version', action='version', version="%(prog)s ("+__version__+")")
-
- subparsers = parser.add_subparsers(help='sub-command help', dest='command')
-
- # ----------------
- # SUB PARSERS |
- # ----------------
-
- # parser for haplotype simulation
- parser_hap = subparsers.add_parser('hap',
- help='Module to generate haplotypes from a given reference.',
- formatter_class=argparse.ArgumentDefaultsHelpFormatter)
- parser_hap.add_argument(
- '-s', '--snakefile',
- help='Path to the Snakefile.',
- default='snakemake/hap/Snakefile')
- parser_hap_group1 = parser_hap.add_argument_group('Run with config', 'Use a config file (yaml) to generate haplotypes.')
- parser_hap_group1.add_argument(
- '-c', '--config',
- metavar='FILE',
- default = None,
- help='Path to a config file for the snakemake pipeline.')
- parser_hap_group2 = parser_hap.add_argument_group('Run with parameter', 'Specify parameters to generate haplotypes.')
- parser_hap_group2.add_argument(
- '-d', '--head-dir',
- metavar='DIR',
- default= os.path.realpath(os.path.dirname(__file__)),
- help='Root directory path.')
- parser_hap_group2.add_argument(
- '-n', '--nb-samples',
- type = int,
- metavar='INT',
- default = 10,
- help='Specify the number of samples to be simulated.')
- parser_hap_group2.add_argument(
- '--seed',
- type = int,
- metavar='INT',
- default = int(round(time.time())),
- help='Specify a random seed. Default is current system time in seconds.')
- parser_hap_group2.add_argument(
- '-r', '--reference',
- metavar='FASTA',
- help='Path to reference genome.')
- parser_hap.set_defaults(func=run_hap)
-
- # parser for NGS read simulation
- parser_ngs = subparsers.add_parser('ngs',
- help='Module to generate NGS reads from a given reference.',
- formatter_class=argparse.ArgumentDefaultsHelpFormatter)
- parser_ngs.add_argument(
- '-s', '--snakefile',
- help='Path to the Snakefile.',
- default='snakemake/ngs/Snakefile')
- parser_ngs.add_argument(
- '-t', '--threads',
- help='Number of CPU threads for the task.',
- metavar='INT',
- default = 1)
- parser_ngs_group1 = parser_ngs.add_argument_group('Run with config', 'Use a config file (yaml) to generate NGS reads.')
- parser_ngs_group1.add_argument(
- '-c', '--config',
- metavar='FILE',
- default = None,
- help='Path to a config file for the snakemake pipeline.')
- parser_ngs_group2 = parser_ngs.add_argument_group('Run with parameter', 'Specify parameters to generate NGS reads.')
- parser_ngs_group2.add_argument(
- '-d', '--head-dir',
- metavar='DIR',
- default= os.path.realpath(os.path.dirname(__file__)),
- help='Root directory path.')
- parser_ngs_group2.add_argument(
- '-n', '--nb-samples',
- type = int,
- metavar='INT',
- default = 10,
- help='Specify the number of samples to be simulated.')
- parser_ngs_group2.add_argument(
- '--seed',
- type = int,
- metavar='INT',
- default = int(round(time.time())),
- help='Specify a random seed. Default is current system time in seconds.')
- parser_ngs_group2.add_argument(
- '-f', '--nb-frags',
- type = int,
- metavar='INT',
- default = 3000,
- help='Specify the number of genomic fragments used for the reads simulation. This INT*2 will result in the total number of NGS reads.')
- parser_ngs.set_defaults(func=run_ngs)
-
- # parser for generating amplicons and NGS reads
- parser_ampli = subparsers.add_parser('ampli',
- help='Module to generate amplicons and NGS reads from a given reference.',
- formatter_class=argparse.ArgumentDefaultsHelpFormatter)
- parser_ampli.add_argument(
- '-s', '--snakefile',
- help='Path to the Snakefile.',
- default='snakemake/amplicon/Snakefile')
- parser_ampli_group1 = parser_ampli.add_argument_group('Run with config', 'Use a config file (yaml) to generate amplicons and NGS reads.')
- parser_ampli_group1.add_argument(
- '-c', '--config',
- metavar='FILE',
- default = None,
- help='Path to a config file for the snakemake pipeline.')
- parser_ampli_group2 = parser_ampli.add_argument_group('Run with parameter', 'Specify parameters to generate amplicons and NGS reads.')
- parser_ampli_group2.add_argument(
- '-d', '--head-dir',
- metavar='DIR',
- default= os.path.realpath(os.path.dirname(__file__)),
- help='Root directory path.')
- parser_ampli_group2.add_argument(
- '-n', '--nb-samples',
- type = int,
- metavar='INT',
- default = 10,
- help='Specify the number of samples to be simulated.')
- parser_ampli_group2.add_argument(
- '--seed',
- type = int,
- metavar='INT',
- default = int(round(time.time())),
- help='Specify a random seed. Default is current system time in seconds.')
- parser_ampli_group2.add_argument(
- '-r', '--reference',
- metavar='FASTA',
- help='Path to reference genome.')
- parser_ampli_group2.add_argument(
- '-p', '--primers',
- metavar = 'BED',
- help='Path to primer file.')
- parser_ampli.set_defaults(func=run_ampli)
-
- # parser for generating nanopore reads
- parser_nanopore = subparsers.add_parser('nano',
- help='Module to generate Oxford-Nanopore-style long reads from a given reference.',
- formatter_class=argparse.ArgumentDefaultsHelpFormatter)
- parser_nanopore.add_argument(
- '-s', '--snakefile',
- help='Path to the Snakefile.',
- default='snakemake/nanopore/Snakefile')
- parser_nanopore.add_argument(
- '-t', '--threads',
- help='Number of CPU threads for the task.',
- metavar='INT',
- default = 1)
- parser_nanopore_group1 = parser_nanopore.add_argument_group('Run with config', 'Use a config file (yaml) to generate ONT-style long reads.')
- parser_nanopore_group1.add_argument(
- '-c', '--config',
- metavar='FILE',
- default = None,
- help='Path to a config file for the snakemake pipeline.')
- parser_nanopore_group2 = parser_nanopore.add_argument_group('Run with parameter', 'Specify parameters to generate ONT-style long reads.')
- parser_nanopore_group2.add_argument(
- '-d', '--head-dir',
- metavar='DIR',
- default= os.path.realpath(os.path.dirname(__file__)),
- help='Root directory path.')
- parser_nanopore_group2.add_argument(
- '-n', '--nb-samples',
- type = int,
- metavar='INT',
- default = 10,
- help='Specify the number of samples to be simulated.')
- parser_nanopore_group2.add_argument(
- '--seed',
- type = int,
- metavar='INT',
- default = int(round(time.time())),
- help='Specify a random seed. Default is current system time in seconds.')
- parser_nanopore_group2.add_argument(
- '-m', '--model-prefix',
- metavar='STR',
- default = 'aux/nanosim_model/human_NA12878_DNA_FAB49712_guppy/training',
- help='Specify a path (relative to the HEAD_DIR) to the prefix of a nanosim model.')
- parser_nanopore_group2.add_argument(
- '-g', '--model-caller',
- metavar='STR',
- default = 'guppy',
- help='Specify a caller of the nanosim model specified with -m.')
- parser_nanopore_group2.add_argument(
- '-y', '--dna-type',
- metavar='STR',
- default = 'linear',
- help='Specify a dna type for the nanosim simulator.')
- parser_nanopore_group2.add_argument(
- '-l', '--median-length',
- type = int,
- metavar='INT',
- default = 5000,
- help='Specify a median read length for the nanosim simulator.')
- parser_nanopore_group2.add_argument(
- '-a', '--sd-length',
- type = float,
- metavar='FLOAT',
- default = 1.05,
- help='Specify a standard deviation of the read length for the nanosim simulator.')
- parser_nanopore_group2.add_argument(
- '-r', '--nb-reads',
- type = int,
- metavar='INT',
- default = 180,
- help='Specify the number of long reads to be simulated per sample.')
- parser_nanopore.set_defaults(func=run_nanopore)
-
- # parser for variant evaluation
- parser_eval = subparsers.add_parser('eval',
- help='Module for variant set evaluation.',
- formatter_class=argparse.ArgumentDefaultsHelpFormatter)
- parser_eval.add_argument(
- '-s', '--snakefile',
- help='Path to the Snakefile.',
- default='snakemake/eval/Snakefile')
- parser_eval_group1 = parser_eval.add_argument_group('Run with config', 'Use a config file (yaml) to evaluate a variant callset.')
- parser_eval_group1.add_argument(
- '-c', '--config',
- metavar='FILE',
- default = None,
- help='Path to a config file for the snakemake pipeline.')
- parser_eval_group2 = parser_eval.add_argument_group('Run with parameter', 'Specify parameters to evaluate a variant callset.')
- parser_eval_group2.add_argument(
- '-d', '--head-dir',
- metavar='DIR',
- default= os.path.realpath(os.path.dirname(__file__)),
- help='Root directory path.')
- parser_eval_group2.add_argument(
- '-n', '--nb-samples',
- type = int,
- metavar='INT',
- default = 10,
- help='Specify the number of samples to be simulated.')
- parser_eval.set_defaults(func=run_eval)
-
- # ---------------
- # PARSE ARGS |
- # ---------------
-
- args = parser.parse_args()
- args.func(args) if len(sys.argv)>1 else print("Error: Abort: Too few arguments. See help page: python vc_benchmark.py --help")
\ No newline at end of file
diff --git a/env/conda_bcftools.yaml b/env/conda_bcftools.yaml
deleted file mode 100644
index a27495b..0000000
--- a/env/conda_bcftools.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-name: bcftools
-channels:
- - conda-forge
- - bioconda
- - defaults
-dependencies:
- - _libgcc_mutex=0.1=conda_forge
- - _openmp_mutex=4.5=2_gnu
- - bcftools=1.17=h3cc50cf_1
- - bzip2=1.0.8=h7f98852_4
- - c-ares=1.19.1=hd590300_0
- - ca-certificates=2023.5.7=hbcca054_0
- - gsl=2.7=he838d99_0
- - htslib=1.17=h81da01d_2
- - keyutils=1.6.1=h166bdaf_0
- - krb5=1.20.1=h81ceb04_0
- - libblas=3.9.0=17_linux64_openblas
- - libcblas=3.9.0=17_linux64_openblas
- - libcurl=8.1.2=h409715c_0
- - libdeflate=1.18=h0b41bf4_0
- - libedit=3.1.20191231=he28a2e2_2
- - libev=4.33=h516909a_1
- - libgcc-ng=13.1.0=he5830b7_0
- - libgfortran-ng=13.1.0=h69a702a_0
- - libgfortran5=13.1.0=h15d22d2_0
- - libgomp=13.1.0=he5830b7_0
- - libnghttp2=1.52.0=h61bc06f_0
- - libnsl=2.0.0=h7f98852_0
- - libopenblas=0.3.23=pthreads_h80387f5_0
- - libssh2=1.11.0=h0841786_0
- - libstdcxx-ng=13.1.0=hfd8a6a1_0
- - libzlib=1.2.13=hd590300_5
- - ncurses=6.4=hcb278e6_0
- - openssl=3.1.1=hd590300_1
- - perl=5.32.1=2_h7f98852_perl5
- - xz=5.2.6=h166bdaf_0
- - zlib=1.2.13=hd590300_5
- - zstd=1.5.2=h3eb15da_6
diff --git a/env/conda_bwa_and_samtools.yaml b/env/conda_bwa_and_samtools.yaml
deleted file mode 100644
index 2b9f6ad..0000000
--- a/env/conda_bwa_and_samtools.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-name: samtools
-channels:
- - bioconda
- - defaults
-dependencies:
- - _libgcc_mutex=0.1=main
- - _openmp_mutex=5.1=1_gnu
- - bwa=0.7.17=h5bf99c6_8
- - bzip2=1.0.8=h7b6447c_0
- - c-ares=1.19.0=h5eee18b_0
- - ca-certificates=2023.01.10=h06a4308_0
- - curl=7.88.1=h5eee18b_0
- - gdbm=1.18=hd4cb3f1_4
- - krb5=1.19.4=h568e23c_0
- - libcurl=7.88.1=h91b91d3_0
- - libedit=3.1.20221030=h5eee18b_0
- - libev=4.33=h7f8727e_1
- - libgcc-ng=11.2.0=h1234567_1
- - libgomp=11.2.0=h1234567_1
- - libnghttp2=1.46.0=hce63b2e_0
- - libssh2=1.10.0=h8f2d780_0
- - libstdcxx-ng=11.2.0=h1234567_1
- - ncurses=6.4=h6a678d5_0
- - openssl=1.1.1t=h7f8727e_0
- - perl=5.34.0=h5eee18b_2
- - readline=8.2=h5eee18b_0
- - samtools=1.6=hb116620_7
- - xz=5.2.10=h5eee18b_1
- - zlib=1.2.13=h5eee18b_0
diff --git a/env/conda_ci.yaml b/env/conda_ci.yaml
deleted file mode 100644
index 3bde0a5..0000000
--- a/env/conda_ci.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: sc2-gr-ci
-channels:
- - bioconda
- - conda-forge
- - defaults
-dependencies:
- - _libgcc_mutex=0.1=conda_forge
- - _openmp_mutex=4.5=2_gnu
- - bcftools=1.17=h3cc50cf_1
- - bzip2=1.0.8=h7f98852_4
- - c-ares=1.19.1=hd590300_0
- - ca-certificates=2023.5.7=hbcca054_0
- - certifi=2016.9.26=py36_0
- - docutils=0.16=py36h5fab9bb_3
- - dropbox=5.2.1=py36_0
- - ecdsa=0.18.0=pyhd8ed1ab_0
- - filechunkio=1.6=py36_0
- - ftputil=3.2=py36_0
- - gsl=2.7=he838d99_0
- - htslib=1.17=h6bc39ce_1
- - keyutils=1.6.1=h166bdaf_0
- - krb5=1.19.3=h3790be6_0
- - ld_impl_linux-64=2.40=h41732ed_0
- - libblas=3.9.0=16_linux64_openblas
- - libcblas=3.9.0=16_linux64_openblas
- - libcurl=7.87.0=h91b91d3_0
- - libdeflate=1.18=h0b41bf4_0
- - libedit=3.1.20191231=he28a2e2_2
- - libev=4.33=h516909a_1
- - libffi=3.4.2=h7f98852_5
- - libgcc-ng=12.2.0=h65d4601_19
- - libgfortran-ng=12.2.0=h69a702a_19
- - libgfortran5=12.2.0=h337968e_19
- - libgomp=12.2.0=h65d4601_19
- - liblapack=3.9.0=16_linux64_openblas
- - libnghttp2=1.46.0=hce63b2e_0
- - libnsl=2.0.0=h7f98852_0
- - libopenblas=0.3.21=pthreads_h78a6416_3
- - libsqlite=3.42.0=h2797004_0
- - libssh2=1.10.0=haa6b8db_3
- - libstdcxx-ng=12.2.0=h46fd767_19
- - libzlib=1.2.13=h166bdaf_4
- - mason=2.0.9=h9ee0642_1
- - ncurses=6.3=h27087fc_1
- - numpy=1.19.5=py36hfc0c790_2
- - openssl=1.1.1u=hd590300_0
- - pandas=1.1.5=py36h284efc9_0
- - paramiko=1.18.2=py36_0
- - perl=5.32.1=2_h7f98852_perl5
- - pip=20.0.2=py36_1
- - psutil=4.4.2=py36_0
- - pycrypto=2.6.1=py36he6145b8_1005
- - pysftp=0.2.9=py36_0
- - python=3.6.15=hb7a2778_0_cpython
- - python-dateutil=2.8.2=pyhd8ed1ab_0
- - python_abi=3.6=2_cp36m
- - pytz=2023.3=pyhd8ed1ab_0
- - pyyaml=5.4.1=py36h8f6f2f9_1
- - readline=8.2=h8228510_1
- - requests=2.12.5=py36_0
- - samtools=1.17=hd87286a_1
- - setuptools=49.6.0=py36h5fab9bb_3
- - six=1.16.0=pyh6c4a22f_0
- - snakemake=3.13.3=py36_0
- - sqlite=3.42.0=h2c6b66d_0
- - tk=8.6.12=h27826a3_0
- - urllib3=1.12=py36_0
- - wheel=0.36.2=pyhd3deb0d_0
- - wrapt=1.12.1=py36h8f6f2f9_3
- - xz=5.2.6=h166bdaf_0
- - yaml=0.2.5=h7f98852_2
- - zlib=1.2.13=h166bdaf_4
diff --git a/env/conda_freebayes.yaml b/env/conda_freebayes.yaml
deleted file mode 100644
index 887ca6c..0000000
--- a/env/conda_freebayes.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-name: freebayes
-channels:
- - bioconda
- - conda-forge
- - defaults
-dependencies:
- - _libgcc_mutex=0.1=conda_forge
- - _openmp_mutex=4.5=2_gnu
- - bc=1.07.1=h7f98852_0
- - bzip2=1.0.8=h7f98852_4
- - c-ares=1.19.0=hd590300_0
- - ca-certificates=2023.5.7=hbcca054_0
- - freebayes=1.3.6=h6f59eb7_3
- - htslib=1.17=h81da01d_2
- - keyutils=1.6.1=h166bdaf_0
- - krb5=1.20.1=h81ceb04_0
- - ld_impl_linux-64=2.40=h41732ed_0
- - libcurl=8.1.0=h409715c_0
- - libdeflate=1.18=h0b41bf4_0
- - libedit=3.1.20191231=he28a2e2_2
- - libev=4.33=h516909a_1
- - libexpat=2.5.0=hcb278e6_1
- - libffi=3.4.2=h7f98852_5
- - libgcc-ng=12.2.0=h65d4601_19
- - libgomp=12.2.0=h65d4601_19
- - libnghttp2=1.52.0=h61bc06f_0
- - libnsl=2.0.0=h7f98852_0
- - libsqlite=3.42.0=h2797004_0
- - libssh2=1.10.0=hf14f497_3
- - libstdcxx-ng=12.2.0=h46fd767_19
- - libuuid=2.38.1=h0b41bf4_0
- - libzlib=1.2.13=h166bdaf_4
- - ncurses=6.3=h27087fc_1
- - openssl=3.1.0=hd590300_3
- - parallel=20170422=pl5.22.0_0
- - perl=5.22.0.1=0
- - pip=23.1.2=pyhd8ed1ab_0
- - python=3.11.3=h2755cc3_0_cpython
- - readline=8.2=h8228510_1
- - samtools=1.17=hd87286a_1
- - setuptools=67.7.2=pyhd8ed1ab_0
- - tabixpp=1.1.0=h6448e42_12
- - tk=8.6.12=h27826a3_0
- - tzdata=2023c=h71feb2d_0
- - vcflib=1.0.3=h6b7c446_3
- - wheel=0.40.0=pyhd8ed1ab_0
- - xz=5.2.6=h166bdaf_0
- - zlib=1.2.13=h166bdaf_4
- - zstd=1.5.2=h3eb15da_6
diff --git a/env/conda_mason.yaml b/env/conda_mason.yaml
deleted file mode 100644
index 6e5d7fe..0000000
--- a/env/conda_mason.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: mason
-channels:
- - conda-forge
- - bioconda
- - defaults
-dependencies:
- - mason=2.0.9=h9ee0642_1
diff --git a/env/conda_nanosim.yaml b/env/conda_nanosim.yaml
deleted file mode 100644
index 478b3f2..0000000
--- a/env/conda_nanosim.yaml
+++ /dev/null
@@ -1,138 +0,0 @@
-name: nanosim
-channels:
- - conda-forge
- - bioconda
- - defaults
-dependencies:
- - _libgcc_mutex=0.1=conda_forge
- - _openmp_mutex=4.5=2_gnu
- - bedtools=2.31.0=hf5e1c6e_2
- - brotli=1.0.9=h166bdaf_9
- - brotli-bin=1.0.9=h166bdaf_9
- - brotli-python=1.0.9=py38hfa26641_9
- - bzip2=1.0.8=h7f98852_4
- - c-ares=1.19.1=hd590300_0
- - ca-certificates=2023.5.7=hbcca054_0
- - cairo=1.16.0=hbbf8b49_1016
- - certifi=2023.5.7=pyhd8ed1ab_0
- - charset-normalizer=3.1.0=pyhd8ed1ab_0
- - contourpy=1.1.0=py38h7f3f72f_0
- - cycler=0.11.0=pyhd8ed1ab_0
- - expat=2.5.0=hcb278e6_1
- - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
- - font-ttf-inconsolata=3.000=h77eed37_0
- - font-ttf-source-code-pro=2.038=h77eed37_0
- - font-ttf-ubuntu=0.83=hab24e00_0
- - fontconfig=2.14.2=h14ed4e7_0
- - fonts-conda-ecosystem=1=0
- - fonts-conda-forge=1=0
- - fonttools=4.40.0=py38h01eb140_0
- - freetype=2.12.1=hca18f0e_1
- - fribidi=1.0.10=h36c2ea0_0
- - genometools-genometools=1.6.2=py38hf7b97cc_6
- - gettext=0.21.1=h27087fc_0
- - graphite2=1.3.13=h58526e2_1001
- - harfbuzz=7.3.0=hdb3a94d_0
- - htseq=2.0.3=py38h8c35140_1
- - htslib=1.17=h81da01d_2
- - icu=72.1=hcb278e6_0
- - idna=3.4=pyhd8ed1ab_0
- - importlib-resources=5.12.0=pyhd8ed1ab_0
- - importlib_resources=5.12.0=pyhd8ed1ab_0
- - joblib=1.3.0=pyhd8ed1ab_1
- - k8=0.2.5=hdcf5f25_4
- - keyutils=1.6.1=h166bdaf_0
- - kiwisolver=1.4.4=py38h43d8883_1
- - krb5=1.20.1=h81ceb04_0
- - last=1454=h5b5514e_0
- - lcms2=2.15=haa2dc70_1
- - ld_impl_linux-64=2.40=h41732ed_0
- - lerc=4.0.0=h27087fc_0
- - libblas=3.9.0=17_linux64_openblas
- - libbrotlicommon=1.0.9=h166bdaf_9
- - libbrotlidec=1.0.9=h166bdaf_9
- - libbrotlienc=1.0.9=h166bdaf_9
- - libcblas=3.9.0=17_linux64_openblas
- - libcurl=8.1.2=h409715c_0
- - libdeflate=1.18=h0b41bf4_0
- - libedit=3.1.20191231=he28a2e2_2
- - libev=4.33=h516909a_1
- - libexpat=2.5.0=hcb278e6_1
- - libffi=3.4.2=h7f98852_5
- - libgcc-ng=13.1.0=he5830b7_0
- - libgfortran-ng=13.1.0=h69a702a_0
- - libgfortran5=13.1.0=h15d22d2_0
- - libglib=2.76.3=hebfc3b9_0
- - libgomp=13.1.0=he5830b7_0
- - libiconv=1.17=h166bdaf_0
- - libjpeg-turbo=2.1.5.1=h0b41bf4_0
- - liblapack=3.9.0=17_linux64_openblas
- - libnghttp2=1.52.0=h61bc06f_0
- - libnsl=2.0.0=h7f98852_0
- - libopenblas=0.3.23=pthreads_h80387f5_0
- - libpng=1.6.39=h753d276_0
- - libsqlite=3.42.0=h2797004_0
- - libssh2=1.11.0=h0841786_0
- - libstdcxx-ng=13.1.0=hfd8a6a1_0
- - libtiff=4.5.1=h8b53f26_0
- - libuuid=2.38.1=h0b41bf4_0
- - libwebp-base=1.3.1=hd590300_0
- - libxcb=1.15=h0b41bf4_0
- - libzlib=1.2.13=hd590300_5
- - matplotlib-base=3.7.1=py38hd6c3c57_0
- - minimap2=2.26=he4a0461_1
- - munkres=1.1.4=pyh9f0ad1d_0
- - nanosim=3.1.0=hdfd78af_0
- - ncurses=6.4=hcb278e6_0
- - numpy=1.24.4=py38h59b608b_0
- - openjpeg=2.5.0=hfec8fc6_2
- - openssl=3.1.1=hd590300_1
- - packaging=23.1=pyhd8ed1ab_0
- - pandas=2.0.3=py38h01efb38_0
- - pango=1.50.14=heaa33ce_1
- - parallel=20230522=ha770c72_0
- - pcre2=10.40=hc3806b6_0
- - perl=5.32.1=3_hd590300_perl5
- - pillow=10.0.0=py38h885162f_0
- - pip=23.1.2=pyhd8ed1ab_0
- - pixman=0.40.0=h36c2ea0_0
- - platformdirs=3.8.0=pyhd8ed1ab_0
- - pooch=1.7.0=pyha770c72_3
- - pthread-stubs=0.4=h36c2ea0_1001
- - pybedtools=0.9.0=py38he0f268d_2
- - pyparsing=3.1.0=pyhd8ed1ab_0
- - pysam=0.21.0=py38h15b938a_1
- - pysocks=1.7.1=pyha2e5f31_6
- - python=3.8.17=he550d4f_0_cpython
- - python-dateutil=2.8.2=pyhd8ed1ab_0
- - python-tzdata=2023.3=pyhd8ed1ab_0
- - python_abi=3.8=3_cp38
- - pytz=2023.3=pyhd8ed1ab_0
- - readline=8.2=h8228510_1
- - requests=2.31.0=pyhd8ed1ab_0
- - samtools=1.17=hd87286a_1
- - scikit-learn=0.22.1=py38hcdab131_1
- - scipy=1.10.1=py38h59b608b_3
- - setuptools=68.0.0=pyhd8ed1ab_0
- - six=1.16.0=pyh6c4a22f_0
- - tk=8.6.12=h27826a3_0
- - typing-extensions=4.7.1=hd8ed1ab_0
- - typing_extensions=4.7.1=pyha770c72_0
- - unicodedata2=15.0.0=py38h0a891b7_0
- - urllib3=2.0.3=pyhd8ed1ab_1
- - wheel=0.40.0=pyhd8ed1ab_0
- - xorg-kbproto=1.0.7=h7f98852_1002
- - xorg-libice=1.1.1=hd590300_0
- - xorg-libsm=1.2.4=h7391055_0
- - xorg-libx11=1.8.6=h8ee46fc_0
- - xorg-libxau=1.0.11=hd590300_0
- - xorg-libxdmcp=1.1.3=h7f98852_0
- - xorg-libxext=1.3.4=h0b41bf4_2
- - xorg-libxrender=0.9.11=hd590300_0
- - xorg-renderproto=0.11.1=h7f98852_1002
- - xorg-xextproto=7.3.0=h0b41bf4_1003
- - xorg-xproto=7.0.31=h7f98852_1007
- - xz=5.2.6=h166bdaf_0
- - zipp=3.15.0=pyhd8ed1ab_0
- - zlib=1.2.13=hd590300_5
- - zstd=1.5.2=h3eb15da_6
diff --git a/env/conda_nxf.yml b/env/conda_nxf.yml
new file mode 100644
index 0000000..a3d5c41
--- /dev/null
+++ b/env/conda_nxf.yml
@@ -0,0 +1,82 @@
+name: nextflow
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - _libgcc_mutex=0.1=conda_forge
+ - _openmp_mutex=4.5=2_gnu
+ - alsa-lib=1.2.10=hd590300_0
+ - bzip2=1.0.8=hd590300_5
+ - c-ares=1.26.0=hd590300_0
+ - ca-certificates=2024.2.2=hbcca054_0
+ - cairo=1.18.0=h3faef2a_0
+ - coreutils=9.4=hd590300_0
+ - curl=8.5.0=hca28451_0
+ - expat=2.5.0=hcb278e6_1
+ - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
+ - font-ttf-inconsolata=3.000=h77eed37_0
+ - font-ttf-source-code-pro=2.038=h77eed37_0
+ - font-ttf-ubuntu=0.83=h77eed37_1
+ - fontconfig=2.14.2=h14ed4e7_0
+ - fonts-conda-ecosystem=1=0
+ - fonts-conda-forge=1=0
+ - freetype=2.12.1=h267a509_2
+ - gettext=0.21.1=h27087fc_0
+ - giflib=5.2.1=h0b41bf4_3
+ - graphite2=1.3.13=h58526e2_1001
+ - harfbuzz=8.3.0=h3d44ed6_0
+ - icu=73.2=h59595ed_0
+ - keyutils=1.6.1=h166bdaf_0
+ - krb5=1.21.2=h659d440_0
+ - lcms2=2.16=hb7c19ff_0
+ - lerc=4.0.0=h27087fc_0
+ - libcups=2.3.3=h4637d8d_4
+ - libcurl=8.5.0=hca28451_0
+ - libdeflate=1.19=hd590300_0
+ - libedit=3.1.20191231=he28a2e2_2
+ - libev=4.33=hd590300_2
+ - libexpat=2.5.0=hcb278e6_1
+ - libffi=3.4.2=h7f98852_5
+ - libgcc-ng=13.2.0=h807b86a_5
+ - libglib=2.78.3=h783c2da_0
+ - libgomp=13.2.0=h807b86a_5
+ - libiconv=1.17=hd590300_2
+ - libjpeg-turbo=3.0.0=hd590300_1
+ - libnghttp2=1.58.0=h47da74e_1
+ - libpng=1.6.42=h2797004_0
+ - libssh2=1.11.0=h0841786_0
+ - libstdcxx-ng=13.2.0=h7e041cc_5
+ - libtiff=4.6.0=ha9c0a0a_2
+ - libuuid=2.38.1=h0b41bf4_0
+ - libwebp-base=1.3.2=hd590300_0
+ - libxcb=1.15=h0b41bf4_0
+ - libzlib=1.2.13=hd590300_5
+ - ncurses=6.4=h59595ed_2
+ - nextflow=23.10.1=hdfd78af_0
+ - openjdk=17.0.10=h4260e57_0
+ - openssl=3.2.1=hd590300_0
+ - pcre2=10.42=hcad00b1_0
+ - pixman=0.43.2=h59595ed_0
+ - pthread-stubs=0.4=h36c2ea0_1001
+ - xorg-fixesproto=5.0=h7f98852_1002
+ - xorg-inputproto=2.3.2=h7f98852_1002
+ - xorg-kbproto=1.0.7=h7f98852_1002
+ - xorg-libice=1.1.1=hd590300_0
+ - xorg-libsm=1.2.4=h7391055_0
+ - xorg-libx11=1.8.7=h8ee46fc_0
+ - xorg-libxau=1.0.11=hd590300_0
+ - xorg-libxdmcp=1.1.3=h7f98852_0
+ - xorg-libxext=1.3.4=h0b41bf4_2
+ - xorg-libxfixes=5.0.3=h7f98852_1004
+ - xorg-libxi=1.7.10=h7f98852_0
+ - xorg-libxrender=0.9.11=hd590300_0
+ - xorg-libxt=1.3.0=hd590300_1
+ - xorg-libxtst=1.2.3=h7f98852_1002
+ - xorg-recordproto=1.14.2=h7f98852_1002
+ - xorg-renderproto=0.11.1=h7f98852_1002
+ - xorg-xextproto=7.3.0=h0b41bf4_1003
+ - xorg-xproto=7.0.31=h7f98852_1007
+ - xz=5.2.6=h166bdaf_0
+ - zlib=1.2.13=hd590300_5
+ - zstd=1.5.5=hfc55251_0
diff --git a/env/conda_picard.yaml b/env/conda_picard.yaml
deleted file mode 100644
index 4a044ba..0000000
--- a/env/conda_picard.yaml
+++ /dev/null
@@ -1,110 +0,0 @@
-name: picard
-channels:
- - conda-forge
- - bioconda
- - defaults
-dependencies:
- - _libgcc_mutex=0.1=conda_forge
- - _openmp_mutex=4.5=2_gnu
- - _r-mutex=1.0.1=anacondar_1
- - alsa-lib=1.2.9=hd590300_0
- - binutils_impl_linux-64=2.40=hf600244_0
- - bwidget=1.9.14=ha770c72_1
- - bzip2=1.0.8=h7f98852_4
- - c-ares=1.19.1=hd590300_0
- - ca-certificates=2023.5.7=hbcca054_0
- - cairo=1.16.0=hbbf8b49_1016
- - curl=8.1.2=h409715c_0
- - expat=2.5.0=hcb278e6_1
- - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
- - font-ttf-inconsolata=3.000=h77eed37_0
- - font-ttf-source-code-pro=2.038=h77eed37_0
- - font-ttf-ubuntu=0.83=hab24e00_0
- - fontconfig=2.14.2=h14ed4e7_0
- - fonts-conda-ecosystem=1=0
- - fonts-conda-forge=1=0
- - freetype=2.12.1=hca18f0e_1
- - fribidi=1.0.10=h36c2ea0_0
- - gcc_impl_linux-64=13.1.0=hc4be1a9_0
- - gettext=0.21.1=h27087fc_0
- - gfortran_impl_linux-64=13.1.0=hd511a9b_0
- - giflib=5.2.1=h0b41bf4_3
- - graphite2=1.3.13=h58526e2_1001
- - gsl=2.7=he838d99_0
- - gxx_impl_linux-64=13.1.0=hc4be1a9_0
- - harfbuzz=7.3.0=hdb3a94d_0
- - icu=72.1=hcb278e6_0
- - kernel-headers_linux-64=2.6.32=he073ed8_15
- - keyutils=1.6.1=h166bdaf_0
- - krb5=1.20.1=h81ceb04_0
- - lcms2=2.15=haa2dc70_1
- - ld_impl_linux-64=2.40=h41732ed_0
- - lerc=4.0.0=h27087fc_0
- - libblas=3.9.0=17_linux64_openblas
- - libcblas=3.9.0=17_linux64_openblas
- - libcups=2.3.3=h36d4200_3
- - libcurl=8.1.2=h409715c_0
- - libdeflate=1.18=h0b41bf4_0
- - libedit=3.1.20191231=he28a2e2_2
- - libev=4.33=h516909a_1
- - libexpat=2.5.0=hcb278e6_1
- - libffi=3.4.2=h7f98852_5
- - libgcc-devel_linux-64=13.1.0=he3cc6c4_0
- - libgcc-ng=13.1.0=he5830b7_0
- - libgfortran-ng=13.1.0=h69a702a_0
- - libgfortran5=13.1.0=h15d22d2_0
- - libglib=2.76.3=hebfc3b9_0
- - libgomp=13.1.0=he5830b7_0
- - libiconv=1.17=h166bdaf_0
- - libjpeg-turbo=2.1.5.1=h0b41bf4_0
- - liblapack=3.9.0=17_linux64_openblas
- - libnghttp2=1.52.0=h61bc06f_0
- - libopenblas=0.3.23=pthreads_h80387f5_0
- - libpng=1.6.39=h753d276_0
- - libsanitizer=13.1.0=hfd8a6a1_0
- - libssh2=1.11.0=h0841786_0
- - libstdcxx-devel_linux-64=13.1.0=he3cc6c4_0
- - libstdcxx-ng=13.1.0=hfd8a6a1_0
- - libtiff=4.5.1=h8b53f26_0
- - libuuid=2.38.1=h0b41bf4_0
- - libwebp-base=1.3.0=h0b41bf4_0
- - libxcb=1.15=h0b41bf4_0
- - libxml2=2.11.4=h0d562d8_0
- - libzlib=1.2.13=hd590300_5
- - make=4.3=hd18ef5c_1
- - ncurses=6.4=hcb278e6_0
- - openjdk=20.0.0=h8e330f5_0
- - openssl=3.1.1=hd590300_1
- - pango=1.50.14=heaa33ce_1
- - pcre2=10.40=hc3806b6_0
- - picard=3.0.0=hdfd78af_1
- - pixman=0.40.0=h36c2ea0_0
- - pthread-stubs=0.4=h36c2ea0_1001
- - r-base=4.3.0=hfabd6f2_1
- - readline=8.2=h8228510_1
- - sed=4.8=he412f7d_0
- - sysroot_linux-64=2.12=he073ed8_15
- - tk=8.6.12=h27826a3_0
- - tktable=2.10=hb7b940f_3
- - xorg-fixesproto=5.0=h7f98852_1002
- - xorg-inputproto=2.3.2=h7f98852_1002
- - xorg-kbproto=1.0.7=h7f98852_1002
- - xorg-libice=1.0.10=h7f98852_0
- - xorg-libsm=1.2.3=hd9c2040_1000
- - xorg-libx11=1.8.6=h8ee46fc_0
- - xorg-libxau=1.0.11=hd590300_0
- - xorg-libxdmcp=1.1.3=h7f98852_0
- - xorg-libxext=1.3.4=h0b41bf4_2
- - xorg-libxfixes=5.0.3=h7f98852_1004
- - xorg-libxi=1.7.10=h7f98852_0
- - xorg-libxrender=0.9.10=h7f98852_1003
- - xorg-libxt=1.3.0=hd590300_0
- - xorg-libxtst=1.2.3=h7f98852_1002
- - xorg-recordproto=1.14.2=h7f98852_1002
- - xorg-renderproto=0.11.1=h7f98852_1002
- - xorg-xextproto=7.3.0=h0b41bf4_1003
- - xorg-xproto=7.0.31=h7f98852_1007
- - xz=5.2.6=h166bdaf_0
- - zlib=1.2.13=hd590300_5
- - zstd=1.5.2=h3eb15da_6
-prefix: /scratch/krannicht/miniconda3/envs/picard
diff --git a/env/conda_snakemake7.yaml b/env/conda_snakemake7.yaml
deleted file mode 100644
index 212a885..0000000
--- a/env/conda_snakemake7.yaml
+++ /dev/null
@@ -1,211 +0,0 @@
-name: snakemake7
-channels:
- - conda-forge
- - bioconda
- - defaults
-dependencies:
- - _libgcc_mutex=0.1=conda_forge
- - _openmp_mutex=4.5=2_gnu
- - aioeasywebdav=2.4.0=pyha770c72_0
- - aiohttp=3.8.4=py311h2582759_0
- - aiosignal=1.3.1=pyhd8ed1ab_0
- - amply=0.1.5=pyhd8ed1ab_0
- - appdirs=1.4.4=pyh9f0ad1d_0
- - async-timeout=4.0.2=pyhd8ed1ab_0
- - attmap=0.13.2=pyhd8ed1ab_0
- - attrs=23.1.0=pyh71513ae_1
- - backports=1.0=pyhd8ed1ab_3
- - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
- - bcrypt=3.2.2=py311hd4cff14_1
- - boltons=23.0.0=pyhd8ed1ab_0
- - boto3=1.26.133=pyhd8ed1ab_0
- - botocore=1.29.133=pyhd8ed1ab_0
- - brotlipy=0.7.0=py311hd4cff14_1005
- - bzip2=1.0.8=h7f98852_4
- - c-ares=1.18.1=h7f98852_0
- - ca-certificates=2023.5.7=hbcca054_0
- - cachetools=5.3.0=pyhd8ed1ab_0
- - certifi=2023.5.7=pyhd8ed1ab_0
- - cffi=1.15.1=py311h409f033_3
- - charset-normalizer=2.1.1=pyhd8ed1ab_0
- - coin-or-cbc=2.10.10=h9002f0b_0
- - coin-or-cgl=0.60.7=h516709c_0
- - coin-or-clp=1.17.8=h1ee7a9c_0
- - coin-or-osi=0.108.8=ha2443b9_0
- - coin-or-utils=2.11.9=hee58242_0
- - coincbc=2.10.10=0_metapackage
- - colorama=0.4.6=pyhd8ed1ab_0
- - conda=23.3.1=py311h38be061_0
- - conda-package-handling=2.0.2=pyh38be061_0
- - conda-package-streaming=0.7.0=pyhd8ed1ab_1
- - configargparse=1.5.3=pyhd8ed1ab_0
- - connection_pool=0.0.3=pyhd3deb0d_0
- - cryptography=40.0.2=py311h9b4c7bb_0
- - datrie=0.8.2=py311hd4cff14_6
- - defusedxml=0.7.1=pyhd8ed1ab_0
- - docutils=0.20=py311h38be061_0
- - dpath=2.1.5=pyha770c72_1
- - dropbox=11.36.0=pyhd8ed1ab_0
- - exceptiongroup=1.1.1=pyhd8ed1ab_0
- - filechunkio=1.8=py_2
- - filelock=3.12.0=pyhd8ed1ab_0
- - fmt=9.1.0=h924138e_0
- - frozenlist=1.3.3=py311hd4cff14_0
- - ftputil=5.0.4=pyhd8ed1ab_0
- - gitdb=4.0.10=pyhd8ed1ab_0
- - gitpython=3.1.31=pyhd8ed1ab_0
- - google-api-core=2.11.0=pyhd8ed1ab_0
- - google-api-python-client=2.86.0=pyhd8ed1ab_0
- - google-auth=2.18.0=pyh1a96a4e_0
- - google-auth-httplib2=0.1.0=pyhd8ed1ab_1
- - google-cloud-core=2.3.2=pyhd8ed1ab_0
- - google-cloud-storage=2.9.0=pyh1a96a4e_0
- - google-crc32c=1.1.2=py311h98db957_4
- - google-resumable-media=2.5.0=pyhd8ed1ab_0
- - googleapis-common-protos=1.57.1=pyhd8ed1ab_0
- - grpcio=1.54.2=py311hcafe171_0
- - httplib2=0.22.0=pyhd8ed1ab_0
- - humanfriendly=10.0=py311h38be061_4
- - icu=72.1=hcb278e6_0
- - idna=3.4=pyhd8ed1ab_0
- - importlib-metadata=6.6.0=pyha770c72_0
- - importlib_resources=5.12.0=pyhd8ed1ab_0
- - iniconfig=2.0.0=pyhd8ed1ab_0
- - jinja2=3.1.2=pyhd8ed1ab_1
- - jmespath=1.0.1=pyhd8ed1ab_0
- - jsonpatch=1.32=pyhd8ed1ab_0
- - jsonpointer=2.0=py_0
- - jsonschema=4.17.3=pyhd8ed1ab_0
- - jupyter_core=5.3.0=py311h38be061_0
- - keyutils=1.6.1=h166bdaf_0
- - krb5=1.20.1=h81ceb04_0
- - ld_impl_linux-64=2.40=h41732ed_0
- - libabseil=20230125.0=cxx17_hcb278e6_1
- - libarchive=3.6.2=h3d51595_0
- - libblas=3.9.0=16_linux64_openblas
- - libcblas=3.9.0=16_linux64_openblas
- - libcrc32c=1.1.2=h9c3ff4c_0
- - libcurl=8.0.1=h588be90_0
- - libedit=3.1.20191231=he28a2e2_2
- - libev=4.33=h516909a_1
- - libexpat=2.5.0=hcb278e6_1
- - libffi=3.4.2=h7f98852_5
- - libgcc-ng=12.2.0=h65d4601_19
- - libgfortran-ng=12.2.0=h69a702a_19
- - libgfortran5=12.2.0=h337968e_19
- - libgomp=12.2.0=h65d4601_19
- - libgrpc=1.54.2=hcf146ea_0
- - libiconv=1.17=h166bdaf_0
- - liblapack=3.9.0=16_linux64_openblas
- - liblapacke=3.9.0=16_linux64_openblas
- - libmamba=1.4.2=hcea66bb_0
- - libmambapy=1.4.2=py311h1f88262_0
- - libnghttp2=1.52.0=h61bc06f_0
- - libnsl=2.0.0=h7f98852_0
- - libopenblas=0.3.21=pthreads_h78a6416_3
- - libprotobuf=3.21.12=h3eb15da_0
- - libsodium=1.0.18=h36c2ea0_1
- - libsolv=0.7.23=h3eb15da_0
- - libsqlite=3.41.2=h2797004_1
- - libssh2=1.10.0=hf14f497_3
- - libstdcxx-ng=12.2.0=h46fd767_19
- - libuuid=2.38.1=h0b41bf4_0
- - libxml2=2.10.4=hfdac1af_0
- - libzlib=1.2.13=h166bdaf_4
- - logmuse=0.2.6=pyh8c360ce_0
- - lz4-c=1.9.4=hcb278e6_0
- - lzo=2.10=h516909a_1000
- - mamba=1.4.2=py311h3072747_0
- - markdown-it-py=2.2.0=pyhd8ed1ab_0
- - markupsafe=2.1.2=py311h2582759_0
- - mdurl=0.1.0=pyhd8ed1ab_0
- - multidict=6.0.4=py311h2582759_0
- - nbformat=5.8.0=pyhd8ed1ab_0
- - ncurses=6.3=h27087fc_1
- - numpy=1.24.3=py311h64a7726_0
- - oauth2client=4.1.3=py_0
- - openssl=3.1.0=hd590300_3
- - packaging=23.1=pyhd8ed1ab_0
- - pandas=2.0.1=py311h320fe9a_1
- - paramiko=3.1.0=pyhd8ed1ab_0
- - peppy=0.35.5=pyhd8ed1ab_0
- - pip=23.1.2=pyhd8ed1ab_0
- - pkgutil-resolve-name=1.3.10=pyhd8ed1ab_0
- - plac=1.3.5=pyhd8ed1ab_0
- - platformdirs=3.5.1=pyhd8ed1ab_0
- - pluggy=1.0.0=pyhd8ed1ab_5
- - ply=3.11=py_1
- - prettytable=3.7.0=pyhd8ed1ab_0
- - protobuf=4.21.12=py311hcafe171_0
- - psutil=5.9.5=py311h2582759_0
- - pulp=2.7.0=py311h38be061_0
- - pyasn1=0.4.8=py_0
- - pyasn1-modules=0.2.7=py_0
- - pybind11-abi=4=hd8ed1ab_3
- - pycosat=0.6.4=py311hd4cff14_1
- - pycparser=2.21=pyhd8ed1ab_0
- - pygments=2.15.1=pyhd8ed1ab_0
- - pynacl=1.5.0=py311hd4cff14_2
- - pyopenssl=23.1.1=pyhd8ed1ab_0
- - pyparsing=3.0.9=pyhd8ed1ab_0
- - pyrsistent=0.19.3=py311h2582759_0
- - pysftp=0.2.9=py_1
- - pysocks=1.7.1=pyha2e5f31_6
- - pytest=7.3.1=pyhd8ed1ab_0
- - python=3.11.3=h2755cc3_0_cpython
- - python-dateutil=2.8.2=pyhd8ed1ab_0
- - python-fastjsonschema=2.16.3=pyhd8ed1ab_0
- - python-irodsclient=1.1.6=pyhd8ed1ab_0
- - python-tzdata=2023.3=pyhd8ed1ab_0
- - python_abi=3.11=3_cp311
- - pytz=2023.3=pyhd8ed1ab_0
- - pyu2f=0.1.5=pyhd8ed1ab_0
- - pyyaml=6.0=py311hd4cff14_5
- - re2=2023.02.02=hcb278e6_0
- - readline=8.2=h8228510_1
- - reproc=14.2.4=h0b41bf4_0
- - reproc-cpp=14.2.4=hcb278e6_0
- - requests=2.29.0=pyhd8ed1ab_0
- - reretry=0.11.8=pyhd8ed1ab_0
- - rich=13.3.5=pyhd8ed1ab_0
- - rsa=4.9=pyhd8ed1ab_0
- - ruamel.yaml=0.17.26=py311h459d7ec_0
- - ruamel.yaml.clib=0.2.7=py311h2582759_1
- - s3transfer=0.6.1=pyhd8ed1ab_0
- - setuptools=67.7.2=pyhd8ed1ab_0
- - setuptools-scm=7.1.0=pyhd8ed1ab_0
- - six=1.16.0=pyh6c4a22f_0
- - slacker=0.14.0=py_0
- - smart_open=6.3.0=pyhd8ed1ab_1
- - smmap=3.0.5=pyh44b312d_0
- - snakemake=7.25.3=hdfd78af_0
- - snakemake-minimal=7.25.3=pyhdfd78af_0
- - stone=3.3.1=pyhd8ed1ab_0
- - stopit=1.1.2=py_0
- - tabulate=0.9.0=pyhd8ed1ab_1
- - throttler=1.2.1=pyhd8ed1ab_0
- - tk=8.6.12=h27826a3_0
- - tomli=2.0.1=pyhd8ed1ab_0
- - toolz=0.12.0=pyhd8ed1ab_0
- - toposort=1.10=pyhd8ed1ab_0
- - tqdm=4.65.0=pyhd8ed1ab_1
- - traitlets=5.9.0=pyhd8ed1ab_0
- - typing-extensions=4.5.0=hd8ed1ab_0
- - typing_extensions=4.5.0=pyha770c72_0
- - tzdata=2023c=h71feb2d_0
- - ubiquerg=0.6.2=pyhd8ed1ab_0
- - uritemplate=4.1.1=pyhd8ed1ab_0
- - urllib3=1.26.15=pyhd8ed1ab_0
- - veracitools=0.1.3=py_0
- - wcwidth=0.2.6=pyhd8ed1ab_0
- - wheel=0.40.0=pyhd8ed1ab_0
- - wrapt=1.15.0=py311h2582759_0
- - xz=5.2.6=h166bdaf_0
- - yaml=0.2.5=h7f98852_2
- - yaml-cpp=0.7.0=h27087fc_2
- - yarl=1.9.1=py311h459d7ec_0
- - yte=1.5.1=py311h38be061_1
- - zipp=3.15.0=pyhd8ed1ab_0
- - zlib=1.2.13=h166bdaf_4
- - zstandard=0.19.0=py311hbe0fcd7_1
- - zstd=1.5.2=h3eb15da_6
diff --git a/eval.nf b/eval.nf
new file mode 100644
index 0000000..857505e
--- /dev/null
+++ b/eval.nf
@@ -0,0 +1,34 @@
+// include modules - here, modules are single processes
+include { SAMTOOLS_FAIDX } from './modules/samtools/faidx/main.nf'
+include { HAPPY } from './modules/happy/main.nf'
+
+
+workflow{
+ // ------------------
+ // | Input channels |
+ // ------------------
+ ch_ref = Channel.value("$baseDir/" + params.reference)
+ ch_ref_idx = SAMTOOLS_FAIDX(ch_ref)
+ // Callsets: key each VCF by the integer in the second '_'-separated token of its file name (replaceFirst takes a regex, so dots are escaped)
+ ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}")
+ ch_callsets
+ .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('\\.vcf', '').replaceFirst('\\.gz', '').toInteger(), file(it)) }
+ .set {ch_callsets}
+ //ch_callsets.view()
+ // Truthsets: simulated_hap<id>.vcf files found in params.outdir, keyed by <id>
+ ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf")
+ ch_truthsets
+ .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('\\.vcf', '').toInteger(), file(it)) }
+ .set {ch_truthsets}
+ //ch_truthsets.view()
+ // Pair each truthset with the callset that carries the same integer id -> (id, truthset, callset)
+ ch_truthsets.join(ch_callsets, by: 0)
+ .set {ch_variantsets_map}
+ //ch_variantsets_map.view()
+
+
+ // ------------------
+ // | Main processes |
+ // ------------------
+ HAPPY(ch_variantsets_map,ch_ref,ch_ref_idx)
+}
diff --git a/hap.nf b/hap.nf
new file mode 100644
index 0000000..bfff46b
--- /dev/null
+++ b/hap.nf
@@ -0,0 +1,36 @@
+// include modules - here, modules are single processes
+//include { AMPLISIM } from './modules/amplisim/main.nf'
+include { MASON_SIMULATOR } from './modules/mason/simulator/main.nf'
+include { MASON_VARIATOR } from './modules/mason/variator/main.nf'
+include { NANOSIM } from './modules/nanosim/main.nf'
+//include { NORM_VCF } from './subworkflows/norm_vcf/main.nf'
+include { SAMTOOLS_FAIDX } from './modules/samtools/faidx/main.nf'
+
+
+
+workflow{
+ // Input channels: one integer id per haplotype (1..params.n), the reference path, and the output of SAMTOOLS_FAIDX
+ ch_ids = Channel.of(1..params.n)
+ ch_ref = Channel.value("$baseDir/" + params.reference)
+ ch_ref_idx = SAMTOOLS_FAIDX(ch_ref)
+
+ // Generate samples (haplotype consensus sequence + VCF)
+ (ch_haplotypes,ch_vcf) = MASON_VARIATOR(ch_ids,ch_ref,ch_ref_idx)
+
+ // Normalize, sort and index the VCF files (currently disabled, see the commented-out include of NORM_VCF)
+ //NORM_VCF(ch_vcf,ch_ref)
+ // Key each VCF by the integer id parsed from its file name: simulated_hap<id>.vcf -> (id, file)
+ ch_vcf
+ .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('\\.vcf', '').toInteger(), file(it)) }
+ .set {ch_sample_vcf_map}
+
+ // Generate reads: only read_type 'ngs' triggers a simulation; the Nanopore branch below is commented out
+ if (params.read_type == 'ngs'){
+ MASON_SIMULATOR(ch_sample_vcf_map,ch_ref,ch_ref_idx)
+ }
+ //else if (params.read_type == 'nano'){
+ // NANOSIM(ch_ids, ch_haplotypes)
+ //}
+
+
+}
diff --git a/modules/bcftools/index/main.nf b/modules/bcftools/index/main.nf
new file mode 100644
index 0000000..b56a469
--- /dev/null
+++ b/modules/bcftools/index/main.nf
@@ -0,0 +1,27 @@
+process BCFOOTLS_INDEX { // NOTE(review): name looks like a transposition of BCFTOOLS; left unchanged because includes may reference it
+ // Indexes one VCF: runs `bcftools index -t` and emits the resulting <vcf file name>.tbi
+ // Job label (disabled; this process declares no `sample` input): tag "${sample}"
+
+ // Store results (disabled)
+ //publishDir "${params.outdir}", mode: 'copy', pattern: "*.tbi"
+
+ // Engine settings
+ conda 'bioconda::bcftools=1.19'
+
+ // Resources
+ cpus 1
+
+ // Process I/O
+ input:
+ path vcffile
+
+ output:
+ path "${vcffile.getName()}.tbi", emit: index_vcf
+
+ // Job script
+ """
+ bcftools index -t ${vcffile}
+ """
+
+
+}
\ No newline at end of file
diff --git a/modules/bcftools/norm/main.nf b/modules/bcftools/norm/main.nf
new file mode 100644
index 0000000..8e8eebb
--- /dev/null
+++ b/modules/bcftools/norm/main.nf
@@ -0,0 +1,30 @@
+process BCFOOTLS_NORM {
+ // Normalizes one VCF against the reference with `bcftools norm` and emits <simple name>.normalized.vcf
+ // Job label (disabled; this process declares no `sample` input): tag "${sample}"
+
+ // Engine settings
+ conda 'bioconda::bcftools=1.19'
+
+ // Resources
+ cpus 1
+
+ // Process I/O
+ input:
+ each path(vcffile) // declared as path so the VCF is staged into the task directory, like `path vcffile` in the sort/index modules
+ path ref
+
+ output:
+ path "${vcffile.getSimpleName()}.normalized.vcf", emit: norm_vcf
+
+ // Job script
+ """
+ bcftools norm \
+ --fasta-ref ${ref} \
+ --check-ref s \
+ --multiallelics -both \
+ -o ${vcffile.getSimpleName()}.normalized.vcf \
+ ${vcffile}
+ """
+
+
+}
\ No newline at end of file
diff --git a/modules/bcftools/sort/main.nf b/modules/bcftools/sort/main.nf
new file mode 100644
index 0000000..e03ceca
--- /dev/null
+++ b/modules/bcftools/sort/main.nf
@@ -0,0 +1,30 @@
+process BCFOOTLS_SORT { // NOTE(review): name looks like a transposition of BCFTOOLS; left unchanged because includes may reference it
+ // Sorts one VCF with `bcftools sort` and writes it compressed (-O z) as <simple name>.normalized.sorted.vcf.gz
+ // Job label (disabled; this process declares no `sample` input): tag "${sample}"
+
+ // Store results (disabled)
+ //publishDir "${params.outdir}", mode: 'copy', pattern: "*.normalized.sorted.vcf.gz"
+
+ // Engine settings
+ conda 'bioconda::bcftools=1.19'
+
+ // Resources
+ cpus 1
+
+ // Process I/O
+ input:
+ path vcffile
+
+ output:
+ path "${vcffile.getSimpleName()}.normalized.sorted.vcf.gz", emit: sort_vcf
+
+ // Job script
+ """
+ bcftools sort \
+ -o ${vcffile.getSimpleName()}.normalized.sorted.vcf.gz \
+ -O z \
+ ${vcffile}
+ """
+
+
+}
\ No newline at end of file
diff --git a/modules/happy/main.nf b/modules/happy/main.nf
new file mode 100644
index 0000000..3564256
--- /dev/null
+++ b/modules/happy/main.nf
@@ -0,0 +1,37 @@
+process HAPPY { // compares one callset against its truthset by running som.py (installed through the hap.py conda package)
+ // Job label: the sample id, first element of the (sample, truthset, callset) input tuple
+ tag "${sample}"
+
+ // Store results: only files matching *.sompy.* are copied to params.outdir
+ publishDir "${params.outdir}", mode: 'copy', pattern: "*.sompy.*"
+
+ // Engine settings
+ conda 'bioconda::hap.py=0.3.15'
+
+ // Resources
+ cpus 1
+
+ // Process I/O (ref is passed to som.py via -r; ref_idx is not referenced in the script below)
+ input:
+ tuple val(sample), path(truthset), path(callset)
+ val ref
+ val ref_idx
+
+ output:
+ path "simulated_hap${sample}.sompy.stats.csv", emit: csv
+ path "simulated_hap${sample}.sompy.metrics.json", emit: json
+
+ // Job script: -o sets the output prefix that the two declared output files start with
+ """
+ som.py \
+ --no-fixchr-truth \
+ --no-fixchr-query \
+ --normalize-all \
+ -r ${ref} \
+ -o simulated_hap${sample}.sompy \
+ ${truthset} \
+ ${callset}
+ """
+
+
+}
\ No newline at end of file
diff --git a/modules/mason/simulator/main.nf b/modules/mason/simulator/main.nf
new file mode 100644
index 0000000..eb4b80b
--- /dev/null
+++ b/modules/mason/simulator/main.nf
@@ -0,0 +1,46 @@
+process MASON_SIMULATOR {
+ // Simulates reads for one haplotype VCF with mason_simulator: two FASTQ files (R1/R2) plus an alignment BAM
+ // Job label
+ // tag "${sample}"
+
+ // Store results
+ publishDir "${params.outdir}", mode: 'copy', pattern: "*.{NGSWGS.R1.fastq,NGSWGS.R2.fastq,bam}"
+
+ // Engine settings
+ conda 'bioconda::mason=2.0.9'
+
+ // Resources
+ cpus 2
+
+ // Process I/O (ref is passed to mason_simulator via -ir; ref_idx is not referenced in the script below)
+ input:
+ tuple val(sample), path(vcf)
+ val ref
+ val ref_idx
+
+ output:
+ path "simulated_hap${sample}.NGSWGS.{R1,R2}.fastq", emit: fastqs
+ path "simulated_hap${sample}.bam", emit: bam
+
+ // Job script: `def` keeps the seed local to each task; the product is computed as long so a large seed cannot overflow before the modulo
+ script:
+ def unique_seed = ((params.seed as long) * sample) % 2147483647 // that's (2^31)-1, the upper bound for mason
+ """
+ mason_simulator \
+ -ir ${ref} \
+ -iv ${vcf} \
+ -o simulated_hap${sample}.NGSWGS.R1.fastq \
+ -or simulated_hap${sample}.NGSWGS.R2.fastq \
+ -oa simulated_hap${sample}.bam \
+ --seed ${unique_seed} \
+ --num-threads ${task.cpus} \
+ --num-fragments ${params.nb_frag} \
+ --fragment-min-size ${params.fragment_min_size} \
+ --fragment-max-size ${params.fragment_max_size} \
+ --fragment-mean-size ${params.fragment_mean_size} \
+ --fragment-size-std-dev ${params.fragment_size_std_dev} \
+ --illumina-read-length ${params.illumina_read_length}
+ """
+
+
+}
diff --git a/modules/mason/variator/main.nf b/modules/mason/variator/main.nf
new file mode 100644
index 0000000..5640ef1
--- /dev/null
+++ b/modules/mason/variator/main.nf
@@ -0,0 +1,46 @@
+process MASON_VARIATOR {
+ // Simulates one haplotype from the reference with mason_variator: emits simulated_hap<sample>.fasta and the matching .vcf
+ // Job label
+ // tag "${sample}"
+
+ // Store results
+ publishDir "${params.outdir}", mode: 'copy', pattern: "simulated_hap*"
+
+ // Engine settings
+ conda 'bioconda::mason=2.0.9'
+
+ // Resources
+ cpus 1
+
+ // Process I/O (ref is passed via --in-reference; ref_idx is not referenced in the script below)
+ input:
+ val sample
+ val ref
+ val ref_idx
+
+ output:
+ path "simulated_hap${sample}.fasta", emit: fasta
+ path "simulated_hap${sample}.vcf", emit: vcf
+
+ // Job script: `def` keeps the seed local to each task; the product is computed as long so a large seed cannot overflow before the modulo
+ script:
+ def unique_seed = ((params.seed as long) * sample) % 2147483647 // that's (2^31)-1, the upper bound for mason_variator
+ """
+ mason_variator \
+ --in-reference ${ref} \
+ --out-fasta simulated_hap${sample}.fasta \
+ --out-vcf simulated_hap${sample}.vcf \
+ --seed ${unique_seed} \
+ --snp-rate 0.01 \
+ --small-indel-rate 0.005 \
+ --min-small-indel-size 1 \
+ --max-small-indel-size 20 \
+ --sv-indel-rate 0 \
+ --sv-inversion-rate 0 \
+ --sv-translocation-rate 0 \
+ --sv-duplication-rate 0 \
+ 2> ${sample}.log
+ """
+
+
+}
diff --git a/modules/nanosim/main.nf b/modules/nanosim/main.nf
new file mode 100644
index 0000000..f2ad18d
--- /dev/null
+++ b/modules/nanosim/main.nf
@@ -0,0 +1,39 @@
+process NANOSIM {
+ // Simulates reads from one haplotype FASTA with NanoSim's `simulator.py genome`, using the model under params.model_prefix
+ // Job label
+ // tag "${id}"
+
+ // Store results
+ publishDir "${params.outdir}", mode: 'copy', pattern: "simulated_hap${id}.NANOWGS{_aligned_error_profile,_aligned_reads.fasta}"
+
+ // Engine settings
+ conda 'bioconda::nanosim=3.1.0'
+
+ // Resources
+ cpus 2
+
+ // Process I/O
+ input:
+ val id
+ path "simulated_hap${id}.fasta" // haplotype sequence, staged under this name, e.g. simulated_hap1.fasta
+
+ output:
+ path "simulated_hap${id}.NANOWGS{_aligned_error_profile,_aligned_reads.fasta}", emit: fastq
+
+ // Job script: `def` keeps the seed local to each task; the product is computed as long so a large seed cannot overflow before the modulo
+ script:
+ def unique_seed = ((params.seed as long) * id) % 2147483647 // (2^31)-1, the same bound the mason processes use; NOTE(review): confirm NanoSim's accepted seed range
+ """
+ simulator.py genome \
+ -dna_type ${params.dna_type} \
+ -rg simulated_hap${id}.fasta \
+ -c ${projectDir}/${params.model_prefix} \
+ -b ${params.model_caller} \
+ -med ${params.median_length} \
+ -sd ${params.sd_length} \
+ -n ${params.nb_reads} \
+ -o simulated_hap${id}.NANOWGS \
+ --seed ${unique_seed} \
+ -t ${task.cpus}
+ """
+}
diff --git a/modules/samtools/faidx/main.nf b/modules/samtools/faidx/main.nf
new file mode 100644
index 0000000..8c8a6a3
--- /dev/null
+++ b/modules/samtools/faidx/main.nf
@@ -0,0 +1,26 @@
+process SAMTOOLS_FAIDX {
+ // Builds a FASTA index for the reference: runs `samtools faidx` and writes <ref>.fai in the task directory
+ // Job label (disabled; this process declares no `sample` input): tag "${sample}"
+
+ // Engine settings
+ conda 'bioconda::samtools=1.19.2'
+
+ // Resources
+ cpus 1
+
+ // Process I/O
+ input:
+ path ref
+
+ output:
+ val "${ref}.fai" // emitted as a plain value (the index file name), not as a path output
+
+ // Job script
+ """
+ samtools faidx \
+ ${ref} \
+ -o ${ref}.fai
+ """
+
+
+}
diff --git a/nextflow.config b/nextflow.config
new file mode 100644
index 0000000..71e1dad
--- /dev/null
+++ b/nextflow.config
@@ -0,0 +1,66 @@
+// Pipeline meta-information
+manifest {
+ name = 'CIEVaD'
+ description = 'A workflow for a simple, streamlined and rapid evaluation of variant callsets '
+ author = 'Thomas Krannich'
+ nextflowVersion = '>=22.04.0' // hap.nf/eval.nf use DSL2 (include, workflow) without enabling it explicitly, so they need a release where DSL2 is the default
+ version = '0.1.0-nf'
+}
+
+// Parameters that are accessible in the pipeline script
+params {
+ // Individual parameters
+ n = 3 // number of haplotypes to simulate; hap.nf creates the sample ids 1..n
+ reference = 'reference/Sars-Cov-2/Wuhan-Hu-1/MN908947.3.fasta' // appended to "$baseDir/" by hap.nf and eval.nf
+ read_type = 'ngs' // hap.nf simulates reads only when this is 'ngs'; its Nanopore branch is commented out
+
+ // General parameters
+ seed = 479 // base seed; each simulation process multiplies it by the sample id
+ outdir = 'results' // publishDir target of the processes; eval.nf also reads simulated_hap*.vcf from here
+
+ // NGS (WGS) - Read simulation parameters (passed to mason_simulator)
+ nb_frag = 3000
+ fragment_min_size = 450
+ fragment_max_size = 550
+ fragment_mean_size = 500
+ fragment_size_std_dev = 20
+ illumina_read_length = 150
+
+ // Nanopore (WGS) - Read simulation parameters (passed to simulator.py in the NANOSIM process)
+ dna_type = 'linear'
+ model_prefix = 'aux/nanosim_model/human_NA12878_DNA_FAB49712_guppy/training' // prefixed with ${projectDir}/ in the NANOSIM script
+ model_caller = 'guppy'
+ median_length = 5000
+ sd_length = 1.05
+ nb_reads = 180
+
+ // Evaluation parameters
+ callsets_dir = 'data' // eval.nf globs *.{vcf,vcf.gz} inside this directory
+}
+
+// Enable execution report
+def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') // computed once, when this config file is evaluated
+report {
+ enabled = true
+ file = "${params.outdir}/execution_report_${trace_timestamp}.html" // the timestamp (second resolution) is part of the report file name
+}
+
+// Predefined configurations for the user. Can be selected using the -profile command line option.
+// Profiles can be combined by separating the profile names with a comma.
+profiles {
+ // engines: both profiles set conda.enabled; they differ only in conda.useMamba
+ conda {
+ conda.enabled = true
+ conda.useMamba = false
+ }
+ mamba {
+ conda.enabled = true
+ conda.useMamba = true
+ }
+
+ // executors
+ local {
+ executor.name = "local"
+ executor.cpus = 4 // NOTE(review): presumably the total CPUs the local executor may use at once - confirm against the Nextflow executor scope docs
+ }
+}
diff --git a/python/__init__.py b/python/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/python/configGenerators.py b/python/configGenerators.py
deleted file mode 100644
index f80a696..0000000
--- a/python/configGenerators.py
+++ /dev/null
@@ -1,125 +0,0 @@
-import math
-from python.myUtil import trim_trailing_slash, mkdir_if_not_present
-
-
-def generate_hap_config(args):
-
- # check if configs directory exists, create if not
- mkdir_if_not_present("configs")
-
- # generate config
- with open("configs/snake_config_haplotype.yaml", 'w') as config :
-
- config.write('HEAD_DIR:\n')
- config.write(' ' + trim_trailing_slash(args.head_dir) + '\n\n')
- config.write('REF:\n')
- config.write(' ' + args.reference + '\n\n')
- config.write('SEED:\n')
- config.write(' ' + str(args.seed) + '\n\n')
- config.write('SAMPLES:\n')
-
- padding = int(math.log2(args.nb_samples))
- for i in range(1, (args.nb_samples)+1):
- config.write(' - \"' + str(i).zfill(padding) + '\"\n')
-
- print("New config file is created at configs/snake_config_haplotype.yaml\n")
-
-
-def generate_ngs_config(args):
-
- # check if configs directory exists, create if not
- mkdir_if_not_present("configs")
-
- # generate config
- with open("configs/snake_config_ngs.yaml", 'w') as config :
-
- config.write('HEAD_DIR:\n')
- config.write(' ' + trim_trailing_slash(args.head_dir) + '\n\n')
- config.write('NGS_NB_FRAGS:\n')
- config.write(' ' + str(args.nb_frags) + '\n\n')
- config.write('SEED:\n')
- config.write(' ' + str(args.seed) + '\n\n')
- config.write('SAMPLES:\n')
-
- padding = int(math.log2(args.nb_samples))
- for i in range(1, (args.nb_samples)+1):
- config.write(' - \"' + str(i).zfill(padding) + '\"\n')
-
- print("New config file is created at configs/snake_config_ngs.yaml\n")
-
-
-def generate_ampli_config(args):
-
- # check if configs directory exists, create if not
- mkdir_if_not_present("configs")
-
- # generate config
- with open("configs/snake_config_amplicon.yaml", 'w') as config :
-
- config.write('HEAD_DIR:\n')
- config.write(' ' + trim_trailing_slash(args.head_dir) + '\n\n')
- config.write('REF:\n')
- config.write(' ' + args.reference + '\n\n')
- config.write('SEED:\n')
- config.write(' ' + str(args.seed) + '\n\n')
- config.write('PRIMER:\n')
- config.write(' ' + args.primers + '\n\n')
- config.write('SAMPLES:\n')
-
- padding = int(math.log2(args.nb_samples))
- for i in range(1, (args.nb_samples)+1):
- config.write(' - \"' + str(i).zfill(padding) + '\"\n')
-
- print("New config file is created at configs/snake_config_amplicon.yaml\n")
-
-
-def generate_nanopore_config(args):
-
- # check if configs directory exists, create if not
- mkdir_if_not_present("configs")
-
- # generate config
- with open("configs/snake_config_nanopore.yaml", 'w') as config :
-
- config.write('HEAD_DIR:\n')
- config.write(' ' + trim_trailing_slash(args.head_dir) + '\n\n')
- config.write('MODEL_PREFIX:\n')
- config.write(' ' + str(args.model_prefix) + '\n\n')
- config.write('DNA_TYPE:\n')
- config.write(' ' + str(args.dna_type) + '\n\n')
- config.write('MODEL_CALLER:\n')
- config.write(' ' + str(args.model_caller) + '\n\n')
- config.write('MEDIAN_LENGTH:\n')
- config.write(' ' + str(args.median_length) + '\n\n')
- config.write('SD_LENGTH:\n')
- config.write(' ' + str(args.sd_length) + '\n\n')
- config.write('NB_READS:\n')
- config.write(' ' + str(args.nb_reads) + '\n\n')
- config.write('SEED:\n')
- config.write(' ' + str(args.seed) + '\n\n')
- config.write('SAMPLES:\n')
-
- padding = int(math.log2(args.nb_samples))
- for i in range(1, (args.nb_samples)+1):
- config.write(' - \"' + str(i).zfill(padding) + '\"\n')
-
- print("New config file is created at configs/snake_config_nanopore.yaml\n")
-
-
-def generate_eval_config(args):
-
- # check if configs directory exists, create if not
- mkdir_if_not_present("configs")
-
- # generate config
- with open("configs/snake_config_eval.yaml", 'w') as config :
-
- config.write('HEAD_DIR:\n')
- config.write(' ' + trim_trailing_slash(args.head_dir) + '\n\n')
- config.write('SAMPLES:\n')
-
- padding = int(math.log2(args.nb_samples))
- for i in range(1, (args.nb_samples)+1):
- config.write(' - \"' + str(i).zfill(padding) + '\"\n')
-
- print("New config file is created at configs/snake_config_eval.yaml\n")
diff --git a/python/myUtil.py b/python/myUtil.py
deleted file mode 100644
index a12c13f..0000000
--- a/python/myUtil.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import os
-
-
-def trim_trailing_slash(s:str):
- if s[-1] == '/':
- return s[:-1]
- return s
-
-
-def mkdir_if_not_present(cfg_path:str):
- dir_exists = os.path.exists(cfg_path)
- if not dir_exists:
- os.makedirs(cfg_path)
- print("New config directory is created at " + cfg_path + "!")
diff --git a/python/runWorkflows.py b/python/runWorkflows.py
deleted file mode 100644
index 4e0248c..0000000
--- a/python/runWorkflows.py
+++ /dev/null
@@ -1,82 +0,0 @@
-import os
-import python.configGenerators
-
-
-def run_hap(args):
- print("Running haplotype simulation...\n")
-
- if args.command == 'hap':
-
- if args.config is not None:
-
- os.system('snakemake -p --use-conda --cores 1 --configfile ' + args.config + ' -s ' + args.snakefile)
-
- else:
-
- python.configGenerators.generate_hap_config(args)
-
- os.system('snakemake -p --use-conda --cores 1 --configfile configs/snake_config_haplotype.yaml -s ' + args.snakefile)
-
-
-def run_ngs(args):
- print("Running NGS read simulation...\n")
-
- if args.command == 'ngs':
-
- if args.config is not None:
-
- os.system('snakemake -p --use-conda --cores ' + str(args.threads) + '--configfile ' + args.config + ' -s ' + args.snakefile)
-
- else:
-
- python.configGenerators.generate_ngs_config(args)
-
- os.system('snakemake -p --use-conda --cores ' + str(args.threads) + ' --configfile configs/snake_config_ngs.yaml -s ' + args.snakefile)
-
-
-def run_ampli(args):
- print("Running amplicon and NGS read simulation...\n")
-
- if args.command == 'ampli':
-
- if args.config is not None:
-
- os.system('snakemake -p --use-conda --cores 1 --configfile ' + args.config + ' -s ' + args.snakefile)
-
- else:
-
- python.configGenerators.generate_ampli_config(args)
-
- os.system('snakemake -p --use-conda --cores 1 --configfile configs/snake_config_amplicon.yaml -s ' + args.snakefile)
-
-
-def run_nanopore(args):
- print("Running Nanopore read simulation...\n")
-
- if args.command == 'nano':
-
- if args.config is not None:
-
- os.system('snakemake -p --use-conda --cores ' + str(args.threads) + ' --configfile ' + args.config + ' -s ' + args.snakefile)
-
- else:
-
- python.configGenerators.generate_nanopore_config(args)
-
- os.system('snakemake -p --use-conda --cores ' + str(args.threads) + ' --configfile configs/snake_config_nanopore.yaml -s ' + args.snakefile)
-
-
-def run_eval(args):
- print("Running VCF file-based evaluation of variants...\n")
-
- if args.command == 'eval':
-
- if args.config is not None:
-
- os.system('snakemake -p --use-conda --cores 1 --configfile ' + args.config + ' -s ' + args.snakefile)
-
- else:
-
- python.configGenerators.generate_eval_config(args)
-
- os.system('snakemake -p --use-conda --cores 1 --configfile configs/snake_config_eval.yaml -s ' + args.snakefile)
\ No newline at end of file
diff --git a/snakemake/amplicon/Snakefile b/snakemake/amplicon/Snakefile
deleted file mode 100644
index 3ef1984..0000000
--- a/snakemake/amplicon/Snakefile
+++ /dev/null
@@ -1,67 +0,0 @@
-#################### PREAMBLE
-from snakemake.utils import min_version
-min_version("6.0")
-
-
-#################### CONFIG
-configfile: "configs/snake_config_amplicon.yaml"
-
-
-#################### INCLUDES
-
-#################### PARAMS
-HEAD_DIR = config["HEAD_DIR"]
-SAMPLES = config["SAMPLES"]
-SEED = config["SEED"]
-
-
-#################### RULES
-rule all:
- input:
- simulated_read = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.amplicon.ngs.{r}.fastq", s=SAMPLES, r=["R1", "R2"])
-
-rule amplicon_simulator:
- input:
- reference = config["REF"],
- primer = config["PRIMER"]
- output:
- amplicons = config["HEAD_DIR"] + "/data/simulated_hap{sample}/amplicons.fa"
- params:
- seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound
- log:
- config["HEAD_DIR"] + "/logs/amplisim/amplisim.hap{sample}.log"
- shell:
- """
- # STATIC COMPILED BINARY OF AMPLISIM WILL BE REPLACED BY A LIVE BUILD OR CONDA
- ./bin/amplisim-v0_1_0-ubuntu_20_04 \
- -s {params.seed} \
- -o {output.amplicons} \
- {input.reference} \
- {input.primer} \
- > {log} 2>&1
- """
-
-rule ngs_read_simulator:
- input:
- amplicons = rules.amplicon_simulator.output.amplicons
- output:
- r1 = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.amplicon.ngs.R1.fastq",
- r2 = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.amplicon.ngs.R2.fastq"
- params:
- seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound
- threads:
- workflow.cores
- conda:
- config["HEAD_DIR"] + "/env/conda_mason.yaml"
- log:
- config["HEAD_DIR"] + "/logs/mason/mason_frag_sequencing.hap{sample}.log"
- shell:
- """
- mason_frag_sequencing \
- -i {input.amplicons} \
- -o {output.r1} \
- -or {output.r2} \
- --seed {params.seed} \
- --illumina-read-length 150 \
- > {log} 2>&1
- """
diff --git a/snakemake/eval/Snakefile b/snakemake/eval/Snakefile
deleted file mode 100644
index fe1e6e2..0000000
--- a/snakemake/eval/Snakefile
+++ /dev/null
@@ -1,61 +0,0 @@
-#################### PREAMBLE
-from snakemake.utils import min_version
-min_version("6.0")
-
-
-#################### CONFIG
-configfile: "configs/snake_config_eval.yaml"
-
-
-#################### INCLUDES
-include: "../include/vcf-norm-call.smk"
-
-
-#################### PARAMS
-HEAD_DIR = config["HEAD_DIR"]
-SAMPLES = config["SAMPLES"]
-
-
-#################### RULES
-rule all:
- input:
- expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/callset.normalized.sorted.vcf.gz.tbi", s=SAMPLES),
- expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/eval.picard.{e}", e=["variant_calling_detail_metrics", "variant_calling_summary_metrics"], s=SAMPLES),
- HEAD_DIR + "/results/variant_calling_summary_ngs"
-
-
-rule vcf_evaluation:
- input:
- truthset = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.normalized.sorted.vcf.gz",
- truthidx = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.normalized.sorted.vcf.gz.tbi",
- callset = config["HEAD_DIR"] + "/data/simulated_hap{sample}/callset.normalized.sorted.vcf.gz",
- callidx = config["HEAD_DIR"] + "/data/simulated_hap{sample}/callset.normalized.sorted.vcf.gz.tbi"
- params:
- prefix = config["HEAD_DIR"] + "/data/simulated_hap{sample}/eval.picard"
- output:
- detail = config["HEAD_DIR"] + "/data/simulated_hap{sample}/eval.picard.variant_calling_detail_metrics",
- summary = config["HEAD_DIR"] + "/data/simulated_hap{sample}/eval.picard.variant_calling_summary_metrics"
- conda:
- config["HEAD_DIR"] + "/env/conda_picard.yaml"
- log:
- config["HEAD_DIR"] + "/logs/picard/evaluation_hap{sample}.log"
- shell:
- """
- picard CollectVariantCallingMetrics \
- --INPUT {input.callset} \
- --DBSNP {input.truthset} \
- -O {params.prefix}
- """
-
-
-rule report:
- input:
- expand(config["HEAD_DIR"] + "/data/simulated_hap{sample}/eval.picard.variant_calling_summary_metrics", sample=config["SAMPLES"])
- output:
- config["HEAD_DIR"] + "/results/variant_calling_summary_ngs"
- params:
- head_dir = config["HEAD_DIR"]
- shell:
- """
- sh {params.head_dir}/aux/picard_summary_of_summaries.sh {params.head_dir} > {output}
- """
\ No newline at end of file
diff --git a/snakemake/hap/Snakefile b/snakemake/hap/Snakefile
deleted file mode 100644
index 9752c8a..0000000
--- a/snakemake/hap/Snakefile
+++ /dev/null
@@ -1,58 +0,0 @@
-#################### PREAMBLE
-from snakemake.utils import min_version
-min_version("6.0")
-
-
-#################### CONFIG
-configfile: "configs/snake_config_haplotype.yaml"
-
-
-#################### INCLUDES
-include: "../include/vcf-norm-truth.smk"
-
-
-#################### PARAMS
-HEAD_DIR = config["HEAD_DIR"]
-REF = config["REF"]
-SAMPLES = config["SAMPLES"]
-SEED = config["SEED"]
-
-
-#################### RULES
-rule all:
- input:
- simulated_haplotypes = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.fasta", s=SAMPLES),
- simulated_variants = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.vcf" , s=SAMPLES),
- simulated_norm_variants = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.normalized.sorted.vcf.gz" , s=SAMPLES),
- simulated_norm_variants_idx = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.normalized.sorted.vcf.gz.tbi" , s=SAMPLES)
-
-
-rule hap_simulator:
- input:
- ref = config["REF"]
- output:
- fasta = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.fasta",
- vcf = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.vcf",
- params:
- seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound for mason_variator
- conda:
- config["HEAD_DIR"] + "/env/conda_mason.yaml"
- log:
- config["HEAD_DIR"] + "/logs/mason/mason_variator.hap{sample}.log"
- shell:
- """
- mason_variator \
- --in-reference {input.ref} \
- --out-fasta {output.fasta} \
- --out-vcf {output.vcf} \
- --seed {params.seed} \
- --snp-rate 0.01 \
- --small-indel-rate 0.005 \
- --min-small-indel-size 1 \
- --max-small-indel-size 20 \
- --sv-indel-rate 0 \
- --sv-inversion-rate 0 \
- --sv-translocation-rate 0 \
- --sv-duplication-rate 0 \
- 2> {log}
- """
\ No newline at end of file
diff --git a/snakemake/include/vcf-norm-call.smk b/snakemake/include/vcf-norm-call.smk
deleted file mode 100644
index 30c736f..0000000
--- a/snakemake/include/vcf-norm-call.smk
+++ /dev/null
@@ -1,37 +0,0 @@
-from snakemake.utils import min_version
-min_version("6.0")
-
-module bcftools:
- snakefile:
- config["HEAD_DIR"] + "/snakemake/modules/bcftools/Snakefile"
-
-
-use rule bcftools_norm_noref from bcftools as norm_callset with:
- input:
- vcf = config["HEAD_DIR"] + "/data/simulated_hap{sample}/callset.vcf.gz"
- output:
- vcf = temp(config["HEAD_DIR"] + "/data/simulated_hap{sample}/callset.normalized.vcf.gz")
- conda:
- config["HEAD_DIR"] + "/env/conda_bcftools.yaml"
- log:
- config["HEAD_DIR"] + "/logs/bcftools_norm.hap{sample}.callset.log"
-
-
-use rule bcftools_sort from bcftools as sort_callset with:
- input:
- vcf = rules.norm_callset.output
- output:
- vcf = config["HEAD_DIR"] + "/data/simulated_hap{sample}/callset.normalized.sorted.vcf.gz"
- conda:
- config["HEAD_DIR"] + "/env/conda_bcftools.yaml"
- log:
- config["HEAD_DIR"] + "/logs/bcftools/bcftools_sort.hap{sample}.callset.log"
-
-
-use rule bcftools_index from bcftools as index_callset with:
- input:
- vcf = rules.sort_callset.output
- output:
- tbi = config["HEAD_DIR"] + "/data/simulated_hap{sample}/callset.normalized.sorted.vcf.gz.tbi"
- conda:
- config["HEAD_DIR"] + "/env/conda_bcftools.yaml"
\ No newline at end of file
diff --git a/snakemake/include/vcf-norm-truth.smk b/snakemake/include/vcf-norm-truth.smk
deleted file mode 100644
index d9fcc93..0000000
--- a/snakemake/include/vcf-norm-truth.smk
+++ /dev/null
@@ -1,38 +0,0 @@
-from snakemake.utils import min_version
-min_version("6.0")
-
-module bcftools:
- snakefile:
- config["HEAD_DIR"] + "/snakemake/modules/bcftools/Snakefile"
-
-
-use rule bcftools_norm from bcftools as norm_truthset with:
- input:
- vcf = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.vcf",
- ref = config["REF"]
- output:
- vcf = temp(config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.normalized.vcf")
- conda:
- config["HEAD_DIR"] + "/env/conda_bcftools.yaml"
- log:
- config["HEAD_DIR"] + "/logs/bcftools_norm.hap{sample}.truthset.log"
-
-
-use rule bcftools_sort from bcftools as sort_truthset with:
- input:
- vcf = rules.norm_truthset.output
- output:
- vcf = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.normalized.sorted.vcf.gz"
- conda:
- config["HEAD_DIR"] + "/env/conda_bcftools.yaml"
- log:
- config["HEAD_DIR"] + "/logs/bcftools_sort.hap{sample}.truthset.log"
-
-
-use rule bcftools_index from bcftools as index_truthset with:
- input:
- vcf = rules.sort_truthset.output
- output:
- tbi = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.normalized.sorted.vcf.gz.tbi"
- conda:
- config["HEAD_DIR"] + "/env/conda_bcftools.yaml"
\ No newline at end of file
diff --git a/snakemake/modules/bcftools/Snakefile b/snakemake/modules/bcftools/Snakefile
deleted file mode 100644
index 537e763..0000000
--- a/snakemake/modules/bcftools/Snakefile
+++ /dev/null
@@ -1,74 +0,0 @@
-from snakemake.utils import min_version
-min_version("6.0")
-
-
-rule bcftools_norm:
- input:
- ref = "genome.fasta",
- vcf = "variants.vcf"
- output:
- vcf = "variants.norm.vcf"
- conda:
- "bcftools.yaml"
- log:
- "bcftools.norm.log"
- shell:
- """
- bcftools norm \
- --fasta-ref {input.ref} \
- --check-ref s \
- --multiallelics -both \
- -o {output.vcf} \
- {input.vcf} \
- > {log}
- """
-
-
-rule bcftools_norm_noref:
- input:
- vcf = "variants.vcf"
- output:
- vcf = "variants.norm.vcf"
- conda:
- "bcftools.yaml"
- log:
- "bcftools.norm.noref.log"
- shell:
- """
- bcftools norm \
- --multiallelics -both \
- -o {output.vcf} \
- {input.vcf} \
- > {log}
- """
-
-
-rule bcftools_sort:
- input:
- vcf = "variants.vcf"
- output:
- vcf = "variants.sorted.vcf.gz"
- conda:
- "bcftools.yaml"
- log:
- "bcftools.sort.log"
- shell:
- """
- bcftools sort \
- -o {output.vcf} \
- -O z \
- {input.vcf}
- """
-
-
-rule bcftools_index:
- input:
- vcf = "variants.sorted.vcf.gz"
- output:
- tbi = "variants.sorted.vcf.gz.tbi"
- conda:
- "bcftools.yaml"
- shell:
- """
- bcftools index -t {input.vcf}
- """
\ No newline at end of file
diff --git a/snakemake/nanopore/Snakefile b/snakemake/nanopore/Snakefile
deleted file mode 100644
index 0c745e6..0000000
--- a/snakemake/nanopore/Snakefile
+++ /dev/null
@@ -1,67 +0,0 @@
-#################### PREAMBLE
-from snakemake.utils import min_version
-min_version("6.0")
-
-
-#################### CONFIG
-configfile: "configs/snake_config_nanopore.yaml"
-
-
-#################### INCLUDES
-
-
-#################### PARAMS
-HEAD_DIR = config["HEAD_DIR"]
-SAMPLES = config["SAMPLES"]
-DNA_TYPE = config["DNA_TYPE"]
-MODEL_PREFIX = config["MODEL_PREFIX"]
-MODEL_CALLER = config["MODEL_CALLER"]
-MEDIAN_LENGTH = config["MEDIAN_LENGTH"]
-SD_LENGTH = config["SD_LENGTH"]
-NB_READS = config["NB_READS"]
-SEED = config["SEED"]
-
-
-#################### RULES
-rule all:
- input:
- reads = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.nanopore_aligned_reads.fasta", s=SAMPLES),
- errors = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.nanopore_aligned_error_profile", s=SAMPLES)
-
-
-rule nanopore_read_simulator:
- input:
- simu_hap = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.fasta",
- model = expand(config["HEAD_DIR"] + "/" + config["MODEL_PREFIX"] + "{f}", f=["_aligned_reads.pkl", "_aligned_region.pkl", "_chimeric_info", "_error_markov_model", "_error_rate.tsv", "_first_match.hist", "_gap_length.pkl", "_ht_length.pkl", "_ht_ratio.pkl", "_match_markov_model", "_model_profile", "_reads_alignment_rate", "_strandness_rate", "_unaligned_length.pkl"])
- output:
- reads = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.nanopore_aligned_reads.fasta",
- errors = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.nanopore_aligned_error_profile"
- params:
- dna_type = config["DNA_TYPE"],
- model_caller = config["MODEL_CALLER"],
- model_prefix = config["MODEL_PREFIX"],
- median_length = config["MEDIAN_LENGTH"],
- sd_length = config["SD_LENGTH"],
- nb_reads = config["NB_READS"],
- seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound
- threads:
- workflow.cores
- conda:
- config["HEAD_DIR"] + "/env/conda_nanosim.yaml"
- log:
- config["HEAD_DIR"] + "/logs/nanosim/nanosim.hap{sample}.log"
- shell:
- """
- simulator.py genome \
- -dna_type {params.dna_type} \
- -rg {input.simu_hap} \
- -c {params.model_prefix} \
- --b {params.model_caller} \
- -med {params.median_length} \
- -sd {params.sd_length} \
- -n {params.nb_reads} \
- -o data/simulated_hap{wildcards.sample}/simulated.nanopore \
- --seed {params.seed} \
- -t {threads} \
- > {log}
- """
\ No newline at end of file
diff --git a/snakemake/ngs/Snakefile b/snakemake/ngs/Snakefile
deleted file mode 100644
index d130147..0000000
--- a/snakemake/ngs/Snakefile
+++ /dev/null
@@ -1,80 +0,0 @@
-#################### PREAMBLE
-from snakemake.utils import min_version
-min_version("6.0")
-
-
-#################### CONFIG
-configfile: "configs/snake_config_ngs.yaml"
-
-
-#################### INCLUDES
-
-
-#################### PARAMS
-HEAD_DIR = config["HEAD_DIR"]
-NGS_NB_FRAGS = config["NGS_NB_FRAGS"]
-SAMPLES = config["SAMPLES"]
-SEED = config["SEED"]
-
-#################### RULES
-rule all:
- input:
- simulated_read = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.ngs.{r}.fastq", s=SAMPLES, r=["R1", "R2"])
-
-
-rule ngs_read_simulator:
- input:
- simu_hap = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.fasta"
- output:
- r1 = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.ngs.R1.fastq",
- r2 = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.ngs.R2.fastq"
-# ra = temp(config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.bam")
- params:
- nb_frag = config["NGS_NB_FRAGS"],
- seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound
- threads:
- workflow.cores
- conda:
- config["HEAD_DIR"] + "/env/conda_mason.yaml"
- log:
- config["HEAD_DIR"] + "/logs/mason/mason_simulator.hap{sample}.log"
- shell:
- """
- mason_simulator \
- -ir {input} \
- -n {params.nb_frag} \
- -o {output.r1} \
- -or {output.r2} \
- --seed {params.seed} \
- --num-threads {threads} \
- --fragment-min-size 450 \
- --fragment-max-size 550 \
- --fragment-mean-size 500 \
- --fragment-size-std-dev 20 \
- --illumina-read-length 150
- """
-# --out-alignment {output.ra} \
-
-#rule ngs_read_sort:
-# input:
-# rules.ngs_simulator.output.ra
-# output:
-# config["HEAD_DIR"] + "/data/simulated_hap{samples}/simulated.sorted.bam"
-# conda:
-# config["HEAD_DIR"] + "/env/conda_bwa_and_samtools.yaml"
-# shell:
-# """
-# samtools sort -o {output} {input}
-# """
-
-#rule ngs_read_index:
-# input:
-# rules.ngs_read_sort.output
-# output:
-# config["HEAD_DIR"] + "/data-ci/simulated_hap{samples}/simulated.sorted.bam.bai"
-# conda:
-# config["HEAD_DIR"] + "/env/conda_bwa_and_samtools.yaml"
-# shell:
-# """
-# samtools index {input}
-# """
\ No newline at end of file
diff --git a/subworkflows/norm_vcf/main.nf b/subworkflows/norm_vcf/main.nf
new file mode 100644
index 0000000..0c2834a
--- /dev/null
+++ b/subworkflows/norm_vcf/main.nf
@@ -0,0 +1,18 @@
+// include modules -- NOTE(review): "BCFOOTLS" looks like a typo of "BCFTOOLS"; the names must match the module files, so rename there and here together
+include { BCFOOTLS_INDEX } from '../../modules/bcftools/index/main.nf'
+include { BCFOOTLS_NORM } from '../../modules/bcftools/norm/main.nf'
+include { BCFOOTLS_SORT } from '../../modules/bcftools/sort/main.nf'
+
+// NORM_VCF: runs the NORM, SORT and INDEX processes as one chain on the given VCF input and emits the SORT and INDEX outputs.
+workflow NORM_VCF{
+ take:
+ vcffiles // first argument handed to BCFOOTLS_NORM; presumably a channel of VCF files -- TODO confirm against callers
+ reference_genome // second argument handed to BCFOOTLS_NORM; presumably the reference FASTA -- TODO confirm
+
+ main:
+ BCFOOTLS_NORM(vcffiles,reference_genome) | BCFOOTLS_SORT | BCFOOTLS_INDEX // each pipe feeds the previous process output into the next process
+
+ emit:
+ ch_normed_sorted_vcffiles = BCFOOTLS_SORT.out // output of the SORT step
+ ch_index_of_vcffiles = BCFOOTLS_INDEX.out // output of the INDEX step
+}
\ No newline at end of file