diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0d3be98..ff1b28b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -9,21 +9,20 @@ on: # designed as in: https://github.com/marketplace/actions/setup-miniconda jobs: CI: - name: CI tests using linux + name: CI (Linux) runs-on: "ubuntu-latest" defaults: run: shell: bash -el {0} steps: - - uses: actions/checkout@v2 - - uses: conda-incubator/setup-miniconda@v2 + - uses: actions/checkout@v4 + - uses: conda-incubator/setup-miniconda@v3 with: miniconda-version: "latest" - python-version: "3.11.3" - activate-environment: snakemake7 - environment-file: env/conda_snakemake7.yaml + activate-environment: nextflow + environment-file: env/conda_nxf.yml channels: conda-forge,bioconda,defaults - channel-priority: strict + channel-priority: true auto-activate-base: false - name: Test conda installation @@ -33,29 +32,22 @@ jobs: conda config --show-sources conda config --show - - name: Test snakemake installation + - name: Test nextflow installation run: | - snakemake --version + nextflow -version - name : Download reference run: | wget https://www.ebi.ac.uk/ena/browser/api/fasta/MN908947.3 sed 's/>ENA|MN908947|MN908947.3 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome./>MN908947.3/g' MN908947.3 > MN908947.3.fasta - - - name: Test CIEVaD principal functionality - run: | - python cievad.py --help + mkdir -p reference/Sars-Cov-2/Wuhan-Hu-1/ + mv MN908947.3.fasta reference/Sars-Cov-2/Wuhan-Hu-1/ - name: Test haplotype simulation run: | - python cievad.py hap -n 3 -r MN908947.3.fasta + nextflow run hap.nf -profile local,conda - - name: Test NGS simulation + - name: Test callset evaluation run: | - python cievad.py ngs -n 3 -f 1000 + nextflow run eval.nf -profile local,conda --callsets_dir aux/ci_data/ - - name: Test Nanopore simulation - run: | - python cievad.py nano -n 3 -r 100 - - diff --git a/.gitignore b/.gitignore index 157fdeb..823ec9f 
100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,8 @@ results/ aux/nanosim_model/human_NA12878_DNA_FAB49712_guppy.tar.gz -*.pyc \ No newline at end of file +*.pyc + +.nextflow.log* +.nextflow/ +work/ \ No newline at end of file diff --git a/README.md b/README.md index 704b2f0..219a94e 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,31 @@ ![Static Badge](https://img.shields.io/badge/requires-conda-blue) -![Static Badge](https://img.shields.io/badge/requires-snakemake-blue) +![Static Badge](https://img.shields.io/badge/requires-nextflow-blue) # CIEVaD -Continuous Integration and Evaluation for Variant Detection. This repository provides a tool suite for simple, streamlined and rapid creation and evaluation of genomic variant callsets. It is primarily designed for continuous integration of variant detection software and a plain containment check between sets of variants. The tools suite utilizes the _conda_ package management system and _Snakemake_ workflow language. +Continuous Integration and Evaluation for Variant Detection. This repository provides a tool suite for simple, streamlined and rapid creation and evaluation of genomic variant callsets. It is primarily designed for continuous integration of variant detection software and a plain containment check between sets of variants. The tools suite utilizes the _conda_ package management system and _nextflow_ workflow language. ## Contents: 1. [System requirements](#system-requirements) 2. [Installation](#installation) 3. [Usage](#usage) -4. [Help](#help) +4. [Output](#output) +5. [Help](#help) ## System requirements: -This tool suite was developed under Linux/UNIX and is the only officially supported operating system here. -Having any derivative of the `conda` package management system installed is the only strict system requirement. -Having a recent `snakemake` (≥6.0.0) and `python` (≥3.2) version installed is required too but both can be installed via conda (see [Installation](#installation)). 
+This tool suite was developed for Linux, which is the only officially supported operating system. +Having any derivative of the conda package management system installed is the only strict system requirement. +A recent version (≥20.04.0) of nextflow is required to execute the workflows, but it can easily be installed via conda. +For installation instructions of nextflow via conda see [Installation](#installation). -
🛠️ See tested setups: +
🛠️ See list of tested setups: | Requirement | Tested with | | --- | --- | -| 64 bits operating system | Ubuntu 20.04.5 LTS | -| [Conda](https://docs.conda.io/en/latest/) | vers. 23.5.0 | -| [Snakemake](https://snakemake.readthedocs.io/en/stable/) | vers. 7.25.3 | +| 64 bits Linux operating system | Ubuntu 20.04.5 LTS | +| [Conda](https://docs.conda.io/en/latest/) | vers. 23.5.0, 24.1.2| +| [Nextflow](https://nextflow.io/) | vers. 20.04.0, 23.10.1 |
@@ -32,43 +34,59 @@ Having a recent `snakemake` (≥6.0.0) and `python` (≥3.2) version installed i 1. Download the repository: ``` -git clone https://github.com/rki-mf1/imsmp-variant-calling-benchmark.git +git clone https://github.com/rki-mf1/cievad.git ``` -2. [Optional] Install Snakemake if not yet on your system. You can use the conda environment description file provided in this repository: +2. [Optional] Install nextflow if not yet on your system. For good practise you should use a new conda environment: ``` conda deactivate -conda env create -f env/conda_snakemake7.yaml -conda activate snakemake7 +conda create -n cievad -c bioconda nextflow +conda activate cievad ``` ## Usage: -This tool suite provides multiple workflows to generate synthetic sequencing data and evaluate sets of predicted variants (callsets). -A full list of workflows, their respective modules in the python command line interface (CLI) and a detailed description of input and output files can be found in this [wiki](https://github.com/rki-mf1/imsmp-variant-calling-benchmark/wiki) page of the repository. -The current list of principal functionality is: -* Generating synthetic haplotypes from a given reference genome -* Generating synthetic NGS reads from a given haplotype -* Generating synthetic amplicon sequences from a given reference genome and generating synthetic NGS reads from the amplicons -* Generating synthetic long-reads from a given haplotype -* Evaluate compliance between sets of variants - -The repository provides a simple CLI for a convenient application-like user experience with the underlying Snakemake workflows. -The CLI is started from the root directory via +This tool suite provides multiple functional features to generate synthetic sequencing data, generate sets of ground truth variants (truthsets) and evaluate sets of predicted variants (callsets). +There are two main workflows, `hap.nf` and `eval.nf`. 
+Both workflows are executed via the nextflow command line interface (CLI). +The current list and roadmap of principal functionality is: +* [x] Generating synthetic haplotypes from a given reference genome. This returns a haplotype sequence (FASTA) and its set of variants (VCF) with respect to the reference. +* [x] Generating synthetic NGS reads from a given haplotype +* [ ] Generating synthetic amplicon sequences from a given reference genome and generating synthetic reads from those amplicons +* [ ] Generating synthetic long-reads from a given haplotype +* [x] Evaluate compliance between sets of variants + +### Generating haplotype data +The minimal command to generate haplotype data is ``` -python cievad.py --help +nextflow run hap.nf -profile local,conda ``` + +### Evaluating variant calls +The minimal command to evaluate the accordance between a truthset (generated data) and a callset is ``` -python cievad.py --help +nextflow run eval.nf -profile local,conda --callsets_dir <path/to/callsets> ``` +where `--callsets_dir` is the parameter to specify a folder containing the callset VCF files. +Currently, a callset within this folder has to follow the naming convention `callset_<X>.vcf[.gz]` where _\<X\>_ is the integer of the corresponding truthset. +Callsets can optionally be _gzip_ compressed. + +🚧 For convenience, `eval.nf` will get an option to provide a sample sheet as an alternative input format in the future.
⚠️ Run commands from the root directory Without further ado, please run the commands from a terminal at the top folder (root directory) of this repository. Otherwise relative paths within the workflows might be invalid.
+### Tuning the workflows via CLI parameters +\ + +### Tuning the workflows via the config file +\ + +## Output +\ ## Help: diff --git a/aux/ci_data/README.md b/aux/ci_data/README.md new file mode 100644 index 0000000..fe59605 --- /dev/null +++ b/aux/ci_data/README.md @@ -0,0 +1,3 @@ +# CI Data + +(10.04.2024) The `callset_{1,2,3}.vcf.gz` are renamed but original `hap{1,2,3}.filtered.gt_adjust.filtered_indels.vcf.gz` VCF files containing variants from the CovPipe2 workflow using default parameters. diff --git a/aux/ci_data/callset_1.vcf.gz b/aux/ci_data/callset_1.vcf.gz new file mode 100644 index 0000000..ed01b66 Binary files /dev/null and b/aux/ci_data/callset_1.vcf.gz differ diff --git a/aux/ci_data/callset_2.vcf.gz b/aux/ci_data/callset_2.vcf.gz new file mode 100644 index 0000000..cb77477 Binary files /dev/null and b/aux/ci_data/callset_2.vcf.gz differ diff --git a/aux/ci_data/callset_3.vcf.gz b/aux/ci_data/callset_3.vcf.gz new file mode 100644 index 0000000..f99a80d Binary files /dev/null and b/aux/ci_data/callset_3.vcf.gz differ diff --git a/bin/SURVIVOR b/bin/SURVIVOR deleted file mode 100755 index 00725eb..0000000 Binary files a/bin/SURVIVOR and /dev/null differ diff --git a/bin/amplisim-v0_1_0-ubuntu_20_04 b/bin/amplisim-v0_1_0-ubuntu_20_04 deleted file mode 100755 index 4e35900..0000000 Binary files a/bin/amplisim-v0_1_0-ubuntu_20_04 and /dev/null differ diff --git a/bin/mason_simulator b/bin/mason_simulator deleted file mode 100755 index 4de06ea..0000000 Binary files a/bin/mason_simulator and /dev/null differ diff --git a/cievad.py b/cievad.py deleted file mode 100644 index 859e287..0000000 --- a/cievad.py +++ /dev/null @@ -1,264 +0,0 @@ -# ---------------------------------------------------------------------------------------- -# SETUP -# ---------------------------------------------------------------------------------------- -import os -import sys -import argparse -import time -from python.runWorkflows import run_hap, run_ngs, run_ampli, run_nanopore, 
run_eval - -if sys.version_info.major != 3: - print("Error: Abort: This UI requires python3.") - exit(1) - - -# ---------------------------------------------------------------------------------------- -# PARSER -# ---------------------------------------------------------------------------------------- -if __name__ == "__main__": - __version_info__ = ('0','1','0') - __version__ = '.'.join(__version_info__) - - parser = argparse.ArgumentParser( - prog='cievad', - description='CIEVaD - A tool suite to facilitate continuous integration and evaluation of variant detection.', - epilog='For more help and bug reports please refer to the GitHub repository.') - parser.add_argument('--version', action='version', version="%(prog)s ("+__version__+")") - - subparsers = parser.add_subparsers(help='sub-command help', dest='command') - - # ---------------- - # SUB PARSERS | - # ---------------- - - # parser for haplotype simulation - parser_hap = subparsers.add_parser('hap', - help='Module to generate haplotypes from a given reference.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser_hap.add_argument( - '-s', '--snakefile', - help='Path to the Snakefile.', - default='snakemake/hap/Snakefile') - parser_hap_group1 = parser_hap.add_argument_group('Run with config', 'Use a config file (yaml) to generate haplotypes.') - parser_hap_group1.add_argument( - '-c', '--config', - metavar='FILE', - default = None, - help='Path to a config file for the snakemake pipeline.') - parser_hap_group2 = parser_hap.add_argument_group('Run with parameter', 'Specify parameters to generate haplotypes.') - parser_hap_group2.add_argument( - '-d', '--head-dir', - metavar='DIR', - default= os.path.realpath(os.path.dirname(__file__)), - help='Root directory path.') - parser_hap_group2.add_argument( - '-n', '--nb-samples', - type = int, - metavar='INT', - default = 10, - help='Specify the number of samples to be simulated.') - parser_hap_group2.add_argument( - '--seed', - type = int, - 
metavar='INT', - default = int(round(time.time())), - help='Specify a random seed. Default is current system time in seconds.') - parser_hap_group2.add_argument( - '-r', '--reference', - metavar='FASTA', - help='Path to reference genome.') - parser_hap.set_defaults(func=run_hap) - - # parser for NGS read simulation - parser_ngs = subparsers.add_parser('ngs', - help='Module to generate NGS reads from a given reference.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser_ngs.add_argument( - '-s', '--snakefile', - help='Path to the Snakefile.', - default='snakemake/ngs/Snakefile') - parser_ngs.add_argument( - '-t', '--threads', - help='Number of CPU threads for the task.', - metavar='INT', - default = 1) - parser_ngs_group1 = parser_ngs.add_argument_group('Run with config', 'Use a config file (yaml) to generate NGS reads.') - parser_ngs_group1.add_argument( - '-c', '--config', - metavar='FILE', - default = None, - help='Path to a config file for the snakemake pipeline.') - parser_ngs_group2 = parser_ngs.add_argument_group('Run with parameter', 'Specify parameters to generate NGS reads.') - parser_ngs_group2.add_argument( - '-d', '--head-dir', - metavar='DIR', - default= os.path.realpath(os.path.dirname(__file__)), - help='Root directory path.') - parser_ngs_group2.add_argument( - '-n', '--nb-samples', - type = int, - metavar='INT', - default = 10, - help='Specify the number of samples to be simulated.') - parser_ngs_group2.add_argument( - '--seed', - type = int, - metavar='INT', - default = int(round(time.time())), - help='Specify a random seed. Default is current system time in seconds.') - parser_ngs_group2.add_argument( - '-f', '--nb-frags', - type = int, - metavar='INT', - default = 3000, - help='Specify the number of genomic fragments used for the reads simulation. 
This INT*2 will result in the total number of NGS reads.') - parser_ngs.set_defaults(func=run_ngs) - - # parser for generating amplicons and NGS reads - parser_ampli = subparsers.add_parser('ampli', - help='Module to generate amplicons and NGS reads from a given reference.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser_ampli.add_argument( - '-s', '--snakefile', - help='Path to the Snakefile.', - default='snakemake/amplicon/Snakefile') - parser_ampli_group1 = parser_ampli.add_argument_group('Run with config', 'Use a config file (yaml) to generate amplicons and NGS reads.') - parser_ampli_group1.add_argument( - '-c', '--config', - metavar='FILE', - default = None, - help='Path to a config file for the snakemake pipeline.') - parser_ampli_group2 = parser_ampli.add_argument_group('Run with parameter', 'Specify parameters to generate amplicons and NGS reads.') - parser_ampli_group2.add_argument( - '-d', '--head-dir', - metavar='DIR', - default= os.path.realpath(os.path.dirname(__file__)), - help='Root directory path.') - parser_ampli_group2.add_argument( - '-n', '--nb-samples', - type = int, - metavar='INT', - default = 10, - help='Specify the number of samples to be simulated.') - parser_ampli_group2.add_argument( - '--seed', - type = int, - metavar='INT', - default = int(round(time.time())), - help='Specify a random seed. 
Default is current system time in seconds.') - parser_ampli_group2.add_argument( - '-r', '--reference', - metavar='FASTA', - help='Path to reference genome.') - parser_ampli_group2.add_argument( - '-p', '--primers', - metavar = 'BED', - help='Path to primer file.') - parser_ampli.set_defaults(func=run_ampli) - - # parser for generating nanopore reads - parser_nanopore = subparsers.add_parser('nano', - help='Module to generate Oxford-Nanopore-style long reads from a given reference.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser_nanopore.add_argument( - '-s', '--snakefile', - help='Path to the Snakefile.', - default='snakemake/nanopore/Snakefile') - parser_nanopore.add_argument( - '-t', '--threads', - help='Number of CPU threads for the task.', - metavar='INT', - default = 1) - parser_nanopore_group1 = parser_nanopore.add_argument_group('Run with config', 'Use a config file (yaml) to generate ONT-style long reads.') - parser_nanopore_group1.add_argument( - '-c', '--config', - metavar='FILE', - default = None, - help='Path to a config file for the snakemake pipeline.') - parser_nanopore_group2 = parser_nanopore.add_argument_group('Run with parameter', 'Specify parameters to generate ONT-style long reads.') - parser_nanopore_group2.add_argument( - '-d', '--head-dir', - metavar='DIR', - default= os.path.realpath(os.path.dirname(__file__)), - help='Root directory path.') - parser_nanopore_group2.add_argument( - '-n', '--nb-samples', - type = int, - metavar='INT', - default = 10, - help='Specify the number of samples to be simulated.') - parser_nanopore_group2.add_argument( - '--seed', - type = int, - metavar='INT', - default = int(round(time.time())), - help='Specify a random seed. 
Default is current system time in seconds.') - parser_nanopore_group2.add_argument( - '-m', '--model-prefix', - metavar='STR', - default = 'aux/nanosim_model/human_NA12878_DNA_FAB49712_guppy/training', - help='Specify a path (relative to the HEAD_DIR) to the prefix of a nanosim model.') - parser_nanopore_group2.add_argument( - '-g', '--model-caller', - metavar='STR', - default = 'guppy', - help='Specify a caller of the nanosim model specified with -m.') - parser_nanopore_group2.add_argument( - '-y', '--dna-type', - metavar='STR', - default = 'linear', - help='Specify a dna type for the nanosim simulator.') - parser_nanopore_group2.add_argument( - '-l', '--median-length', - type = int, - metavar='INT', - default = 5000, - help='Specify a median read length for the nanosim simulator.') - parser_nanopore_group2.add_argument( - '-a', '--sd-length', - type = float, - metavar='FLOAT', - default = 1.05, - help='Specify a standard deviation of the read length for the nanosim simulator.') - parser_nanopore_group2.add_argument( - '-r', '--nb-reads', - type = int, - metavar='INT', - default = 180, - help='Specify the number of long reads to be simulated per sample.') - parser_nanopore.set_defaults(func=run_nanopore) - - # parser for variant evaluation - parser_eval = subparsers.add_parser('eval', - help='Module for variant set evaluation.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser_eval.add_argument( - '-s', '--snakefile', - help='Path to the Snakefile.', - default='snakemake/eval/Snakefile') - parser_eval_group1 = parser_eval.add_argument_group('Run with config', 'Use a config file (yaml) to evaluate a variant callset.') - parser_eval_group1.add_argument( - '-c', '--config', - metavar='FILE', - default = None, - help='Path to a config file for the snakemake pipeline.') - parser_eval_group2 = parser_eval.add_argument_group('Run with parameter', 'Specify parameters to evaluate a variant callset.') - parser_eval_group2.add_argument( - '-d', 
'--head-dir', - metavar='DIR', - default= os.path.realpath(os.path.dirname(__file__)), - help='Root directory path.') - parser_eval_group2.add_argument( - '-n', '--nb-samples', - type = int, - metavar='INT', - default = 10, - help='Specify the number of samples to be simulated.') - parser_eval.set_defaults(func=run_eval) - - # --------------- - # PARSE ARGS | - # --------------- - - args = parser.parse_args() - args.func(args) if len(sys.argv)>1 else print("Error: Abort: Too few arguments. See help page: python vc_benchmark.py --help") \ No newline at end of file diff --git a/env/conda_bcftools.yaml b/env/conda_bcftools.yaml deleted file mode 100644 index a27495b..0000000 --- a/env/conda_bcftools.yaml +++ /dev/null @@ -1,38 +0,0 @@ -name: bcftools -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_gnu - - bcftools=1.17=h3cc50cf_1 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.19.1=hd590300_0 - - ca-certificates=2023.5.7=hbcca054_0 - - gsl=2.7=he838d99_0 - - htslib=1.17=h81da01d_2 - - keyutils=1.6.1=h166bdaf_0 - - krb5=1.20.1=h81ceb04_0 - - libblas=3.9.0=17_linux64_openblas - - libcblas=3.9.0=17_linux64_openblas - - libcurl=8.1.2=h409715c_0 - - libdeflate=1.18=h0b41bf4_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libgcc-ng=13.1.0=he5830b7_0 - - libgfortran-ng=13.1.0=h69a702a_0 - - libgfortran5=13.1.0=h15d22d2_0 - - libgomp=13.1.0=he5830b7_0 - - libnghttp2=1.52.0=h61bc06f_0 - - libnsl=2.0.0=h7f98852_0 - - libopenblas=0.3.23=pthreads_h80387f5_0 - - libssh2=1.11.0=h0841786_0 - - libstdcxx-ng=13.1.0=hfd8a6a1_0 - - libzlib=1.2.13=hd590300_5 - - ncurses=6.4=hcb278e6_0 - - openssl=3.1.1=hd590300_1 - - perl=5.32.1=2_h7f98852_perl5 - - xz=5.2.6=h166bdaf_0 - - zlib=1.2.13=hd590300_5 - - zstd=1.5.2=h3eb15da_6 diff --git a/env/conda_bwa_and_samtools.yaml b/env/conda_bwa_and_samtools.yaml deleted file mode 100644 index 2b9f6ad..0000000 --- a/env/conda_bwa_and_samtools.yaml +++ /dev/null @@ 
-1,29 +0,0 @@ -name: samtools -channels: - - bioconda - - defaults -dependencies: - - _libgcc_mutex=0.1=main - - _openmp_mutex=5.1=1_gnu - - bwa=0.7.17=h5bf99c6_8 - - bzip2=1.0.8=h7b6447c_0 - - c-ares=1.19.0=h5eee18b_0 - - ca-certificates=2023.01.10=h06a4308_0 - - curl=7.88.1=h5eee18b_0 - - gdbm=1.18=hd4cb3f1_4 - - krb5=1.19.4=h568e23c_0 - - libcurl=7.88.1=h91b91d3_0 - - libedit=3.1.20221030=h5eee18b_0 - - libev=4.33=h7f8727e_1 - - libgcc-ng=11.2.0=h1234567_1 - - libgomp=11.2.0=h1234567_1 - - libnghttp2=1.46.0=hce63b2e_0 - - libssh2=1.10.0=h8f2d780_0 - - libstdcxx-ng=11.2.0=h1234567_1 - - ncurses=6.4=h6a678d5_0 - - openssl=1.1.1t=h7f8727e_0 - - perl=5.34.0=h5eee18b_2 - - readline=8.2=h5eee18b_0 - - samtools=1.6=hb116620_7 - - xz=5.2.10=h5eee18b_1 - - zlib=1.2.13=h5eee18b_0 diff --git a/env/conda_ci.yaml b/env/conda_ci.yaml deleted file mode 100644 index 3bde0a5..0000000 --- a/env/conda_ci.yaml +++ /dev/null @@ -1,72 +0,0 @@ -name: sc2-gr-ci -channels: - - bioconda - - conda-forge - - defaults -dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_gnu - - bcftools=1.17=h3cc50cf_1 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.19.1=hd590300_0 - - ca-certificates=2023.5.7=hbcca054_0 - - certifi=2016.9.26=py36_0 - - docutils=0.16=py36h5fab9bb_3 - - dropbox=5.2.1=py36_0 - - ecdsa=0.18.0=pyhd8ed1ab_0 - - filechunkio=1.6=py36_0 - - ftputil=3.2=py36_0 - - gsl=2.7=he838d99_0 - - htslib=1.17=h6bc39ce_1 - - keyutils=1.6.1=h166bdaf_0 - - krb5=1.19.3=h3790be6_0 - - ld_impl_linux-64=2.40=h41732ed_0 - - libblas=3.9.0=16_linux64_openblas - - libcblas=3.9.0=16_linux64_openblas - - libcurl=7.87.0=h91b91d3_0 - - libdeflate=1.18=h0b41bf4_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libffi=3.4.2=h7f98852_5 - - libgcc-ng=12.2.0=h65d4601_19 - - libgfortran-ng=12.2.0=h69a702a_19 - - libgfortran5=12.2.0=h337968e_19 - - libgomp=12.2.0=h65d4601_19 - - liblapack=3.9.0=16_linux64_openblas - - libnghttp2=1.46.0=hce63b2e_0 - - libnsl=2.0.0=h7f98852_0 - - 
libopenblas=0.3.21=pthreads_h78a6416_3 - - libsqlite=3.42.0=h2797004_0 - - libssh2=1.10.0=haa6b8db_3 - - libstdcxx-ng=12.2.0=h46fd767_19 - - libzlib=1.2.13=h166bdaf_4 - - mason=2.0.9=h9ee0642_1 - - ncurses=6.3=h27087fc_1 - - numpy=1.19.5=py36hfc0c790_2 - - openssl=1.1.1u=hd590300_0 - - pandas=1.1.5=py36h284efc9_0 - - paramiko=1.18.2=py36_0 - - perl=5.32.1=2_h7f98852_perl5 - - pip=20.0.2=py36_1 - - psutil=4.4.2=py36_0 - - pycrypto=2.6.1=py36he6145b8_1005 - - pysftp=0.2.9=py36_0 - - python=3.6.15=hb7a2778_0_cpython - - python-dateutil=2.8.2=pyhd8ed1ab_0 - - python_abi=3.6=2_cp36m - - pytz=2023.3=pyhd8ed1ab_0 - - pyyaml=5.4.1=py36h8f6f2f9_1 - - readline=8.2=h8228510_1 - - requests=2.12.5=py36_0 - - samtools=1.17=hd87286a_1 - - setuptools=49.6.0=py36h5fab9bb_3 - - six=1.16.0=pyh6c4a22f_0 - - snakemake=3.13.3=py36_0 - - sqlite=3.42.0=h2c6b66d_0 - - tk=8.6.12=h27826a3_0 - - urllib3=1.12=py36_0 - - wheel=0.36.2=pyhd3deb0d_0 - - wrapt=1.12.1=py36h8f6f2f9_3 - - xz=5.2.6=h166bdaf_0 - - yaml=0.2.5=h7f98852_2 - - zlib=1.2.13=h166bdaf_4 diff --git a/env/conda_freebayes.yaml b/env/conda_freebayes.yaml deleted file mode 100644 index 887ca6c..0000000 --- a/env/conda_freebayes.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: freebayes -channels: - - bioconda - - conda-forge - - defaults -dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_gnu - - bc=1.07.1=h7f98852_0 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.19.0=hd590300_0 - - ca-certificates=2023.5.7=hbcca054_0 - - freebayes=1.3.6=h6f59eb7_3 - - htslib=1.17=h81da01d_2 - - keyutils=1.6.1=h166bdaf_0 - - krb5=1.20.1=h81ceb04_0 - - ld_impl_linux-64=2.40=h41732ed_0 - - libcurl=8.1.0=h409715c_0 - - libdeflate=1.18=h0b41bf4_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libexpat=2.5.0=hcb278e6_1 - - libffi=3.4.2=h7f98852_5 - - libgcc-ng=12.2.0=h65d4601_19 - - libgomp=12.2.0=h65d4601_19 - - libnghttp2=1.52.0=h61bc06f_0 - - libnsl=2.0.0=h7f98852_0 - - libsqlite=3.42.0=h2797004_0 - - 
libssh2=1.10.0=hf14f497_3 - - libstdcxx-ng=12.2.0=h46fd767_19 - - libuuid=2.38.1=h0b41bf4_0 - - libzlib=1.2.13=h166bdaf_4 - - ncurses=6.3=h27087fc_1 - - openssl=3.1.0=hd590300_3 - - parallel=20170422=pl5.22.0_0 - - perl=5.22.0.1=0 - - pip=23.1.2=pyhd8ed1ab_0 - - python=3.11.3=h2755cc3_0_cpython - - readline=8.2=h8228510_1 - - samtools=1.17=hd87286a_1 - - setuptools=67.7.2=pyhd8ed1ab_0 - - tabixpp=1.1.0=h6448e42_12 - - tk=8.6.12=h27826a3_0 - - tzdata=2023c=h71feb2d_0 - - vcflib=1.0.3=h6b7c446_3 - - wheel=0.40.0=pyhd8ed1ab_0 - - xz=5.2.6=h166bdaf_0 - - zlib=1.2.13=h166bdaf_4 - - zstd=1.5.2=h3eb15da_6 diff --git a/env/conda_mason.yaml b/env/conda_mason.yaml deleted file mode 100644 index 6e5d7fe..0000000 --- a/env/conda_mason.yaml +++ /dev/null @@ -1,7 +0,0 @@ -name: mason -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - mason=2.0.9=h9ee0642_1 diff --git a/env/conda_nanosim.yaml b/env/conda_nanosim.yaml deleted file mode 100644 index 478b3f2..0000000 --- a/env/conda_nanosim.yaml +++ /dev/null @@ -1,138 +0,0 @@ -name: nanosim -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_gnu - - bedtools=2.31.0=hf5e1c6e_2 - - brotli=1.0.9=h166bdaf_9 - - brotli-bin=1.0.9=h166bdaf_9 - - brotli-python=1.0.9=py38hfa26641_9 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.19.1=hd590300_0 - - ca-certificates=2023.5.7=hbcca054_0 - - cairo=1.16.0=hbbf8b49_1016 - - certifi=2023.5.7=pyhd8ed1ab_0 - - charset-normalizer=3.1.0=pyhd8ed1ab_0 - - contourpy=1.1.0=py38h7f3f72f_0 - - cycler=0.11.0=pyhd8ed1ab_0 - - expat=2.5.0=hcb278e6_1 - - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 - - font-ttf-inconsolata=3.000=h77eed37_0 - - font-ttf-source-code-pro=2.038=h77eed37_0 - - font-ttf-ubuntu=0.83=hab24e00_0 - - fontconfig=2.14.2=h14ed4e7_0 - - fonts-conda-ecosystem=1=0 - - fonts-conda-forge=1=0 - - fonttools=4.40.0=py38h01eb140_0 - - freetype=2.12.1=hca18f0e_1 - - fribidi=1.0.10=h36c2ea0_0 - - 
genometools-genometools=1.6.2=py38hf7b97cc_6 - - gettext=0.21.1=h27087fc_0 - - graphite2=1.3.13=h58526e2_1001 - - harfbuzz=7.3.0=hdb3a94d_0 - - htseq=2.0.3=py38h8c35140_1 - - htslib=1.17=h81da01d_2 - - icu=72.1=hcb278e6_0 - - idna=3.4=pyhd8ed1ab_0 - - importlib-resources=5.12.0=pyhd8ed1ab_0 - - importlib_resources=5.12.0=pyhd8ed1ab_0 - - joblib=1.3.0=pyhd8ed1ab_1 - - k8=0.2.5=hdcf5f25_4 - - keyutils=1.6.1=h166bdaf_0 - - kiwisolver=1.4.4=py38h43d8883_1 - - krb5=1.20.1=h81ceb04_0 - - last=1454=h5b5514e_0 - - lcms2=2.15=haa2dc70_1 - - ld_impl_linux-64=2.40=h41732ed_0 - - lerc=4.0.0=h27087fc_0 - - libblas=3.9.0=17_linux64_openblas - - libbrotlicommon=1.0.9=h166bdaf_9 - - libbrotlidec=1.0.9=h166bdaf_9 - - libbrotlienc=1.0.9=h166bdaf_9 - - libcblas=3.9.0=17_linux64_openblas - - libcurl=8.1.2=h409715c_0 - - libdeflate=1.18=h0b41bf4_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libexpat=2.5.0=hcb278e6_1 - - libffi=3.4.2=h7f98852_5 - - libgcc-ng=13.1.0=he5830b7_0 - - libgfortran-ng=13.1.0=h69a702a_0 - - libgfortran5=13.1.0=h15d22d2_0 - - libglib=2.76.3=hebfc3b9_0 - - libgomp=13.1.0=he5830b7_0 - - libiconv=1.17=h166bdaf_0 - - libjpeg-turbo=2.1.5.1=h0b41bf4_0 - - liblapack=3.9.0=17_linux64_openblas - - libnghttp2=1.52.0=h61bc06f_0 - - libnsl=2.0.0=h7f98852_0 - - libopenblas=0.3.23=pthreads_h80387f5_0 - - libpng=1.6.39=h753d276_0 - - libsqlite=3.42.0=h2797004_0 - - libssh2=1.11.0=h0841786_0 - - libstdcxx-ng=13.1.0=hfd8a6a1_0 - - libtiff=4.5.1=h8b53f26_0 - - libuuid=2.38.1=h0b41bf4_0 - - libwebp-base=1.3.1=hd590300_0 - - libxcb=1.15=h0b41bf4_0 - - libzlib=1.2.13=hd590300_5 - - matplotlib-base=3.7.1=py38hd6c3c57_0 - - minimap2=2.26=he4a0461_1 - - munkres=1.1.4=pyh9f0ad1d_0 - - nanosim=3.1.0=hdfd78af_0 - - ncurses=6.4=hcb278e6_0 - - numpy=1.24.4=py38h59b608b_0 - - openjpeg=2.5.0=hfec8fc6_2 - - openssl=3.1.1=hd590300_1 - - packaging=23.1=pyhd8ed1ab_0 - - pandas=2.0.3=py38h01efb38_0 - - pango=1.50.14=heaa33ce_1 - - parallel=20230522=ha770c72_0 - - 
pcre2=10.40=hc3806b6_0 - - perl=5.32.1=3_hd590300_perl5 - - pillow=10.0.0=py38h885162f_0 - - pip=23.1.2=pyhd8ed1ab_0 - - pixman=0.40.0=h36c2ea0_0 - - platformdirs=3.8.0=pyhd8ed1ab_0 - - pooch=1.7.0=pyha770c72_3 - - pthread-stubs=0.4=h36c2ea0_1001 - - pybedtools=0.9.0=py38he0f268d_2 - - pyparsing=3.1.0=pyhd8ed1ab_0 - - pysam=0.21.0=py38h15b938a_1 - - pysocks=1.7.1=pyha2e5f31_6 - - python=3.8.17=he550d4f_0_cpython - - python-dateutil=2.8.2=pyhd8ed1ab_0 - - python-tzdata=2023.3=pyhd8ed1ab_0 - - python_abi=3.8=3_cp38 - - pytz=2023.3=pyhd8ed1ab_0 - - readline=8.2=h8228510_1 - - requests=2.31.0=pyhd8ed1ab_0 - - samtools=1.17=hd87286a_1 - - scikit-learn=0.22.1=py38hcdab131_1 - - scipy=1.10.1=py38h59b608b_3 - - setuptools=68.0.0=pyhd8ed1ab_0 - - six=1.16.0=pyh6c4a22f_0 - - tk=8.6.12=h27826a3_0 - - typing-extensions=4.7.1=hd8ed1ab_0 - - typing_extensions=4.7.1=pyha770c72_0 - - unicodedata2=15.0.0=py38h0a891b7_0 - - urllib3=2.0.3=pyhd8ed1ab_1 - - wheel=0.40.0=pyhd8ed1ab_0 - - xorg-kbproto=1.0.7=h7f98852_1002 - - xorg-libice=1.1.1=hd590300_0 - - xorg-libsm=1.2.4=h7391055_0 - - xorg-libx11=1.8.6=h8ee46fc_0 - - xorg-libxau=1.0.11=hd590300_0 - - xorg-libxdmcp=1.1.3=h7f98852_0 - - xorg-libxext=1.3.4=h0b41bf4_2 - - xorg-libxrender=0.9.11=hd590300_0 - - xorg-renderproto=0.11.1=h7f98852_1002 - - xorg-xextproto=7.3.0=h0b41bf4_1003 - - xorg-xproto=7.0.31=h7f98852_1007 - - xz=5.2.6=h166bdaf_0 - - zipp=3.15.0=pyhd8ed1ab_0 - - zlib=1.2.13=hd590300_5 - - zstd=1.5.2=h3eb15da_6 diff --git a/env/conda_nxf.yml b/env/conda_nxf.yml new file mode 100644 index 0000000..a3d5c41 --- /dev/null +++ b/env/conda_nxf.yml @@ -0,0 +1,82 @@ +name: nextflow +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_gnu + - alsa-lib=1.2.10=hd590300_0 + - bzip2=1.0.8=hd590300_5 + - c-ares=1.26.0=hd590300_0 + - ca-certificates=2024.2.2=hbcca054_0 + - cairo=1.18.0=h3faef2a_0 + - coreutils=9.4=hd590300_0 + - curl=8.5.0=hca28451_0 + - 
expat=2.5.0=hcb278e6_1 + - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 + - font-ttf-inconsolata=3.000=h77eed37_0 + - font-ttf-source-code-pro=2.038=h77eed37_0 + - font-ttf-ubuntu=0.83=h77eed37_1 + - fontconfig=2.14.2=h14ed4e7_0 + - fonts-conda-ecosystem=1=0 + - fonts-conda-forge=1=0 + - freetype=2.12.1=h267a509_2 + - gettext=0.21.1=h27087fc_0 + - giflib=5.2.1=h0b41bf4_3 + - graphite2=1.3.13=h58526e2_1001 + - harfbuzz=8.3.0=h3d44ed6_0 + - icu=73.2=h59595ed_0 + - keyutils=1.6.1=h166bdaf_0 + - krb5=1.21.2=h659d440_0 + - lcms2=2.16=hb7c19ff_0 + - lerc=4.0.0=h27087fc_0 + - libcups=2.3.3=h4637d8d_4 + - libcurl=8.5.0=hca28451_0 + - libdeflate=1.19=hd590300_0 + - libedit=3.1.20191231=he28a2e2_2 + - libev=4.33=hd590300_2 + - libexpat=2.5.0=hcb278e6_1 + - libffi=3.4.2=h7f98852_5 + - libgcc-ng=13.2.0=h807b86a_5 + - libglib=2.78.3=h783c2da_0 + - libgomp=13.2.0=h807b86a_5 + - libiconv=1.17=hd590300_2 + - libjpeg-turbo=3.0.0=hd590300_1 + - libnghttp2=1.58.0=h47da74e_1 + - libpng=1.6.42=h2797004_0 + - libssh2=1.11.0=h0841786_0 + - libstdcxx-ng=13.2.0=h7e041cc_5 + - libtiff=4.6.0=ha9c0a0a_2 + - libuuid=2.38.1=h0b41bf4_0 + - libwebp-base=1.3.2=hd590300_0 + - libxcb=1.15=h0b41bf4_0 + - libzlib=1.2.13=hd590300_5 + - ncurses=6.4=h59595ed_2 + - nextflow=23.10.1=hdfd78af_0 + - openjdk=17.0.10=h4260e57_0 + - openssl=3.2.1=hd590300_0 + - pcre2=10.42=hcad00b1_0 + - pixman=0.43.2=h59595ed_0 + - pthread-stubs=0.4=h36c2ea0_1001 + - xorg-fixesproto=5.0=h7f98852_1002 + - xorg-inputproto=2.3.2=h7f98852_1002 + - xorg-kbproto=1.0.7=h7f98852_1002 + - xorg-libice=1.1.1=hd590300_0 + - xorg-libsm=1.2.4=h7391055_0 + - xorg-libx11=1.8.7=h8ee46fc_0 + - xorg-libxau=1.0.11=hd590300_0 + - xorg-libxdmcp=1.1.3=h7f98852_0 + - xorg-libxext=1.3.4=h0b41bf4_2 + - xorg-libxfixes=5.0.3=h7f98852_1004 + - xorg-libxi=1.7.10=h7f98852_0 + - xorg-libxrender=0.9.11=hd590300_0 + - xorg-libxt=1.3.0=hd590300_1 + - xorg-libxtst=1.2.3=h7f98852_1002 + - xorg-recordproto=1.14.2=h7f98852_1002 + - 
xorg-renderproto=0.11.1=h7f98852_1002 + - xorg-xextproto=7.3.0=h0b41bf4_1003 + - xorg-xproto=7.0.31=h7f98852_1007 + - xz=5.2.6=h166bdaf_0 + - zlib=1.2.13=hd590300_5 + - zstd=1.5.5=hfc55251_0 diff --git a/env/conda_picard.yaml b/env/conda_picard.yaml deleted file mode 100644 index 4a044ba..0000000 --- a/env/conda_picard.yaml +++ /dev/null @@ -1,110 +0,0 @@ -name: picard -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_gnu - - _r-mutex=1.0.1=anacondar_1 - - alsa-lib=1.2.9=hd590300_0 - - binutils_impl_linux-64=2.40=hf600244_0 - - bwidget=1.9.14=ha770c72_1 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.19.1=hd590300_0 - - ca-certificates=2023.5.7=hbcca054_0 - - cairo=1.16.0=hbbf8b49_1016 - - curl=8.1.2=h409715c_0 - - expat=2.5.0=hcb278e6_1 - - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 - - font-ttf-inconsolata=3.000=h77eed37_0 - - font-ttf-source-code-pro=2.038=h77eed37_0 - - font-ttf-ubuntu=0.83=hab24e00_0 - - fontconfig=2.14.2=h14ed4e7_0 - - fonts-conda-ecosystem=1=0 - - fonts-conda-forge=1=0 - - freetype=2.12.1=hca18f0e_1 - - fribidi=1.0.10=h36c2ea0_0 - - gcc_impl_linux-64=13.1.0=hc4be1a9_0 - - gettext=0.21.1=h27087fc_0 - - gfortran_impl_linux-64=13.1.0=hd511a9b_0 - - giflib=5.2.1=h0b41bf4_3 - - graphite2=1.3.13=h58526e2_1001 - - gsl=2.7=he838d99_0 - - gxx_impl_linux-64=13.1.0=hc4be1a9_0 - - harfbuzz=7.3.0=hdb3a94d_0 - - icu=72.1=hcb278e6_0 - - kernel-headers_linux-64=2.6.32=he073ed8_15 - - keyutils=1.6.1=h166bdaf_0 - - krb5=1.20.1=h81ceb04_0 - - lcms2=2.15=haa2dc70_1 - - ld_impl_linux-64=2.40=h41732ed_0 - - lerc=4.0.0=h27087fc_0 - - libblas=3.9.0=17_linux64_openblas - - libcblas=3.9.0=17_linux64_openblas - - libcups=2.3.3=h36d4200_3 - - libcurl=8.1.2=h409715c_0 - - libdeflate=1.18=h0b41bf4_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libexpat=2.5.0=hcb278e6_1 - - libffi=3.4.2=h7f98852_5 - - libgcc-devel_linux-64=13.1.0=he3cc6c4_0 - - libgcc-ng=13.1.0=he5830b7_0 - - 
libgfortran-ng=13.1.0=h69a702a_0 - - libgfortran5=13.1.0=h15d22d2_0 - - libglib=2.76.3=hebfc3b9_0 - - libgomp=13.1.0=he5830b7_0 - - libiconv=1.17=h166bdaf_0 - - libjpeg-turbo=2.1.5.1=h0b41bf4_0 - - liblapack=3.9.0=17_linux64_openblas - - libnghttp2=1.52.0=h61bc06f_0 - - libopenblas=0.3.23=pthreads_h80387f5_0 - - libpng=1.6.39=h753d276_0 - - libsanitizer=13.1.0=hfd8a6a1_0 - - libssh2=1.11.0=h0841786_0 - - libstdcxx-devel_linux-64=13.1.0=he3cc6c4_0 - - libstdcxx-ng=13.1.0=hfd8a6a1_0 - - libtiff=4.5.1=h8b53f26_0 - - libuuid=2.38.1=h0b41bf4_0 - - libwebp-base=1.3.0=h0b41bf4_0 - - libxcb=1.15=h0b41bf4_0 - - libxml2=2.11.4=h0d562d8_0 - - libzlib=1.2.13=hd590300_5 - - make=4.3=hd18ef5c_1 - - ncurses=6.4=hcb278e6_0 - - openjdk=20.0.0=h8e330f5_0 - - openssl=3.1.1=hd590300_1 - - pango=1.50.14=heaa33ce_1 - - pcre2=10.40=hc3806b6_0 - - picard=3.0.0=hdfd78af_1 - - pixman=0.40.0=h36c2ea0_0 - - pthread-stubs=0.4=h36c2ea0_1001 - - r-base=4.3.0=hfabd6f2_1 - - readline=8.2=h8228510_1 - - sed=4.8=he412f7d_0 - - sysroot_linux-64=2.12=he073ed8_15 - - tk=8.6.12=h27826a3_0 - - tktable=2.10=hb7b940f_3 - - xorg-fixesproto=5.0=h7f98852_1002 - - xorg-inputproto=2.3.2=h7f98852_1002 - - xorg-kbproto=1.0.7=h7f98852_1002 - - xorg-libice=1.0.10=h7f98852_0 - - xorg-libsm=1.2.3=hd9c2040_1000 - - xorg-libx11=1.8.6=h8ee46fc_0 - - xorg-libxau=1.0.11=hd590300_0 - - xorg-libxdmcp=1.1.3=h7f98852_0 - - xorg-libxext=1.3.4=h0b41bf4_2 - - xorg-libxfixes=5.0.3=h7f98852_1004 - - xorg-libxi=1.7.10=h7f98852_0 - - xorg-libxrender=0.9.10=h7f98852_1003 - - xorg-libxt=1.3.0=hd590300_0 - - xorg-libxtst=1.2.3=h7f98852_1002 - - xorg-recordproto=1.14.2=h7f98852_1002 - - xorg-renderproto=0.11.1=h7f98852_1002 - - xorg-xextproto=7.3.0=h0b41bf4_1003 - - xorg-xproto=7.0.31=h7f98852_1007 - - xz=5.2.6=h166bdaf_0 - - zlib=1.2.13=hd590300_5 - - zstd=1.5.2=h3eb15da_6 -prefix: /scratch/krannicht/miniconda3/envs/picard diff --git a/env/conda_snakemake7.yaml b/env/conda_snakemake7.yaml deleted file mode 100644 index 212a885..0000000 
--- a/env/conda_snakemake7.yaml +++ /dev/null @@ -1,211 +0,0 @@ -name: snakemake7 -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_gnu - - aioeasywebdav=2.4.0=pyha770c72_0 - - aiohttp=3.8.4=py311h2582759_0 - - aiosignal=1.3.1=pyhd8ed1ab_0 - - amply=0.1.5=pyhd8ed1ab_0 - - appdirs=1.4.4=pyh9f0ad1d_0 - - async-timeout=4.0.2=pyhd8ed1ab_0 - - attmap=0.13.2=pyhd8ed1ab_0 - - attrs=23.1.0=pyh71513ae_1 - - backports=1.0=pyhd8ed1ab_3 - - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0 - - bcrypt=3.2.2=py311hd4cff14_1 - - boltons=23.0.0=pyhd8ed1ab_0 - - boto3=1.26.133=pyhd8ed1ab_0 - - botocore=1.29.133=pyhd8ed1ab_0 - - brotlipy=0.7.0=py311hd4cff14_1005 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.18.1=h7f98852_0 - - ca-certificates=2023.5.7=hbcca054_0 - - cachetools=5.3.0=pyhd8ed1ab_0 - - certifi=2023.5.7=pyhd8ed1ab_0 - - cffi=1.15.1=py311h409f033_3 - - charset-normalizer=2.1.1=pyhd8ed1ab_0 - - coin-or-cbc=2.10.10=h9002f0b_0 - - coin-or-cgl=0.60.7=h516709c_0 - - coin-or-clp=1.17.8=h1ee7a9c_0 - - coin-or-osi=0.108.8=ha2443b9_0 - - coin-or-utils=2.11.9=hee58242_0 - - coincbc=2.10.10=0_metapackage - - colorama=0.4.6=pyhd8ed1ab_0 - - conda=23.3.1=py311h38be061_0 - - conda-package-handling=2.0.2=pyh38be061_0 - - conda-package-streaming=0.7.0=pyhd8ed1ab_1 - - configargparse=1.5.3=pyhd8ed1ab_0 - - connection_pool=0.0.3=pyhd3deb0d_0 - - cryptography=40.0.2=py311h9b4c7bb_0 - - datrie=0.8.2=py311hd4cff14_6 - - defusedxml=0.7.1=pyhd8ed1ab_0 - - docutils=0.20=py311h38be061_0 - - dpath=2.1.5=pyha770c72_1 - - dropbox=11.36.0=pyhd8ed1ab_0 - - exceptiongroup=1.1.1=pyhd8ed1ab_0 - - filechunkio=1.8=py_2 - - filelock=3.12.0=pyhd8ed1ab_0 - - fmt=9.1.0=h924138e_0 - - frozenlist=1.3.3=py311hd4cff14_0 - - ftputil=5.0.4=pyhd8ed1ab_0 - - gitdb=4.0.10=pyhd8ed1ab_0 - - gitpython=3.1.31=pyhd8ed1ab_0 - - google-api-core=2.11.0=pyhd8ed1ab_0 - - google-api-python-client=2.86.0=pyhd8ed1ab_0 - - google-auth=2.18.0=pyh1a96a4e_0 - - 
google-auth-httplib2=0.1.0=pyhd8ed1ab_1 - - google-cloud-core=2.3.2=pyhd8ed1ab_0 - - google-cloud-storage=2.9.0=pyh1a96a4e_0 - - google-crc32c=1.1.2=py311h98db957_4 - - google-resumable-media=2.5.0=pyhd8ed1ab_0 - - googleapis-common-protos=1.57.1=pyhd8ed1ab_0 - - grpcio=1.54.2=py311hcafe171_0 - - httplib2=0.22.0=pyhd8ed1ab_0 - - humanfriendly=10.0=py311h38be061_4 - - icu=72.1=hcb278e6_0 - - idna=3.4=pyhd8ed1ab_0 - - importlib-metadata=6.6.0=pyha770c72_0 - - importlib_resources=5.12.0=pyhd8ed1ab_0 - - iniconfig=2.0.0=pyhd8ed1ab_0 - - jinja2=3.1.2=pyhd8ed1ab_1 - - jmespath=1.0.1=pyhd8ed1ab_0 - - jsonpatch=1.32=pyhd8ed1ab_0 - - jsonpointer=2.0=py_0 - - jsonschema=4.17.3=pyhd8ed1ab_0 - - jupyter_core=5.3.0=py311h38be061_0 - - keyutils=1.6.1=h166bdaf_0 - - krb5=1.20.1=h81ceb04_0 - - ld_impl_linux-64=2.40=h41732ed_0 - - libabseil=20230125.0=cxx17_hcb278e6_1 - - libarchive=3.6.2=h3d51595_0 - - libblas=3.9.0=16_linux64_openblas - - libcblas=3.9.0=16_linux64_openblas - - libcrc32c=1.1.2=h9c3ff4c_0 - - libcurl=8.0.1=h588be90_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libexpat=2.5.0=hcb278e6_1 - - libffi=3.4.2=h7f98852_5 - - libgcc-ng=12.2.0=h65d4601_19 - - libgfortran-ng=12.2.0=h69a702a_19 - - libgfortran5=12.2.0=h337968e_19 - - libgomp=12.2.0=h65d4601_19 - - libgrpc=1.54.2=hcf146ea_0 - - libiconv=1.17=h166bdaf_0 - - liblapack=3.9.0=16_linux64_openblas - - liblapacke=3.9.0=16_linux64_openblas - - libmamba=1.4.2=hcea66bb_0 - - libmambapy=1.4.2=py311h1f88262_0 - - libnghttp2=1.52.0=h61bc06f_0 - - libnsl=2.0.0=h7f98852_0 - - libopenblas=0.3.21=pthreads_h78a6416_3 - - libprotobuf=3.21.12=h3eb15da_0 - - libsodium=1.0.18=h36c2ea0_1 - - libsolv=0.7.23=h3eb15da_0 - - libsqlite=3.41.2=h2797004_1 - - libssh2=1.10.0=hf14f497_3 - - libstdcxx-ng=12.2.0=h46fd767_19 - - libuuid=2.38.1=h0b41bf4_0 - - libxml2=2.10.4=hfdac1af_0 - - libzlib=1.2.13=h166bdaf_4 - - logmuse=0.2.6=pyh8c360ce_0 - - lz4-c=1.9.4=hcb278e6_0 - - lzo=2.10=h516909a_1000 - - 
mamba=1.4.2=py311h3072747_0 - - markdown-it-py=2.2.0=pyhd8ed1ab_0 - - markupsafe=2.1.2=py311h2582759_0 - - mdurl=0.1.0=pyhd8ed1ab_0 - - multidict=6.0.4=py311h2582759_0 - - nbformat=5.8.0=pyhd8ed1ab_0 - - ncurses=6.3=h27087fc_1 - - numpy=1.24.3=py311h64a7726_0 - - oauth2client=4.1.3=py_0 - - openssl=3.1.0=hd590300_3 - - packaging=23.1=pyhd8ed1ab_0 - - pandas=2.0.1=py311h320fe9a_1 - - paramiko=3.1.0=pyhd8ed1ab_0 - - peppy=0.35.5=pyhd8ed1ab_0 - - pip=23.1.2=pyhd8ed1ab_0 - - pkgutil-resolve-name=1.3.10=pyhd8ed1ab_0 - - plac=1.3.5=pyhd8ed1ab_0 - - platformdirs=3.5.1=pyhd8ed1ab_0 - - pluggy=1.0.0=pyhd8ed1ab_5 - - ply=3.11=py_1 - - prettytable=3.7.0=pyhd8ed1ab_0 - - protobuf=4.21.12=py311hcafe171_0 - - psutil=5.9.5=py311h2582759_0 - - pulp=2.7.0=py311h38be061_0 - - pyasn1=0.4.8=py_0 - - pyasn1-modules=0.2.7=py_0 - - pybind11-abi=4=hd8ed1ab_3 - - pycosat=0.6.4=py311hd4cff14_1 - - pycparser=2.21=pyhd8ed1ab_0 - - pygments=2.15.1=pyhd8ed1ab_0 - - pynacl=1.5.0=py311hd4cff14_2 - - pyopenssl=23.1.1=pyhd8ed1ab_0 - - pyparsing=3.0.9=pyhd8ed1ab_0 - - pyrsistent=0.19.3=py311h2582759_0 - - pysftp=0.2.9=py_1 - - pysocks=1.7.1=pyha2e5f31_6 - - pytest=7.3.1=pyhd8ed1ab_0 - - python=3.11.3=h2755cc3_0_cpython - - python-dateutil=2.8.2=pyhd8ed1ab_0 - - python-fastjsonschema=2.16.3=pyhd8ed1ab_0 - - python-irodsclient=1.1.6=pyhd8ed1ab_0 - - python-tzdata=2023.3=pyhd8ed1ab_0 - - python_abi=3.11=3_cp311 - - pytz=2023.3=pyhd8ed1ab_0 - - pyu2f=0.1.5=pyhd8ed1ab_0 - - pyyaml=6.0=py311hd4cff14_5 - - re2=2023.02.02=hcb278e6_0 - - readline=8.2=h8228510_1 - - reproc=14.2.4=h0b41bf4_0 - - reproc-cpp=14.2.4=hcb278e6_0 - - requests=2.29.0=pyhd8ed1ab_0 - - reretry=0.11.8=pyhd8ed1ab_0 - - rich=13.3.5=pyhd8ed1ab_0 - - rsa=4.9=pyhd8ed1ab_0 - - ruamel.yaml=0.17.26=py311h459d7ec_0 - - ruamel.yaml.clib=0.2.7=py311h2582759_1 - - s3transfer=0.6.1=pyhd8ed1ab_0 - - setuptools=67.7.2=pyhd8ed1ab_0 - - setuptools-scm=7.1.0=pyhd8ed1ab_0 - - six=1.16.0=pyh6c4a22f_0 - - slacker=0.14.0=py_0 - - 
smart_open=6.3.0=pyhd8ed1ab_1 - - smmap=3.0.5=pyh44b312d_0 - - snakemake=7.25.3=hdfd78af_0 - - snakemake-minimal=7.25.3=pyhdfd78af_0 - - stone=3.3.1=pyhd8ed1ab_0 - - stopit=1.1.2=py_0 - - tabulate=0.9.0=pyhd8ed1ab_1 - - throttler=1.2.1=pyhd8ed1ab_0 - - tk=8.6.12=h27826a3_0 - - tomli=2.0.1=pyhd8ed1ab_0 - - toolz=0.12.0=pyhd8ed1ab_0 - - toposort=1.10=pyhd8ed1ab_0 - - tqdm=4.65.0=pyhd8ed1ab_1 - - traitlets=5.9.0=pyhd8ed1ab_0 - - typing-extensions=4.5.0=hd8ed1ab_0 - - typing_extensions=4.5.0=pyha770c72_0 - - tzdata=2023c=h71feb2d_0 - - ubiquerg=0.6.2=pyhd8ed1ab_0 - - uritemplate=4.1.1=pyhd8ed1ab_0 - - urllib3=1.26.15=pyhd8ed1ab_0 - - veracitools=0.1.3=py_0 - - wcwidth=0.2.6=pyhd8ed1ab_0 - - wheel=0.40.0=pyhd8ed1ab_0 - - wrapt=1.15.0=py311h2582759_0 - - xz=5.2.6=h166bdaf_0 - - yaml=0.2.5=h7f98852_2 - - yaml-cpp=0.7.0=h27087fc_2 - - yarl=1.9.1=py311h459d7ec_0 - - yte=1.5.1=py311h38be061_1 - - zipp=3.15.0=pyhd8ed1ab_0 - - zlib=1.2.13=h166bdaf_4 - - zstandard=0.19.0=py311hbe0fcd7_1 - - zstd=1.5.2=h3eb15da_6 diff --git a/eval.nf b/eval.nf new file mode 100644 index 0000000..857505e --- /dev/null +++ b/eval.nf @@ -0,0 +1,34 @@ +// include modules - here, modules are single processes +include { SAMTOOLS_FAIDX } from './modules/samtools/faidx/main.nf' +include { HAPPY } from './modules/happy/main.nf' + + +workflow{ + // ------------------ + // | Input channels | + // ------------------ + ch_ref = Channel.value("$baseDir/" + params.reference) + ch_ref_idx = SAMTOOLS_FAIDX(ch_ref) + + ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}") + ch_callsets + .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('.vcf', '').replaceFirst('.gz', '').toInteger(), file(it)) } + .set {ch_callsets} + //ch_callsets.view() + + ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf") + ch_truthsets + .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('.vcf', '').toInteger(), 
file(it)) } + .set {ch_truthsets} + //ch_truthsets.view() + + ch_truthsets.join(ch_callsets, by: 0) + .set {ch_variantsets_map} + //ch_variantsets_map.view() + + + // ------------------ + // | Main processes | + // ------------------ + HAPPY(ch_variantsets_map,ch_ref,ch_ref_idx) +} diff --git a/hap.nf b/hap.nf new file mode 100644 index 0000000..bfff46b --- /dev/null +++ b/hap.nf @@ -0,0 +1,36 @@ +// include modules - here, modules are single processes +//include { AMPLISIM } from './modules/amplisim/main.nf' +include { MASON_SIMULATOR } from './modules/mason/simulator/main.nf' +include { MASON_VARIATOR } from './modules/mason/variator/main.nf' +include { NANOSIM } from './modules/nanosim/main.nf' +//include { NORM_VCF } from './subworkflows/norm_vcf/main.nf' +include { SAMTOOLS_FAIDX } from './modules/samtools/faidx/main.nf' + + + +workflow{ + // Input channels + ch_ids = Channel.of(1..params.n) + ch_ref = Channel.value("$baseDir/" + params.reference) + ch_ref_idx = SAMTOOLS_FAIDX(ch_ref) + + // Generate samples (haplotype consensus sequence + VCF) + (ch_haplotypes,ch_vcf) = MASON_VARIATOR(ch_ids,ch_ref,ch_ref_idx) + + // Normalize, sort and index the VCF files + //NORM_VCF(ch_vcf,ch_ref) + + ch_vcf + .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('\\.vcf', '').toInteger(), file(it)) } + .set {ch_sample_vcf_map} + + // Generate reads + if (params.read_type == 'ngs'){ + MASON_SIMULATOR(ch_sample_vcf_map,ch_ref,ch_ref_idx) + } + //else if (params.read_type == 'nano'){ + // NANOSIM(ch_ids, ch_haplotypes) + //} + + +} diff --git a/modules/bcftools/index/main.nf b/modules/bcftools/index/main.nf new file mode 100644 index 0000000..b56a469 --- /dev/null +++ b/modules/bcftools/index/main.nf @@ -0,0 +1,27 @@ +process BCFOOTLS_INDEX { + // Job label + // tag "${sample}" + + // Store results + //publishDir "${params.outdir}", mode: 'copy', pattern: "*.tbi" + + // Engine settings + conda 'bioconda::bcftools=1.19' + + // 
Resources + cpus 1 + + // Process I/O + input: + path vcffile + + output: + path "${vcffile.getName()}.tbi", emit: index_vcf + + // Job script + """ + bcftools index -t ${vcffile} + """ + + +} \ No newline at end of file diff --git a/modules/bcftools/norm/main.nf b/modules/bcftools/norm/main.nf new file mode 100644 index 0000000..8e8eebb --- /dev/null +++ b/modules/bcftools/norm/main.nf @@ -0,0 +1,30 @@ +process BCFOOTLS_NORM { + // Job label + // tag "${sample}" + + // Engine settings + conda 'bioconda::bcftools=1.19' + + // Resources + cpus 1 + + // Process I/O + input: + each vcffile + path ref + + output: + path "${vcffile.getSimpleName()}.normalized.vcf", emit: norm_vcf + + // Job script + """ + bcftools norm \ + --fasta-ref ${ref} \ + --check-ref s \ + --multiallelics -both \ + -o ${vcffile.getSimpleName()}.normalized.vcf \ + ${vcffile} + """ + + +} \ No newline at end of file diff --git a/modules/bcftools/sort/main.nf b/modules/bcftools/sort/main.nf new file mode 100644 index 0000000..e03ceca --- /dev/null +++ b/modules/bcftools/sort/main.nf @@ -0,0 +1,30 @@ +process BCFOOTLS_SORT { + // Job label + // tag "${sample}" + + // Store results + //publishDir "${params.outdir}", mode: 'copy', pattern: "*.normalized.sorted.vcf.gz" + + // Engine settings + conda 'bioconda::bcftools=1.19' + + // Resources + cpus 1 + + // Process I/O + input: + path vcffile + + output: + path "${vcffile.getSimpleName()}.normalized.sorted.vcf.gz", emit: sort_vcf + + // Job script + """ + bcftools sort \ + -o ${vcffile.getSimpleName()}.normalized.sorted.vcf.gz \ + -O z \ + ${vcffile} + """ + + +} \ No newline at end of file diff --git a/modules/happy/main.nf b/modules/happy/main.nf new file mode 100644 index 0000000..3564256 --- /dev/null +++ b/modules/happy/main.nf @@ -0,0 +1,37 @@ +process HAPPY { + // Job label + tag "${sample}" + + // Store results + publishDir "${params.outdir}", mode: 'copy', pattern: "*.sompy.*" + + // Engine settings + conda 'bioconda::hap.py=0.3.15' + + // 
Resources + cpus 1 + + // Process I/O + input: + tuple val(sample), path(truthset), path(callset) + val ref + val ref_idx + + output: + path "simulated_hap${sample}.sompy.stats.csv", emit: csv + path "simulated_hap${sample}.sompy.metrics.json", emit: json + + // Job script + """ + som.py \ + --no-fixchr-truth \ + --no-fixchr-query \ + --normalize-all \ + -r ${ref} \ + -o simulated_hap${sample}.sompy \ + ${truthset} \ + ${callset} + """ + + +} \ No newline at end of file diff --git a/modules/mason/simulator/main.nf b/modules/mason/simulator/main.nf new file mode 100644 index 0000000..eb4b80b --- /dev/null +++ b/modules/mason/simulator/main.nf @@ -0,0 +1,46 @@ +process MASON_SIMULATOR { + + // Job label + // tag "${sample}" + + // Store results + publishDir "${params.outdir}", mode: 'copy', pattern: "*.{NGSWGS.R1.fastq,NGSWGS.R2.fastq,bam}" + + // Engine settings + conda 'bioconda::mason=2.0.9' + + // Resources + cpus 2 + + // Process I/O + input: + tuple val(sample), path(vcf) + val ref + val ref_idx + + output: + path "simulated_hap${sample}.NGSWGS.{R1,R2}.fastq", emit: fastqs + path "simulated_hap${sample}.bam", emit: bam + + // Job script + script: + unique_seed = (params.seed * sample ) % 2147483647 // that's (2^31)-1, the upper bound for mason + """ + mason_simulator \ + -ir ${ref} \ + -iv ${vcf} \ + -o simulated_hap${sample}.NGSWGS.R1.fastq \ + -or simulated_hap${sample}.NGSWGS.R2.fastq \ + -oa simulated_hap${sample}.bam \ + --seed ${unique_seed} \ + --num-threads ${task.cpus} \ + --num-fragments ${params.nb_frag} \ + --fragment-min-size ${params.fragment_min_size} \ + --fragment-max-size ${params.fragment_max_size} \ + --fragment-mean-size ${params.fragment_mean_size} \ + --fragment-size-std-dev ${params.fragment_size_std_dev} \ + --illumina-read-length ${params.illumina_read_length} + """ + + +} diff --git a/modules/mason/variator/main.nf b/modules/mason/variator/main.nf new file mode 100644 index 0000000..5640ef1 --- /dev/null +++ 
b/modules/mason/variator/main.nf @@ -0,0 +1,46 @@ +process MASON_VARIATOR { + + // Job label + // tag "${sample}" + + // Store results + publishDir "${params.outdir}", mode: 'copy', pattern: "simulated_hap*" + + // Engine settings + conda 'bioconda::mason=2.0.9' + + // Resources + cpus 1 + + // Process I/O + input: + val sample + val ref + val ref_idx + + output: + path "simulated_hap${sample}.fasta", emit: fasta + path "simulated_hap${sample}.vcf", emit: vcf + + // Job script + script: + unique_seed = (params.seed * sample) % 2147483647 // that's (2^31)-1, the upper bound for mason_variator + """ + mason_variator \ + --in-reference ${ref} \ + --out-fasta simulated_hap${sample}.fasta \ + --out-vcf simulated_hap${sample}.vcf \ + --seed ${unique_seed} \ + --snp-rate 0.01 \ + --small-indel-rate 0.005 \ + --min-small-indel-size 1 \ + --max-small-indel-size 20 \ + --sv-indel-rate 0 \ + --sv-inversion-rate 0 \ + --sv-translocation-rate 0 \ + --sv-duplication-rate 0 \ + 2> ${sample}.log + """ + + +} diff --git a/modules/nanosim/main.nf b/modules/nanosim/main.nf new file mode 100644 index 0000000..f2ad18d --- /dev/null +++ b/modules/nanosim/main.nf @@ -0,0 +1,39 @@ +process NANOSIM { + + // Job label + // tag "${sample}" + + // Store results + publishDir "${params.outdir}", mode: 'copy', pattern: "simulated_hap${id}.NANOWGS{_aligned_error_profile,_aligned_reads.fasta}" + + // Engine settings + conda 'bioconda::nanosim=3.1.0' + + // Resources + cpus 2 + + // Process I/O + input: + val id + path "simulated_hap${id}.fasta" // haplotype sequence, e.g. 
simulated_1.fasta + + output: + path "simulated_hap${id}.NANOWGS{_aligned_error_profile,_aligned_reads.fasta}", emit: fastq + + // Job script + script: + unique_seed = (params.seed * id) % 2147483647 // that's (2^31)-1, the upper bound for mason + """ + simulator.py genome \ + -dna_type ${params.dna_type} \ + -rg simulated_hap${id}.fasta \ + -c ${projectDir}/${params.model_prefix} \ + -b ${params.model_caller} \ + -med ${params.median_length} \ + -sd ${params.sd_length} \ + -n ${params.nb_reads} \ + -o simulated_hap${id}.NANOWGS \ + --seed ${unique_seed} \ + -t ${task.cpus} + """ +} diff --git a/modules/samtools/faidx/main.nf b/modules/samtools/faidx/main.nf new file mode 100644 index 0000000..8c8a6a3 --- /dev/null +++ b/modules/samtools/faidx/main.nf @@ -0,0 +1,26 @@ +process SAMTOOLS_FAIDX { + // Job label + // tag "${sample}" + + // Engine settings + conda 'bioconda::samtools=1.19.2' + + // Resources + cpus 1 + + // Process I/O + input: + path ref + + output: + val "${ref}.fai" + + // Job script + """ + samtools faidx \ + ${ref} \ + -o ${ref}.fai + """ + + +} diff --git a/nextflow.config b/nextflow.config new file mode 100644 index 0000000..71e1dad --- /dev/null +++ b/nextflow.config @@ -0,0 +1,66 @@ +// Pipeline meta-information +manifest { + name = 'CIEVaD' + description = 'A workflow for a simple, streamlined and rapid evaluation of variant callsets ' + author = 'Thomas Krannich' + nextflowVersion = '>=20.04.0' + version = '0.1.0-nf' +} + +// Parameters that are accessible in the pipeline script +params { + // Individual parameters + n = 3 + reference = 'reference/Sars-Cov-2/Wuhan-Hu-1/MN908947.3.fasta' + read_type = 'ngs' + + // General parameters + seed = 479 + outdir = 'results' + + // NGS (WGS) - Read simulation parameters + nb_frag = 3000 + fragment_min_size = 450 + fragment_max_size = 550 + fragment_mean_size = 500 + fragment_size_std_dev = 20 + illumina_read_length = 150 + + // Nanopore (WGS) - Read simulation parameters + dna_type = 'linear' + 
model_prefix = 'aux/nanosim_model/human_NA12878_DNA_FAB49712_guppy/training' + model_caller = 'guppy' + median_length = 5000 + sd_length = 1.05 + nb_reads = 180 + + // Evaluation parameters + callsets_dir = 'data' +} + +// Enable execution report +def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') +report { + enabled = true + file = "${params.outdir}/execution_report_${trace_timestamp}.html" +} + +// Predefined configurations for the user. Can be selected using the -profile command line option. +// Profiles can be combined by separating the profile names with a comma. +profiles { + // engines + conda { + conda.enabled = true + conda.useMamba = false + } + mamba { + conda.enabled = true + conda.useMamba = true + } + + // executors + local { + executor.name = "local" + executor.cpus = 4 + } +} diff --git a/python/__init__.py b/python/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/python/configGenerators.py b/python/configGenerators.py deleted file mode 100644 index f80a696..0000000 --- a/python/configGenerators.py +++ /dev/null @@ -1,125 +0,0 @@ -import math -from python.myUtil import trim_trailing_slash, mkdir_if_not_present - - -def generate_hap_config(args): - - # check if configs directory exists, create if not - mkdir_if_not_present("configs") - - # generate config - with open("configs/snake_config_haplotype.yaml", 'w') as config : - - config.write('HEAD_DIR:\n') - config.write(' ' + trim_trailing_slash(args.head_dir) + '\n\n') - config.write('REF:\n') - config.write(' ' + args.reference + '\n\n') - config.write('SEED:\n') - config.write(' ' + str(args.seed) + '\n\n') - config.write('SAMPLES:\n') - - padding = int(math.log2(args.nb_samples)) - for i in range(1, (args.nb_samples)+1): - config.write(' - \"' + str(i).zfill(padding) + '\"\n') - - print("New config file is created at configs/snake_config_haplotype.yaml\n") - - -def generate_ngs_config(args): - - # check if configs directory exists, create if not - 
mkdir_if_not_present("configs") - - # generate config - with open("configs/snake_config_ngs.yaml", 'w') as config : - - config.write('HEAD_DIR:\n') - config.write(' ' + trim_trailing_slash(args.head_dir) + '\n\n') - config.write('NGS_NB_FRAGS:\n') - config.write(' ' + str(args.nb_frags) + '\n\n') - config.write('SEED:\n') - config.write(' ' + str(args.seed) + '\n\n') - config.write('SAMPLES:\n') - - padding = int(math.log2(args.nb_samples)) - for i in range(1, (args.nb_samples)+1): - config.write(' - \"' + str(i).zfill(padding) + '\"\n') - - print("New config file is created at configs/snake_config_ngs.yaml\n") - - -def generate_ampli_config(args): - - # check if configs directory exists, create if not - mkdir_if_not_present("configs") - - # generate config - with open("configs/snake_config_amplicon.yaml", 'w') as config : - - config.write('HEAD_DIR:\n') - config.write(' ' + trim_trailing_slash(args.head_dir) + '\n\n') - config.write('REF:\n') - config.write(' ' + args.reference + '\n\n') - config.write('SEED:\n') - config.write(' ' + str(args.seed) + '\n\n') - config.write('PRIMER:\n') - config.write(' ' + args.primers + '\n\n') - config.write('SAMPLES:\n') - - padding = int(math.log2(args.nb_samples)) - for i in range(1, (args.nb_samples)+1): - config.write(' - \"' + str(i).zfill(padding) + '\"\n') - - print("New config file is created at configs/snake_config_amplicon.yaml\n") - - -def generate_nanopore_config(args): - - # check if configs directory exists, create if not - mkdir_if_not_present("configs") - - # generate config - with open("configs/snake_config_nanopore.yaml", 'w') as config : - - config.write('HEAD_DIR:\n') - config.write(' ' + trim_trailing_slash(args.head_dir) + '\n\n') - config.write('MODEL_PREFIX:\n') - config.write(' ' + str(args.model_prefix) + '\n\n') - config.write('DNA_TYPE:\n') - config.write(' ' + str(args.dna_type) + '\n\n') - config.write('MODEL_CALLER:\n') - config.write(' ' + str(args.model_caller) + '\n\n') - 
config.write('MEDIAN_LENGTH:\n') - config.write(' ' + str(args.median_length) + '\n\n') - config.write('SD_LENGTH:\n') - config.write(' ' + str(args.sd_length) + '\n\n') - config.write('NB_READS:\n') - config.write(' ' + str(args.nb_reads) + '\n\n') - config.write('SEED:\n') - config.write(' ' + str(args.seed) + '\n\n') - config.write('SAMPLES:\n') - - padding = int(math.log2(args.nb_samples)) - for i in range(1, (args.nb_samples)+1): - config.write(' - \"' + str(i).zfill(padding) + '\"\n') - - print("New config file is created at configs/snake_config_nanopore.yaml\n") - - -def generate_eval_config(args): - - # check if configs directory exists, create if not - mkdir_if_not_present("configs") - - # generate config - with open("configs/snake_config_eval.yaml", 'w') as config : - - config.write('HEAD_DIR:\n') - config.write(' ' + trim_trailing_slash(args.head_dir) + '\n\n') - config.write('SAMPLES:\n') - - padding = int(math.log2(args.nb_samples)) - for i in range(1, (args.nb_samples)+1): - config.write(' - \"' + str(i).zfill(padding) + '\"\n') - - print("New config file is created at configs/snake_config_eval.yaml\n") diff --git a/python/myUtil.py b/python/myUtil.py deleted file mode 100644 index a12c13f..0000000 --- a/python/myUtil.py +++ /dev/null @@ -1,14 +0,0 @@ -import os - - -def trim_trailing_slash(s:str): - if s[-1] == '/': - return s[:-1] - return s - - -def mkdir_if_not_present(cfg_path:str): - dir_exists = os.path.exists(cfg_path) - if not dir_exists: - os.makedirs(cfg_path) - print("New config directory is created at " + cfg_path + "!") diff --git a/python/runWorkflows.py b/python/runWorkflows.py deleted file mode 100644 index 4e0248c..0000000 --- a/python/runWorkflows.py +++ /dev/null @@ -1,82 +0,0 @@ -import os -import python.configGenerators - - -def run_hap(args): - print("Running haplotype simulation...\n") - - if args.command == 'hap': - - if args.config is not None: - - os.system('snakemake -p --use-conda --cores 1 --configfile ' + args.config + ' 
-s ' + args.snakefile) - - else: - - python.configGenerators.generate_hap_config(args) - - os.system('snakemake -p --use-conda --cores 1 --configfile configs/snake_config_haplotype.yaml -s ' + args.snakefile) - - -def run_ngs(args): - print("Running NGS read simulation...\n") - - if args.command == 'ngs': - - if args.config is not None: - - os.system('snakemake -p --use-conda --cores ' + str(args.threads) + '--configfile ' + args.config + ' -s ' + args.snakefile) - - else: - - python.configGenerators.generate_ngs_config(args) - - os.system('snakemake -p --use-conda --cores ' + str(args.threads) + ' --configfile configs/snake_config_ngs.yaml -s ' + args.snakefile) - - -def run_ampli(args): - print("Running amplicon and NGS read simulation...\n") - - if args.command == 'ampli': - - if args.config is not None: - - os.system('snakemake -p --use-conda --cores 1 --configfile ' + args.config + ' -s ' + args.snakefile) - - else: - - python.configGenerators.generate_ampli_config(args) - - os.system('snakemake -p --use-conda --cores 1 --configfile configs/snake_config_amplicon.yaml -s ' + args.snakefile) - - -def run_nanopore(args): - print("Running Nanopore read simulation...\n") - - if args.command == 'nano': - - if args.config is not None: - - os.system('snakemake -p --use-conda --cores ' + str(args.threads) + ' --configfile ' + args.config + ' -s ' + args.snakefile) - - else: - - python.configGenerators.generate_nanopore_config(args) - - os.system('snakemake -p --use-conda --cores ' + str(args.threads) + ' --configfile configs/snake_config_nanopore.yaml -s ' + args.snakefile) - - -def run_eval(args): - print("Running VCF file-based evaluation of variants...\n") - - if args.command == 'eval': - - if args.config is not None: - - os.system('snakemake -p --use-conda --cores 1 --configfile ' + args.config + ' -s ' + args.snakefile) - - else: - - python.configGenerators.generate_eval_config(args) - - os.system('snakemake -p --use-conda --cores 1 --configfile 
configs/snake_config_eval.yaml -s ' + args.snakefile) \ No newline at end of file diff --git a/snakemake/amplicon/Snakefile b/snakemake/amplicon/Snakefile deleted file mode 100644 index 3ef1984..0000000 --- a/snakemake/amplicon/Snakefile +++ /dev/null @@ -1,67 +0,0 @@ -#################### PREAMBLE -from snakemake.utils import min_version -min_version("6.0") - - -#################### CONFIG -configfile: "configs/snake_config_amplicon.yaml" - - -#################### INCLUDES - -#################### PARAMS -HEAD_DIR = config["HEAD_DIR"] -SAMPLES = config["SAMPLES"] -SEED = config["SEED"] - - -#################### RULES -rule all: - input: - simulated_read = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.amplicon.ngs.{r}.fastq", s=SAMPLES, r=["R1", "R2"]) - -rule amplicon_simulator: - input: - reference = config["REF"], - primer = config["PRIMER"] - output: - amplicons = config["HEAD_DIR"] + "/data/simulated_hap{sample}/amplicons.fa" - params: - seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound - log: - config["HEAD_DIR"] + "/logs/amplisim/amplisim.hap{sample}.log" - shell: - """ - # STATIC COMPILED BINARY OF AMPLISIM WILL BE REPLACED BY A LIVE BUILD OR CONDA - ./bin/amplisim-v0_1_0-ubuntu_20_04 \ - -s {params.seed} \ - -o {output.amplicons} \ - {input.reference} \ - {input.primer} \ - > {log} 2>&1 - """ - -rule ngs_read_simulator: - input: - amplicons = rules.amplicon_simulator.output.amplicons - output: - r1 = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.amplicon.ngs.R1.fastq", - r2 = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.amplicon.ngs.R2.fastq" - params: - seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound - threads: - workflow.cores - conda: - config["HEAD_DIR"] + "/env/conda_mason.yaml" - log: - config["HEAD_DIR"] + "/logs/mason/mason_frag_sequencing.hap{sample}.log" - shell: - """ - mason_frag_sequencing \ - -i 
{input.amplicons} \ - -o {output.r1} \ - -or {output.r2} \ - --seed {params.seed} \ - --illumina-read-length 150 \ - > {log} 2>&1 - """ diff --git a/snakemake/eval/Snakefile b/snakemake/eval/Snakefile deleted file mode 100644 index fe1e6e2..0000000 --- a/snakemake/eval/Snakefile +++ /dev/null @@ -1,61 +0,0 @@ -#################### PREAMBLE -from snakemake.utils import min_version -min_version("6.0") - - -#################### CONFIG -configfile: "configs/snake_config_eval.yaml" - - -#################### INCLUDES -include: "../include/vcf-norm-call.smk" - - -#################### PARAMS -HEAD_DIR = config["HEAD_DIR"] -SAMPLES = config["SAMPLES"] - - -#################### RULES -rule all: - input: - expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/callset.normalized.sorted.vcf.gz.tbi", s=SAMPLES), - expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/eval.picard.{e}", e=["variant_calling_detail_metrics", "variant_calling_summary_metrics"], s=SAMPLES), - HEAD_DIR + "/results/variant_calling_summary_ngs" - - -rule vcf_evaluation: - input: - truthset = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.normalized.sorted.vcf.gz", - truthidx = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.normalized.sorted.vcf.gz.tbi", - callset = config["HEAD_DIR"] + "/data/simulated_hap{sample}/callset.normalized.sorted.vcf.gz", - callidx = config["HEAD_DIR"] + "/data/simulated_hap{sample}/callset.normalized.sorted.vcf.gz.tbi" - params: - prefix = config["HEAD_DIR"] + "/data/simulated_hap{sample}/eval.picard" - output: - detail = config["HEAD_DIR"] + "/data/simulated_hap{sample}/eval.picard.variant_calling_detail_metrics", - summary = config["HEAD_DIR"] + "/data/simulated_hap{sample}/eval.picard.variant_calling_summary_metrics" - conda: - config["HEAD_DIR"] + "/env/conda_picard.yaml" - log: - config["HEAD_DIR"] + "/logs/picard/evaluation_hap{sample}.log" - shell: - """ - picard CollectVariantCallingMetrics \ - --INPUT {input.callset} \ - --DBSNP {input.truthset} 
\ - -O {params.prefix} - """ - - -rule report: - input: - expand(config["HEAD_DIR"] + "/data/simulated_hap{sample}/eval.picard.variant_calling_summary_metrics", sample=config["SAMPLES"]) - output: - config["HEAD_DIR"] + "/results/variant_calling_summary_ngs" - params: - head_dir = config["HEAD_DIR"] - shell: - """ - sh {params.head_dir}/aux/picard_summary_of_summaries.sh {params.head_dir} > {output} - """ \ No newline at end of file diff --git a/snakemake/hap/Snakefile b/snakemake/hap/Snakefile deleted file mode 100644 index 9752c8a..0000000 --- a/snakemake/hap/Snakefile +++ /dev/null @@ -1,58 +0,0 @@ -#################### PREAMBLE -from snakemake.utils import min_version -min_version("6.0") - - -#################### CONFIG -configfile: "configs/snake_config_haplotype.yaml" - - -#################### INCLUDES -include: "../include/vcf-norm-truth.smk" - - -#################### PARAMS -HEAD_DIR = config["HEAD_DIR"] -REF = config["REF"] -SAMPLES = config["SAMPLES"] -SEED = config["SEED"] - - -#################### RULES -rule all: - input: - simulated_haplotypes = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.fasta", s=SAMPLES), - simulated_variants = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.vcf" , s=SAMPLES), - simulated_norm_variants = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.normalized.sorted.vcf.gz" , s=SAMPLES), - simulated_norm_variants_idx = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.normalized.sorted.vcf.gz.tbi" , s=SAMPLES) - - -rule hap_simulator: - input: - ref = config["REF"] - output: - fasta = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.fasta", - vcf = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.vcf", - params: - seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound for mason_variator - conda: - config["HEAD_DIR"] + "/env/conda_mason.yaml" - log: - config["HEAD_DIR"] + 
"/logs/mason/mason_variator.hap{sample}.log" - shell: - """ - mason_variator \ - --in-reference {input.ref} \ - --out-fasta {output.fasta} \ - --out-vcf {output.vcf} \ - --seed {params.seed} \ - --snp-rate 0.01 \ - --small-indel-rate 0.005 \ - --min-small-indel-size 1 \ - --max-small-indel-size 20 \ - --sv-indel-rate 0 \ - --sv-inversion-rate 0 \ - --sv-translocation-rate 0 \ - --sv-duplication-rate 0 \ - 2> {log} - """ \ No newline at end of file diff --git a/snakemake/include/vcf-norm-call.smk b/snakemake/include/vcf-norm-call.smk deleted file mode 100644 index 30c736f..0000000 --- a/snakemake/include/vcf-norm-call.smk +++ /dev/null @@ -1,37 +0,0 @@ -from snakemake.utils import min_version -min_version("6.0") - -module bcftools: - snakefile: - config["HEAD_DIR"] + "/snakemake/modules/bcftools/Snakefile" - - -use rule bcftools_norm_noref from bcftools as norm_callset with: - input: - vcf = config["HEAD_DIR"] + "/data/simulated_hap{sample}/callset.vcf.gz" - output: - vcf = temp(config["HEAD_DIR"] + "/data/simulated_hap{sample}/callset.normalized.vcf.gz") - conda: - config["HEAD_DIR"] + "/env/conda_bcftools.yaml" - log: - config["HEAD_DIR"] + "/logs/bcftools_norm.hap{sample}.callset.log" - - -use rule bcftools_sort from bcftools as sort_callset with: - input: - vcf = rules.norm_callset.output - output: - vcf = config["HEAD_DIR"] + "/data/simulated_hap{sample}/callset.normalized.sorted.vcf.gz" - conda: - config["HEAD_DIR"] + "/env/conda_bcftools.yaml" - log: - config["HEAD_DIR"] + "/logs/bcftools/bcftools_sort.hap{sample}.callset.log" - - -use rule bcftools_index from bcftools as index_callset with: - input: - vcf = rules.sort_callset.output - output: - tbi = config["HEAD_DIR"] + "/data/simulated_hap{sample}/callset.normalized.sorted.vcf.gz.tbi" - conda: - config["HEAD_DIR"] + "/env/conda_bcftools.yaml" \ No newline at end of file diff --git a/snakemake/include/vcf-norm-truth.smk b/snakemake/include/vcf-norm-truth.smk deleted file mode 100644 index d9fcc93..0000000 
--- a/snakemake/include/vcf-norm-truth.smk +++ /dev/null @@ -1,38 +0,0 @@ -from snakemake.utils import min_version -min_version("6.0") - -module bcftools: - snakefile: - config["HEAD_DIR"] + "/snakemake/modules/bcftools/Snakefile" - - -use rule bcftools_norm from bcftools as norm_truthset with: - input: - vcf = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.vcf", - ref = config["REF"] - output: - vcf = temp(config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.normalized.vcf") - conda: - config["HEAD_DIR"] + "/env/conda_bcftools.yaml" - log: - config["HEAD_DIR"] + "/logs/bcftools_norm.hap{sample}.truthset.log" - - -use rule bcftools_sort from bcftools as sort_truthset with: - input: - vcf = rules.norm_truthset.output - output: - vcf = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.normalized.sorted.vcf.gz" - conda: - config["HEAD_DIR"] + "/env/conda_bcftools.yaml" - log: - config["HEAD_DIR"] + "/logs/bcftools_sort.hap{sample}.truthset.log" - - -use rule bcftools_index from bcftools as index_truthset with: - input: - vcf = rules.sort_truthset.output - output: - tbi = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.normalized.sorted.vcf.gz.tbi" - conda: - config["HEAD_DIR"] + "/env/conda_bcftools.yaml" \ No newline at end of file diff --git a/snakemake/modules/bcftools/Snakefile b/snakemake/modules/bcftools/Snakefile deleted file mode 100644 index 537e763..0000000 --- a/snakemake/modules/bcftools/Snakefile +++ /dev/null @@ -1,74 +0,0 @@ -from snakemake.utils import min_version -min_version("6.0") - - -rule bcftools_norm: - input: - ref = "genome.fasta", - vcf = "variants.vcf" - output: - vcf = "variants.norm.vcf" - conda: - "bcftools.yaml" - log: - "bcftools.norm.log" - shell: - """ - bcftools norm \ - --fasta-ref {input.ref} \ - --check-ref s \ - --multiallelics -both \ - -o {output.vcf} \ - {input.vcf} \ - > {log} - """ - - -rule bcftools_norm_noref: - input: - vcf = "variants.vcf" - output: - vcf = "variants.norm.vcf" - 
conda: - "bcftools.yaml" - log: - "bcftools.norm.noref.log" - shell: - """ - bcftools norm \ - --multiallelics -both \ - -o {output.vcf} \ - {input.vcf} \ - > {log} - """ - - -rule bcftools_sort: - input: - vcf = "variants.vcf" - output: - vcf = "variants.sorted.vcf.gz" - conda: - "bcftools.yaml" - log: - "bcftools.sort.log" - shell: - """ - bcftools sort \ - -o {output.vcf} \ - -O z \ - {input.vcf} - """ - - -rule bcftools_index: - input: - vcf = "variants.sorted.vcf.gz" - output: - tbi = "variants.sorted.vcf.gz.tbi" - conda: - "bcftools.yaml" - shell: - """ - bcftools index -t {input.vcf} - """ \ No newline at end of file diff --git a/snakemake/nanopore/Snakefile b/snakemake/nanopore/Snakefile deleted file mode 100644 index 0c745e6..0000000 --- a/snakemake/nanopore/Snakefile +++ /dev/null @@ -1,67 +0,0 @@ -#################### PREAMBLE -from snakemake.utils import min_version -min_version("6.0") - - -#################### CONFIG -configfile: "configs/snake_config_nanopore.yaml" - - -#################### INCLUDES - - -#################### PARAMS -HEAD_DIR = config["HEAD_DIR"] -SAMPLES = config["SAMPLES"] -DNA_TYPE = config["DNA_TYPE"] -MODEL_PREFIX = config["MODEL_PREFIX"] -MODEL_CALLER = config["MODEL_CALLER"] -MEDIAN_LENGTH = config["MEDIAN_LENGTH"] -SD_LENGTH = config["SD_LENGTH"] -NB_READS = config["NB_READS"] -SEED = config["SEED"] - - -#################### RULES -rule all: - input: - reads = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.nanopore_aligned_reads.fasta", s=SAMPLES), - errors = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.nanopore_aligned_error_profile", s=SAMPLES) - - -rule nanopore_read_simulator: - input: - simu_hap = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.fasta", - model = expand(config["HEAD_DIR"] + "/" + config["MODEL_PREFIX"] + "{f}", f=["_aligned_reads.pkl", "_aligned_region.pkl", "_chimeric_info", "_error_markov_model", "_error_rate.tsv", "_first_match.hist", "_gap_length.pkl", 
"_ht_length.pkl", "_ht_ratio.pkl", "_match_markov_model", "_model_profile", "_reads_alignment_rate", "_strandness_rate", "_unaligned_length.pkl"]) - output: - reads = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.nanopore_aligned_reads.fasta", - errors = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.nanopore_aligned_error_profile" - params: - dna_type = config["DNA_TYPE"], - model_caller = config["MODEL_CALLER"], - model_prefix = config["MODEL_PREFIX"], - median_length = config["MEDIAN_LENGTH"], - sd_length = config["SD_LENGTH"], - nb_reads = config["NB_READS"], - seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound - threads: - workflow.cores - conda: - config["HEAD_DIR"] + "/env/conda_nanosim.yaml" - log: - config["HEAD_DIR"] + "/logs/nanosim/nanosim.hap{sample}.log" - shell: - """ - simulator.py genome \ - -dna_type {params.dna_type} \ - -rg {input.simu_hap} \ - -c {params.model_prefix} \ - --b {params.model_caller} \ - -med {params.median_length} \ - -sd {params.sd_length} \ - -n {params.nb_reads} \ - -o data/simulated_hap{wildcards.sample}/simulated.nanopore \ - --seed {params.seed} \ - -t {threads} \ - > {log} - """ \ No newline at end of file diff --git a/snakemake/ngs/Snakefile b/snakemake/ngs/Snakefile deleted file mode 100644 index d130147..0000000 --- a/snakemake/ngs/Snakefile +++ /dev/null @@ -1,80 +0,0 @@ -#################### PREAMBLE -from snakemake.utils import min_version -min_version("6.0") - - -#################### CONFIG -configfile: "configs/snake_config_ngs.yaml" - - -#################### INCLUDES - - -#################### PARAMS -HEAD_DIR = config["HEAD_DIR"] -NGS_NB_FRAGS = config["NGS_NB_FRAGS"] -SAMPLES = config["SAMPLES"] -SEED = config["SEED"] - -#################### RULES -rule all: - input: - simulated_read = expand(config["HEAD_DIR"] + "/data/simulated_hap{s}/simulated.ngs.{r}.fastq", s=SAMPLES, r=["R1", "R2"]) - - -rule ngs_read_simulator: - input: - simu_hap 
= config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.fasta" - output: - r1 = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.ngs.R1.fastq", - r2 = config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.ngs.R2.fastq" -# ra = temp(config["HEAD_DIR"] + "/data/simulated_hap{sample}/simulated.bam") - params: - nb_frag = config["NGS_NB_FRAGS"], - seed = lambda w: (SEED * int(w.sample.lstrip('0'))) % 2147483647 # that's (2^31)-1, the upper bound - threads: - workflow.cores - conda: - config["HEAD_DIR"] + "/env/conda_mason.yaml" - log: - config["HEAD_DIR"] + "/logs/mason/mason_simulator.hap{sample}.log" - shell: - """ - mason_simulator \ - -ir {input} \ - -n {params.nb_frag} \ - -o {output.r1} \ - -or {output.r2} \ - --seed {params.seed} \ - --num-threads {threads} \ - --fragment-min-size 450 \ - --fragment-max-size 550 \ - --fragment-mean-size 500 \ - --fragment-size-std-dev 20 \ - --illumina-read-length 150 - """ -# --out-alignment {output.ra} \ - -#rule ngs_read_sort: -# input: -# rules.ngs_simulator.output.ra -# output: -# config["HEAD_DIR"] + "/data/simulated_hap{samples}/simulated.sorted.bam" -# conda: -# config["HEAD_DIR"] + "/env/conda_bwa_and_samtools.yaml" -# shell: -# """ -# samtools sort -o {output} {input} -# """ - -#rule ngs_read_index: -# input: -# rules.ngs_read_sort.output -# output: -# config["HEAD_DIR"] + "/data-ci/simulated_hap{samples}/simulated.sorted.bam.bai" -# conda: -# config["HEAD_DIR"] + "/env/conda_bwa_and_samtools.yaml" -# shell: -# """ -# samtools index {input} -# """ \ No newline at end of file diff --git a/subworkflows/norm_vcf/main.nf b/subworkflows/norm_vcf/main.nf new file mode 100644 index 0000000..0c2834a --- /dev/null +++ b/subworkflows/norm_vcf/main.nf @@ -0,0 +1,18 @@ +// include modules +include { BCFOOTLS_INDEX } from '../../modules/bcftools/index/main.nf' +include { BCFOOTLS_NORM } from '../../modules/bcftools/norm/main.nf' +include { BCFOOTLS_SORT } from '../../modules/bcftools/sort/main.nf' + + 
+workflow NORM_VCF{ + take: + vcffiles + reference_genome + + main: + BCFOOTLS_NORM(vcffiles,reference_genome) | BCFOOTLS_SORT | BCFOOTLS_INDEX + + emit: + ch_normed_sorted_vcffiles = BCFOOTLS_SORT.out + ch_index_of_vcffiles = BCFOOTLS_INDEX.out +} \ No newline at end of file