Skip to content

Commit

Permalink
Merge pull request #13 from RVanDamme/nf20.07.1
Browse files Browse the repository at this point in the history
Nf20.07.1
  • Loading branch information
RVanDamme authored Aug 11, 2020
2 parents 681a050 + 3096766 commit bf3ef37
Show file tree
Hide file tree
Showing 32 changed files with 215 additions and 180 deletions.
43 changes: 23 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,23 +94,8 @@ nextflow run RVanDamme/MUFFIN --parameters.....
```

### For conda usage
If you use conda you need to install Metawrap in an environment you create yourself, this is due to a known issue that will be fixed soon.

```sh

#create an env and install metawrap
conda create -y -p /path/to/install/metawrap-env python=2.7
conda activate /path/to/install/metawrap-env
conda config --add channels defaults
conda config --add channels conda-forge
conda config --add channels bioconda
conda config --add channels ursky
conda install -y -c ursky metawrap-mg
conda deactivate

#edit MAFIN/modules/metawrap_refine_bin.nf to use the env of metawrap
#you need to change the line 3 and 25 to the path of your env (/path/to/install/metawrap-env)
```
If you use conda, you don't need extra installations.
An error might occur during the installation of metawrap; if so, please consult [Troubleshooting](#troubleshooting).

### For gcloud usage
If you use the Google Life Sciences resources, you first need to set up a few parameters.
Expand Down Expand Up @@ -146,10 +131,10 @@ If you desire run on gcloud without the preemptible parameter activated just edi
### For containers usage
If you use containers either docker or singularity, you don't need extra installations
If you use containers either docker or singularity, you don't need extra installations.
### For usage of software installed locally
You just need to have all the software used in the pipeline (see table above) installed and in your $PATH
You just need to have all the software used in the pipeline (see table above) installed and in your $PATH.
## Test the pipeline
To test the pipeline we have a subset of 5 bins available at https://osf.io/9xmh4/
Expand Down Expand Up @@ -228,7 +213,25 @@ If you run "annotate" without "classify" use "--bin_annotate"
## Troubleshooting
* If metawrap fail using conda check that you installed metawrap in a conda environment and put the path in "modules/metawrap_refine_bin.nf"
* If the metawrap installation fails while using conda, a known fix is to install metawrap in a separate conda environment and put the path to that environment in "modules/metawrap_refine_bin.nf".
To do so, run the following commands:
```sh
#create an env and install metawrap
conda create -y -p /path/to/install/metawrap-env python=2.7
conda activate /path/to/install/metawrap-env
conda config --add channels defaults
conda config --add channels conda-forge
conda config --add channels bioconda
conda config --add channels ursky
conda install -y -c ursky metawrap-mg
conda deactivate
#edit MUFFIN/modules/metawrap_refine_bin.nf to use the metawrap env
#you need to change lines 3 and 25 to the path of your env (/path/to/install/metawrap-env)
```
* If you run the pipeline with Google Life Sciences and get error code 14,
it means the process was killed by Google; just run the pipeline again, and don't forget to add "-resume".
Expand Down
4 changes: 2 additions & 2 deletions configs/conda.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ process {
withLabel : spades { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::spades=3.13.2'}
withLabel : flye { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::flye=2.7'}
conda = 'bioconda::flye=2.8'}
withLabel : racon { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::racon=1.4.13 '}
withLabel : medaka { cpus = params.cpus ; memory = params.memory
Expand All @@ -28,7 +28,7 @@ process {
withLabel : checkm { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::checkm-genome=1.0.13'}
withLabel : metawrap { cpus = params.cpus ; memory = params.memory;
conda = 'ursky::metawrap-mg=1.2.2'}
conda = 'ursky::metawrap-mg=1.3'}
withLabel : seqtk { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::seqtk=1.3 bioconda::samtools=1.9 '}
withLabel : unicycler { cpus = params.cpus ; memory = params.memory
Expand Down
4 changes: 2 additions & 2 deletions configs/container.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ process {
withLabel: concoct { cpus = 8 ; memory = '30g' ; container = 'nanozoo/concoct:1.1.0--03a3888' }
withLabel: fastp { cpus = 8 ; memory = '30g' ; container = 'nanozoo/fastp:0.20.0--78a7c63' }
withLabel: filtlong { cpus = 8 ; memory = '14g' ; container = 'nanozoo/filtlong:v0.2.0--afa175e' }
withLabel: flye { cpus = 8 ; memory = '30g' ; container = 'nanozoo/flye:2.7--957a1a1' }
withLabel: flye { cpus = 8 ; memory = '30g' ; container = 'nanozoo/flye:2.8--95b6dca' }
withLabel: maxbin2 { cpus = 8 ; memory = '30g' ; container = 'nanozoo/maxbin2:2.2.7--b643a6b' }
withLabel: medaka { cpus = 8 ; memory = '30g' ; container = 'nanozoo/medaka:1.0.3--7c62d67' }
withLabel: metabat2 { cpus = 8 ; memory = '30g' ; container = 'nanozoo/metabat2:2.13--0e2577e' }
withLabel: metawrap { cpus = 24 ; memory = '150g' ; container = 'nanozoo/metawrap:1.2.2--de94241' }
withLabel: metawrap { cpus = 24 ; memory = '150g' ; container = 'nanozoo/metawrap:v1.3--a7eb9af' }
withLabel: minimap2 { cpus = 8 ; memory = '30g' ; container = 'nanozoo/minimap2:2.17--caba7af' }
withLabel: checkm { cpus = 24 ; memory = '150g' ; container = 'nanozoo/checkm:1.0.13--248242f' }
// withLabel: nanoplot { cpus = ; memory = '32g' ; container = 'nanozoo/nanoplot:1.25.0--4e2882f' }
Expand Down
104 changes: 52 additions & 52 deletions main.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl=2
nextflow.enable.dsl=2

start_var = """
start_var = Channel.from("""
*********Start running MUFFIN*********
MUFFIN is a hybrid assembly and differential binning workflow for metagenomics, transcriptomics and pathway analysis.
Expand All @@ -15,8 +15,8 @@ Van Damme R., Hölzer M., Viehweger H., Müller B., Bongcam-Rudloff E., Brandt C
"Metagenomics workflow for hybrid assembly, differential coverage binning, transcriptomics and pathway analysis (MUFFIN)",
doi: https://doi.org/10.1101/2020.02.08.939843
**************************************
"""
println start_var
""")
start_var.view()

if (params.help) { exit 0, helpMSG() }

Expand Down Expand Up @@ -98,7 +98,7 @@ def helpMSG() {
}

if( !nextflow.version.matches('20.+') ) {
println "This workflow requires Nextflow version 19.07 or greater and under version 20 -- You are running version $nextflow.version"
ch_ver=Channel.from("This workflow requires Nextflow version 20.07 or greater -- You are running version $nextflow.version").view()
exit 1
}

Expand Down Expand Up @@ -131,61 +131,61 @@ workflow { //start of the workflow

//module for assemble
if (params.modular=="full" | params.modular=="assemble" | params.modular=="assem-class" | params.modular=="assem-annot") {
include sourmash_download_db from './modules/sourmashgetdatabase'
include checkm_setup_db from './modules/checkmsetupDB'
include checkm_download_db from './modules/checkmgetdatabases'
include discard_short from './modules/ont_qc' params(short_qc : params.short_qc)
include filtlong from './modules/ont_qc' params(short_qc : params.short_qc)
include merge from './modules/ont_qc' params(output : params.output)
include fastp from './modules/fastp' params(output : params.output) // simple QC done by fastp
include spades from './modules/spades' params(output : params.output)
include sourmash_genome_size from './modules/sourmash'
include flye from './modules/flye' params(output : params.output)
include minimap_polish from'./modules/minimap2'
include racon from './modules/polish'
include medaka from './modules/polish' params(model : params.model)
include pilon from './modules/polish' params(output : params.output)
include minimap2 from './modules/minimap2' //mapping for the binning
include extra_minimap2 from './modules/minimap2'
include bwa from './modules/bwa' //mapping for the binning
include extra_bwa from './modules/bwa'
include metabat2_extra from './modules/metabat2' params(output : params.output)
include metabat2 from './modules/metabat2' params(output : params.output)
include maxbin2 from './modules/maxbin2' params(output : params.output)
include concoct_extra from './modules/concoct' params(output : params.output)
include concoct from './modules/concoct' params(output : params.output)
include refine2 from './modules/metawrap_refine_bin' params(output : params.output)
include refine3 from './modules/metawrap_refine_bin' params(output : params.output)
include contig_list from './modules/list_ids'
include cat_all_bins from './modules/cat_all_bins'
include bwa_bin from './modules/bwa'
include minimap2_bin from './modules/minimap2'
include reads_retrieval from './modules/seqtk_retrieve_reads' params(output : params.output)
include unmapped_retrieve from './modules/seqtk_retrieve_reads' params(output : params.output)
include {sourmash_download_db} from './modules/sourmashgetdatabase'
include {checkm_setup_db} from './modules/checkmsetupDB'
include {checkm_download_db} from './modules/checkmgetdatabases'
include {discard_short} from './modules/ont_qc' params(short_qc : params.short_qc)
include {filtlong} from './modules/ont_qc' params(short_qc : params.short_qc)
include {merge} from './modules/ont_qc' params(output : params.output)
include {fastp} from './modules/fastp' params(output : params.output) // simple QC done by fastp
include {spades} from './modules/spades' params(output : params.output)
//include {sourmash_genome_size} from './modules/sourmash' deprecated by flye 2.8
include {flye} from './modules/flye' params(output : params.output)
include {minimap_polish} from'./modules/minimap2'
include {racon} from './modules/polish'
include {medaka} from './modules/polish' params(model : params.model)
include {pilon} from './modules/polish' params(output : params.output)
include {minimap2} from './modules/minimap2' //mapping for the binning
include {extra_minimap2} from './modules/minimap2'
include {bwa} from './modules/bwa' //mapping for the binning
include {extra_bwa} from './modules/bwa'
include {metabat2_extra} from './modules/metabat2' params(output : params.output)
include {metabat2} from './modules/metabat2' params(output : params.output)
include {maxbin2} from './modules/maxbin2' params(output : params.output)
include {concoct_extra} from './modules/concoct' params(output : params.output)
include {concoct} from './modules/concoct' params(output : params.output)
include {refine2} from './modules/metawrap_refine_bin' params(output : params.output)
include {refine3} from './modules/metawrap_refine_bin' params(output : params.output)
include {contig_list} from './modules/list_ids'
include {cat_all_bins} from './modules/cat_all_bins'
include {bwa_bin} from './modules/bwa'
include {minimap2_bin} from './modules/minimap2'
include {reads_retrieval} from './modules/seqtk_retrieve_reads' params(output : params.output)
include {unmapped_retrieve} from './modules/seqtk_retrieve_reads' params(output : params.output)
//include unicycler './modules/unicycler_reassemble_from_bin' params(output : params.output)
}
//module for classify
if (params.modular=="full" | params.modular=="classify" | params.modular=="assem-class" | params.modular=="class-annot") {
include checkm from './modules/checkm'params(output : params.output)
include sourmash_bins from './modules/sourmash'params(output : params.output)
include sourmash_checkm_parser from './modules/checkm_sourmash_parser'params(output: params.output)
include {checkm} from './modules/checkm'params(output : params.output)
include {sourmash_bins} from './modules/sourmash'params(output : params.output)
include {sourmash_checkm_parser} from './modules/checkm_sourmash_parser'params(output: params.output)
}
if (params.modular=="classify" | params.modular=="class-annot") {
include sourmash_download_db from './modules/sourmashgetdatabase'
include checkm_setup_db from './modules/checkmsetupDB'
include checkm_download_db from './modules/checkmgetdatabases'
include {sourmash_download_db} from './modules/sourmashgetdatabase'
include {checkm_setup_db} from './modules/checkmsetupDB'
include {checkm_download_db} from './modules/checkmgetdatabases'
}
//module for annotate
if (params.modular=="full" | params.modular=="annotate" | params.modular=="assem-annot" | params.modular=="class-annot") {
include eggnog_download_db from './modules/eggnog_get_databases'
include eggnog_bin from './modules/eggnog'params(output : params.output)
include fastp_rna from './modules/fastp'params(output : params.output)
include de_novo_transcript_and_quant from './modules/trinity_and_salmon'params(output : params.output)
include eggnog_rna from './modules/eggnog'params(output : params.output)
include parser_bin_RNA from './modules/parser'params(output: params.output)
include parser_bin from './modules/parser'params(output: params.output)
include {eggnog_download_db} from './modules/eggnog_get_databases'
include {eggnog_bin} from './modules/eggnog'params(output : params.output)
include {fastp_rna} from './modules/fastp'params(output : params.output)
include {de_novo_transcript_and_quant} from './modules/trinity_and_salmon'params(output : params.output)
include {eggnog_rna} from './modules/eggnog'params(output : params.output)
include {parser_bin_RNA} from './modules/parser'params(output: params.output)
include {parser_bin} from './modules/parser'params(output: params.output)
}
include readme_output from './modules/readme_output'params(output: params.output)
include {readme_output} from './modules/readme_output'params(output: params.output)

//*************************************************
// STEP 1 Assemble using hybrid method
Expand All @@ -199,7 +199,7 @@ workflow { //start of the workflow

// DATA INPUT TEST
if (workflow.profile.contains('test')) {
include test from './modules/test_data_dll'
include {test} from './modules/test_data_dll'
test()
illumina_input_ch = test.out[0]
ont_input_ch = test.out[1]
Expand Down Expand Up @@ -304,7 +304,7 @@ workflow { //start of the workflow

if (params.assembler=="metaflye") { // metagenomic assembly by flye + hybrid polishing (combo racon; medaka; pilon with short reads)
// FLYE + Pilon
flye(sourmash_genome_size(ont_input_ch,database_sourmash))
flye(ont_input_ch)
flye_to_map = flye.out.join(ont_input_ch)
minimap_polish(flye_to_map)
map_to_racon = ont_input_ch.join(flye.out).join(minimap_polish.out)
Expand Down
12 changes: 6 additions & 6 deletions modules/bwa.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ process bwa {
label 'bwa'
//publishDir "${params.output}/${name}_bam/", mode: 'copy', pattern: "illumina.bam"
//SINCE THIS module is use multiple times it migh not be advise to output the same name file mutiple times
errorStrategy { task.exitStatus in 14..14 ? 'retry' : 'finish'}
maxRetries 3
errorStrategy = { task.exitStatus==14 ? 'retry' : 'terminate' }
maxRetries = 5
input:
tuple val(name), path(assembly), path(illumina)
output:
Expand All @@ -20,8 +20,8 @@ process bwa {

process extra_bwa {
label 'bwa'
errorStrategy { task.exitStatus in 14..14 ? 'retry' : 'finish'}
maxRetries 3
errorStrategy = { task.exitStatus==14 ? 'retry' : 'terminate' }
maxRetries = 5
//publishDir "${params.output}/${name}_bam/", mode: 'copy', pattern: "illumina.bam"
//SINCE THIS module is use multiple times it migh not be advise to output the same name file mutiple times
input:
Expand All @@ -40,8 +40,8 @@ process extra_bwa {

process bwa_bin {
label 'bwa'
errorStrategy { task.exitStatus in 14..14 ? 'retry' : 'finish'}
maxRetries 3
errorStrategy = { task.exitStatus==14 ? 'retry' : 'terminate' }
maxRetries = 5
//publishDir "${params.output}/${name}_bam/", mode: 'copy', pattern: "illumina.bam"
//SINCE THIS module is use multiple times it migh not be advise to output the same name file mutiple times
input:
Expand Down
2 changes: 2 additions & 0 deletions modules/cat_all_bins.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
process cat_all_bins {
label 'ubuntu'
errorStrategy = { task.exitStatus==14 ? 'retry' : 'terminate' }
maxRetries = 5
input:
tuple val(name), path(bins)
output:
Expand Down
4 changes: 2 additions & 2 deletions modules/checkm.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ process checkm {
publishDir "${params.output}/${name}/classify/checkm/", mode: 'copy', pattern: "taxonomy.txt"
publishDir "${params.output}/${name}/classify/checkm/", mode: 'copy', pattern: "*_checkm"
publishDir "${params.output}/${name}/classify/checkm/", mode: 'copy', pattern: "*_checkm_plot"
errorStrategy { task.exitStatus in 14..14 ? 'retry' : 'finish'}
maxRetries 3
errorStrategy = { task.exitStatus==14 ? 'retry' : 'terminate' }
maxRetries = 5
input:
tuple val(name), path(bins_assemblies)
output:
Expand Down
2 changes: 2 additions & 0 deletions modules/checkm_sourmash_parser.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ process sourmash_checkm_parser {
//label 'python38'
label 'ubuntu'
publishDir "${params.output}/${name}/classify/", mode: 'copy', pattern: "classify_step_summary.csv"
errorStrategy = { task.exitStatus==14 ? 'retry' : 'terminate' }
maxRetries = 5
input:
tuple val(name), path(checkm)
path(sourmash)
Expand Down
22 changes: 12 additions & 10 deletions modules/checkmgetdatabases.nf
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
process checkm_download_db {

if (workflow.profile.contains('gcloud')) {publishDir 'gs://gcloud_storage/databases-nextflow/checkm', mode: 'copy', pattern: "checkm_data_2015_01_16.tar.gz"}
else { storeDir 'nextflow-autodownload-databases/checkm' }
label 'ubuntu'
output:
path("checkm_data_2015_01_16.tar.gz")
script:
"""
wget https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_2015_01_16.tar.gz
"""
}
if (workflow.profile.contains('gcloud')) {publishDir 'gs://gcloud_storage/databases-nextflow/checkm', mode: 'copy', pattern: "checkm_data_2015_01_16.tar.gz"}
else { storeDir 'nextflow-autodownload-databases/checkm' }
label 'ubuntu'
errorStrategy = { task.exitStatus==14 ? 'retry' : 'terminate' }
maxRetries = 5
output:
path("checkm_data_2015_01_16.tar.gz")
script:
"""
wget https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_2015_01_16.tar.gz
"""
}
2 changes: 2 additions & 0 deletions modules/checkmsetupDB.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
process checkm_setup_db {
label 'checkm'
errorStrategy = { task.exitStatus==14 ? 'retry' : 'terminate' }
maxRetries = 5
input:
val(db)
val(untar)
Expand Down
Loading

0 comments on commit bf3ef37

Please sign in to comment.