Skip to content

Commit

Permalink
Merge pull request #57 from Eco-Flow/new_input_validation
Browse files Browse the repository at this point in the history
Fixed genome only option not working
  • Loading branch information
FernandoDuarteF authored Nov 6, 2024
2 parents 6613139 + c56b5e6 commit 49a21b3
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 107 deletions.
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
3 changes: 1 addition & 2 deletions modules/local/create_path.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ process CREATE_PATH {
tuple val(meta), val(accession)

output:
val meta , emit: meta
path "${meta.id}.txt" , emit: accession
tuple val (meta), path("${meta.id}.txt"), emit: accession

when:
task.ext.when == null || task.ext.when
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ params {
groups = 'all'

// merqury/meryl options
merqury_skip = false
skip_merqury = false
kvalue = 21

// BUSCO options
Expand Down
20 changes: 10 additions & 10 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$schema": "https://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/ecoflow/genomeqc/master/nextflow_schema.json",
"title": "ecoflow/genomeqc pipeline parameters",
"description": "A pipeline to compare multiple genomes and annotations",
Expand Down Expand Up @@ -304,21 +304,21 @@
"description": "A path to a BUSCO config file (optional)"
},
"genome_only": {
"type": "string"
"type": "boolean",
"description": "Run genomeqc on genomes only"
},
"kvalue": {
"type": "integer",
"default": 21,
"description": "k size for meryl (merqury)"
"type": "integer",
"default": 21,
"description": "k size for meryl (merqury)"
},
"merqury_skip": {
"type": "boolean",
"default": true,
"description": "Skip meryl/merqury step?"
"skip_merqury": {
"type": "boolean",
"description": "Skip meryl/merqury step?"
},
"skip_tidk": {
"type": "boolean",
"description": "Do not run TIDK.",
"description": "Do not run TIDK",
"hidden": true,
"help_text": "You may wish to turn off the tidk subworkflow"
}
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/genome.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ workflow GENOME {

BUSCO_BUSCO (
ch_fasta,
"genome", // hard coded, other options ('prteins', 'transcriptome') make no sense
"genome", // hardcoded, other options ('proteins', 'transcriptome') make no sense
params.busco_lineage,
params.busco_lineages_path ?: [],
params.busco_config ?: []
Expand Down
52 changes: 15 additions & 37 deletions subworkflows/local/genome_and_annotation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ include { LONGEST } from '../../modules/local/longes
include { BUSCO_BUSCO } from '../../modules/nf-core/busco/busco/main'
include { QUAST } from '../../modules/nf-core/quast/main'
include { AGAT_SPSTATISTICS } from '../../modules/nf-core/agat/spstatistics/main'
//include { GFFREAD } from '../../modules/nf-core/gffread/main'
include { GFFREAD } from '../../modules/local/gffread'
include { ORTHOFINDER } from '../../modules/nf-core/orthofinder/main'

Expand All @@ -17,6 +16,7 @@ workflow GENOME_AND_ANNOTATION {
main:

ch_versions = Channel.empty()

// For tree plot
ch_tree_data = Channel.empty()

Expand All @@ -41,44 +41,12 @@ workflow GENOME_AND_ANNOTATION {
ch_tree_data = ch_tree_data.mix(QUAST.out.tsv.map { tuple -> tuple[1] })

//
// Run AGAT Spstatistics
// Run GFFREAD
//

AGAT_SPSTATISTICS (
ch_agat_gff
)
ch_versions = ch_versions.mix(AGAT_SPSTATISTICS.out.versions.first())

//
// Run AGAT longest isoform
//

// LONGEST (
// ch_ch_agat_gff
// )
// ch_versions = ch_versions.mix(LONGEST.out.versions.first())
//
// //
// // Run GFFREAD
// //
//
// ch_long_gff = LONGEST.out.longest_proteins
//
inputChannel = ch_agat_gff.combine(ch_fasta, by: 0)

// Split the input channel into two channels
gffChannel = inputChannel.map { tuple ->
// Extracting the GFF path and ID
[tuple[0], tuple[1]]
}
fnaChannel = inputChannel.map { tuple ->
// Extracting only the FNA path
[tuple[0], tuple[2]]
}

GFFREAD (
fnaChannel,
gffChannel
ch_fasta,
ch_gff
)
ch_versions = ch_versions.mix(GFFREAD.out.versions.first())

Expand All @@ -87,7 +55,7 @@ workflow GENOME_AND_ANNOTATION {
//

ortho_ch = GFFREAD.out.longest.collect().map { it -> [[id:"orthofinder"], it] }

ORTHOFINDER (
ortho_ch,
[[],[]]
Expand All @@ -109,6 +77,16 @@ workflow GENOME_AND_ANNOTATION {

ch_tree_data = ch_tree_data.mix(BUSCO_BUSCO.out.batch_summary.collect { meta, file -> file })

//
// Run AGAT Spstatistics
//

AGAT_SPSTATISTICS (
ch_gff
)
ch_versions = ch_versions.mix(AGAT_SPSTATISTICS.out.versions.first())


emit:
orthofinder = ORTHOFINDER.out.orthofinder // channel: [ val(meta), [folder] ]
//busco = BUSCO_BUSCO.out.batch_summary.collect { meta, file -> file }
Expand Down
31 changes: 17 additions & 14 deletions subworkflows/local/utils_nfcore_genomeqc_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -149,22 +149,25 @@ def validateInputParameters() {
def validateInputSamplesheet(input) {
def (meta, refseq, fasta, gff, fastq) = input
// As of now, there are only two input options: a RefSeq ID or local files. The pipeline throws an error if the samplesheet does not contain the required information
// For the RefSeq ID option
if ( meta && refseq && !fasta && !gff ) {
return [ meta, refseq, fastq ]
// For the local files option
} else if ( meta && !refseq && fasta && gff) {
return [ meta, fasta, gff, fastq ]
// If --genome_only parameter
// Check for genome-only mode
if (params.genome_only) {
if (meta && refseq && !fasta && !gff) {
return [meta, refseq, fastq]
} else if (meta && !refseq && fasta) {
return [meta, fasta, gff, fastq] // gff may be empty or present; either way it is not used in genome-only mode
} else {
error("You are running in --genome_only mode. Please check input samplesheet -> Incorrect samplesheet format")
}
} else {
error("Please check input samplesheet -> Incorrent samplesheet format")
if (meta && refseq && !fasta && !gff) {
return [ meta, refseq, fastq ]
} else if ( meta && !refseq && fasta && gff ) {
return [ meta, fasta, gff, fastq ]
} else {
error("You are running on default mode. Please check input samplesheet -> Incorrect samplesheet format")
}
}
// Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
//def endedness_ok = metas.collect{ it.single_end }.unique().size == 1
//if (!endedness_ok) {
// error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
//}

//return [ metas[0], fastqs ]
}
//
// Get attribute from genome config file e.g. fasta
Expand Down
Loading

0 comments on commit 49a21b3

Please sign in to comment.