Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed genome only option not working #57

Merged
merged 10 commits into from
Nov 6, 2024
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
3 changes: 1 addition & 2 deletions modules/local/create_path.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ process CREATE_PATH {
tuple val(meta), val(accession)

output:
val meta , emit: meta
path "${meta.id}.txt" , emit: accession
tuple val (meta), path("${meta.id}.txt"), emit: accession

when:
task.ext.when == null || task.ext.when
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ params {
groups = 'all'

// merqury/meryl options
merqury_skip = false
skip_merqury = false
kvalue = 21

// BUSCO options
Expand Down
20 changes: 10 additions & 10 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$schema": "https://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/ecoflow/genomeqc/master/nextflow_schema.json",
"title": "ecoflow/genomeqc pipeline parameters",
"description": "A pipeline to compare multiple genomes and annotations",
Expand Down Expand Up @@ -304,21 +304,21 @@
"description": "A path to a BUSCO config file (optional)"
},
"genome_only": {
"type": "string"
"type": "boolean",
"description": "Run genomeqc on genomes only"
},
"kvalue": {
"type": "integer",
"default": 21,
"description": "k size for meryl (merqury)"
"type": "integer",
"default": 21,
"description": "k size for meryl (merqury)"
},
"merqury_skip": {
"type": "boolean",
"default": true,
"description": "Skip meryl/merqury step?"
"skip_merqury": {
"type": "boolean",
"description": "Skip meryl/merqury step?"
},
"skip_tidk": {
"type": "boolean",
"description": "Do not run TIDK.",
"description": "Do not run TIDK",
"hidden": true,
"help_text": "You may wish to turn off the tidk subworkflow"
}
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/genome.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ workflow GENOME {

BUSCO_BUSCO (
ch_fasta,
"genome", // hard coded, other options ('prteins', 'transcriptome') make no sense
"genome", // hardcoded, other options ('proteins', 'transcriptome') make no sense
params.busco_lineage,
params.busco_lineages_path ?: [],
params.busco_config ?: []
Expand Down
52 changes: 15 additions & 37 deletions subworkflows/local/genome_and_annotation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ include { LONGEST } from '../../modules/local/longes
include { BUSCO_BUSCO } from '../../modules/nf-core/busco/busco/main'
include { QUAST } from '../../modules/nf-core/quast/main'
include { AGAT_SPSTATISTICS } from '../../modules/nf-core/agat/spstatistics/main'
//include { GFFREAD } from '../../modules/nf-core/gffread/main'
include { GFFREAD } from '../../modules/local/gffread'
include { ORTHOFINDER } from '../../modules/nf-core/orthofinder/main'

Expand All @@ -17,6 +16,7 @@ workflow GENOME_AND_ANNOTATION {
main:

ch_versions = Channel.empty()

// For tree plot
ch_tree_data = Channel.empty()

Expand All @@ -41,44 +41,12 @@ workflow GENOME_AND_ANNOTATION {
ch_tree_data = ch_tree_data.mix(QUAST.out.tsv.map { tuple -> tuple[1] })

//
// Run AGAT Spstatistics
// Run GFFREAD
//

AGAT_SPSTATISTICS (
ch_agat_gff
)
ch_versions = ch_versions.mix(AGAT_SPSTATISTICS.out.versions.first())

//
// Run AGAT longest isoform
//

// LONGEST (
// ch_ch_agat_gff
// )
// ch_versions = ch_versions.mix(LONGEST.out.versions.first())
//
// //
// // Run GFFREAD
// //
//
// ch_long_gff = LONGEST.out.longest_proteins
//
inputChannel = ch_agat_gff.combine(ch_fasta, by: 0)

// Split the input channel into two channels
gffChannel = inputChannel.map { tuple ->
// Extracting the GFF path and ID
[tuple[0], tuple[1]]
}
fnaChannel = inputChannel.map { tuple ->
// Extracting only the FNA path
[tuple[0], tuple[2]]
}

GFFREAD (
fnaChannel,
gffChannel
ch_fasta,
ch_gff
)
ch_versions = ch_versions.mix(GFFREAD.out.versions.first())

Expand All @@ -87,7 +55,7 @@ workflow GENOME_AND_ANNOTATION {
//

ortho_ch = GFFREAD.out.longest.collect().map { it -> [[id:"orthofinder"], it] }

ORTHOFINDER (
ortho_ch,
[[],[]]
Expand All @@ -109,6 +77,16 @@ workflow GENOME_AND_ANNOTATION {

ch_tree_data = ch_tree_data.mix(BUSCO_BUSCO.out.batch_summary.collect { meta, file -> file })

//
// Run AGAT Spstatistics
//

AGAT_SPSTATISTICS (
ch_gff
)
ch_versions = ch_versions.mix(AGAT_SPSTATISTICS.out.versions.first())


emit:
orthofinder = ORTHOFINDER.out.orthofinder // channel: [ val(meta), [folder] ]
//busco = BUSCO_BUSCO.out.batch_summary.collect { meta, file -> file }
Expand Down
31 changes: 17 additions & 14 deletions subworkflows/local/utils_nfcore_genomeqc_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -149,22 +149,25 @@ def validateInputParameters() {
def validateInputSamplesheet(input) {
def (meta, refseq, fasta, gff, fastq) = input
// For now, there are only two input options: a RefSeq ID or local files. The pipeline will throw an error if the samplesheet does not contain the proper information
// For the RefSeq ID option
if ( meta && refseq && !fasta && !gff ) {
return [ meta, refseq, fastq ]
// For the local files option
} else if ( meta && !refseq && fasta && gff) {
return [ meta, fasta, gff, fastq ]
// If --genome_only parameter
// Check for genome-only mode
if (params.genome_only) {
if (meta && refseq && !fasta && !gff) {
return [meta, refseq, fastq]
} else if (meta && !refseq && fasta) {
return [meta, fasta, gff, fastq] // Empty or not gff, either way won't be used
} else {
error("You are running in --genome_only mode. Please check input samplesheet -> Incorrect samplesheet format")
}
} else {
error("Please check input samplesheet -> Incorrent samplesheet format")
if (meta && refseq && !fasta && !gff) {
return [ meta, refseq, fastq ]
} else if ( meta && !refseq && fasta && gff ) {
return [ meta, fasta, gff, fastq ]
} else {
error("You are running on default mode. Please check input samplesheet -> Incorrent samplesheet format")
}
}
// Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
//def endedness_ok = metas.collect{ it.single_end }.unique().size == 1
//if (!endedness_ok) {
// error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
//}

//return [ metas[0], fastqs ]
}
//
// Get attribute from genome config file e.g. fasta
Expand Down
Loading