diff --git a/assets/samplesheet_s3.csv b/assets/samplesheet_s3.csv index 5089aed..b409060 100644 --- a/assets/samplesheet_s3.csv +++ b/assets/samplesheet_s3.csv @@ -1,6 +1,6 @@ sample,datatype,datafile,library mMelMel1,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel1/illumina/31231_3%231.subset.cram, -mMelMel2,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel2/illumina/31231_4%231.subset.cram, +mMelMel2,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel2/illumina/31231_4%231.subset.fastq.gz, mMelMel3,hic,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel3/hic/35528_2%231.subset.cram, mMelMel3,ont,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel3/ont/PAE35587_pass_1f1f0707_115.subset.fastq.gz, mMelMel3,pacbio,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel3/pacbio/m64094_200910_173211.ccs.bc1022_BAK8B_OA--bc1022_BAK8B_OA.subset.bam, diff --git a/docs/usage.md b/docs/usage.md index 8c6923d..dc201ef 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -42,7 +42,7 @@ sample1_T5,pacbio,pacbio2.bam,pacbio2 | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (\_). | | `datatype` | Type of sequencing data. Must be one of `hic`, `Illumina`, `pacbio`, or `ont`. | -| `datafile` | Full path to read data file. Must be `bam` or `cram` for `hic` and `illumina`. Must be `bam` for `pacbio`. Must be `fastq.gz` or `fq.gz` for `ont`. | +| `datafile` | Full path to read data file. 
Must be `bam`, `cram`, `fastq.gz`, or `fq.gz` for `hic` and `illumina`. Must be `bam` for `pacbio`. Must be `fastq.gz` or `fq.gz` for `ont`. | | `library` | (Optional) The library value is a unique identifier which is assigned to read group (`@RG`) ID. If the library name is not specified, the pipeline will auto-create library name using the data filename provided in the samplesheet. | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. diff --git a/subworkflows/local/align_short.nf b/subworkflows/local/align_short.nf index 33c27b6..f318606 100644 --- a/subworkflows/local/align_short.nf +++ b/subworkflows/local/align_short.nf @@ -18,14 +18,28 @@ workflow ALIGN_SHORT { main: ch_versions = Channel.empty() + // Check file types and branch + reads + | branch { + meta, reads -> + fastq : reads.findAll { it.getName().toLowerCase() =~ /.*f.*\.gz/ } + cram : true + } + | set { ch_reads } + - // Convert from CRAM to FASTQ - SAMTOOLS_FASTQ ( reads, false ) + // Convert from CRAM to FASTQ only if CRAM files were provided as input + SAMTOOLS_FASTQ ( ch_reads.cram, false ) ch_versions = ch_versions.mix ( SAMTOOLS_FASTQ.out.versions.first() ) + + + SAMTOOLS_FASTQ.out.fastq + | mix ( ch_reads.fastq ) + | set { ch_reads_fastq } - // Align Fastq to Genome and output sorted BAM - BWAMEM2_MEM ( SAMTOOLS_FASTQ.out.fastq, index, true ) + // Align Fastq to Genome and output sorted BAM + BWAMEM2_MEM ( ch_reads_fastq, index, true ) ch_versions = ch_versions.mix ( BWAMEM2_MEM.out.versions.first() ) diff --git a/workflows/readmapping.nf b/workflows/readmapping.nf index 18ebd36..2910f52 100644 --- a/workflows/readmapping.nf +++ b/workflows/readmapping.nf @@ -112,7 +112,7 @@ workflow READMAPPING { // ALIGN_HIC ( PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.bwaidx, ch_reads.hic ) ch_versions = ch_versions.mix ( ALIGN_HIC.out.versions ) - + ALIGN_ILLUMINA ( PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.bwaidx, ch_reads.illumina ) ch_versions = 
ch_versions.mix ( ALIGN_ILLUMINA.out.versions )