From bd5f75ccaf2345269810e66e85de8a70e4de8764 Mon Sep 17 00:00:00 2001 From: Sateesh_Peri <33637490+sateeshperi@users.noreply.github.com> Date: Sat, 30 Nov 2024 11:59:38 +0530 Subject: [PATCH] Fastq align dedup bwameth (#7007) * init bwameth subworkflow * update output channels * bwameth single, paired-end default, skip_dedup tests * add GPU tests * separate GPU tests * add subworkflow GPU test path to test.yml include * add subworkflow GPU test path to test.yml include * add subworkflow GPU test path to test.yml include * use more descriptive collect variables than it * rename file to be explicit * separate profile exclusions * add gpu tag * rm old test.yml * add gpu test path to gpu-tests.yml:ci * Added log_level: DEBUG * Setup CI for debug * add more debug steps * usman's fix for exclude tags * Updated fail condition * ci sync to master * fix filtering by usman * Removed --changed-since when tags are supplied * Enabled more checks and added a non-gpu module * Two fixes * Now using paths * Fixed paths * Fixed typo * Now pass all paths instead of matrix * Added confirm pass, cleaned dispatch and removed DEBUG * Renamed all passes to confirm-pass * Now using adamrtalbot/detect-nf-test-changes * small ch format fix --------- Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Co-authored-by: Usman Rashid --- .github/actions/nf-test-action/action.yml | 11 +- .github/workflows/gpu-tests.yml | 43 ++-- .github/workflows/nf-test.yml | 91 +++++-- modules/nf-core/gffread/tests/main.nf.test | 3 +- .../nf-core/fastq_align_dedup_bwameth/main.nf | 163 +++++++++++++ .../fastq_align_dedup_bwameth/meta.yml | 116 +++++++++ .../tests/gpu.nf.test | 132 +++++++++++ .../tests/gpu.nf.test.snap | 149 ++++++++++++ .../tests/main.nf.test | 179 ++++++++++++++ .../tests/main.nf.test.snap | 224 ++++++++++++++++++ .../tests/nextflow.config | 14 ++ 11 files changed, 1078 insertions(+), 47 deletions(-) create mode 100644 subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf create mode 100644 subworkflows/nf-core/fastq_align_dedup_bwameth/meta.yml create mode 100644 subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test create mode 100644 subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test.snap create mode 100644 subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test create mode 100644 subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/fastq_align_dedup_bwameth/tests/nextflow.config diff --git a/.github/actions/nf-test-action/action.yml b/.github/actions/nf-test-action/action.yml index 8cd79f21680..a6f2532f926 100644 --- a/.github/actions/nf-test-action/action.yml +++ b/.github/actions/nf-test-action/action.yml @@ -10,9 +10,9 @@ inputs: total_shards: description: "Total number of test shards(NOT the total number of matrix jobs)" required: true - tags: - description: "Tags to test (`[,...]`)" - required: false + paths: + description: "Test paths" + required: true runs: using: "composite" @@ -72,7 +72,6 @@ runs: env: SENTIEON_LICSRVR_IP: ${{ env.SENTIEON_LICSRVR_IP }} SENTIEON_AUTH_MECH: "GitHub Actions - token" - TAGS: ${{ inputs.tags && format('--tag {0}', inputs.tags) || '' }} run: | NFT_WORKDIR=~ \ nf-test test \ @@ -80,11 +79,9 @@ runs: --tap=test.tap \ --verbose \ --ci \ - --changed-since HEAD^ \ --shard ${{ inputs.shard }}/${{ inputs.total_shards }} \ --filter process,workflow \ - --follow-dependencies \ - ${{ env.TAGS }} + ${{ inputs.paths }} # TODO If no test.tap, then make one to spoof? - uses: pcolby/tap-summary@0959cbe1d4422e62afc65778cdaea6716c41d936 # v1 diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml index 34206298769..78667065a3b 100644 --- a/.github/workflows/gpu-tests.yml +++ b/.github/workflows/gpu-tests.yml @@ -6,9 +6,6 @@ on: - "renovate/**" # branches Renovate creates pull_request: branches: [master] - paths: - - ".github/workflows/gpu-tests.yml" - - "modules/nf-core/parabricks/**" merge_group: types: [checks_requested] branches: [master] @@ -16,11 +13,8 @@ on: inputs: runners: description: "Runners to test on" - type: choice - options: - - "ubuntu-latest" - - "self-hosted" - default: "self-hosted" + type: string + default: "gpu" # Cancel if a newer run is started concurrency: @@ -43,7 +37,7 @@ jobs: runs-on: ubuntu-latest outputs: # Expose detected tags as 'modules' and 'workflows' output variables - paths: ${{ steps.outputs.outputs.components }} + paths: ${{ steps.list.outputs.components }} modules: ${{ steps.outputs.outputs.modules }} subworkflows: ${{ steps.outputs.outputs.subworkflows}} # Prod for version bumping @@ -60,11 +54,11 @@ jobs: - name: List nf-test files id: list - uses: adamrtalbot/detect-nf-test-changes@6bf6fd9fe0fb63a0362fb0e09de5acb6d055a754 # v0.0.5 + uses: adamrtalbot/detect-nf-test-changes@de3c3c8e113031b4f15a3c1104b5f135e8346997 # v0.0.6 with: head: ${{ github.sha }} base: ${{ github.event.pull_request.base.sha || github.event.merge_group.base_sha }} - n_parents: 2 + n_parents: 0 tags: "gpu" - name: Separate modules and subworkflows @@ -74,21 +68,20 @@ jobs: echo subworkflows=$(echo '${{ steps.list.outputs.components }}' | jq '. | map(select(contains("subworkflows"))) | map(gsub("subworkflows/nf-core/"; ""))') >> $GITHUB_OUTPUT - name: debug run: | - echo ${{ steps.outputs.outputs.components }} + echo ${{ steps.list.outputs.components }} echo ${{ steps.outputs.outputs.modules }} echo ${{ steps.outputs.outputs.subworkflows }} nf-test-gpu: runs-on: "gpu" + name: "GPU | ${{ matrix.profile }} | ${{ matrix.shard }}" needs: nf-test-changes - if: ${{ fromJSON(needs.nf-test-changes.outputs.paths) != '[]' || needs.nf-test-changes.outputs.paths != '' }} - name: "GPU | ${{ matrix.tags}} | ${{ matrix.profile }} | ${{ matrix.shard }}" + if: ${{ needs.nf-test-changes.outputs.modules != '[]' || needs.nf-test-changes.outputs.subworkflows != '[]' }} strategy: fail-fast: false matrix: shard: [1, 2] profile: [docker_self_hosted, singularity] # conda? - tags: ${{ fromJSON(needs.nf-test-changes.outputs.modules) && fromJSON(needs.nf-test-changes.outputs.subworkflows) }} env: NXF_ANSI_LOG: false TOTAL_SHARDS: 2 @@ -108,4 +101,22 @@ jobs: profile: ${{ matrix.profile }},gpu shard: ${{ matrix.shard }} total_shards: ${{ env.TOTAL_SHARDS }} - tags: ${{matrix.tags}},gpu + paths: "${{ join(fromJson(needs.nf-test-changes.outputs.paths), ' ') }}" + + confirm-pass: + runs-on: ubuntu-latest + needs: [nf-test-gpu] + if: always() + steps: + - name: All tests ok + if: ${{ success() || !contains(needs.*.result, 'failure') }} + run: exit 0 + - name: One or more tests failed + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: debug-print + if: always() + run: | + echo "toJSON(needs) = ${{ toJSON(needs) }}" + echo "toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index 5445a444531..f8c71275bf9 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -35,45 +35,90 @@ env: NXF_VER: "24.10.1" jobs: + nf-test-changes: + name: nf-test-changes + runs-on: ubuntu-latest + outputs: + # Expose detected tags as 'modules' and 'workflows' output variables + paths: ${{ steps.list.outputs.components }} + modules: ${{ steps.outputs.outputs.modules }} + subworkflows: ${{ steps.outputs.outputs.subworkflows}} + # Prod for version bumping + steps: + - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner + run: | + ls -la ./ + rm -rf ./* || true + rm -rf ./.??* || true + ls -la ./ + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 + + - name: List nf-test files + id: list + uses: adamrtalbot/detect-nf-test-changes@de3c3c8e113031b4f15a3c1104b5f135e8346997 # v0.0.6 + with: + head: ${{ github.sha }} + base: ${{ github.event.pull_request.base.sha || github.event.merge_group.base_sha }} + n_parents: 0 + exclude_tags: "gpu" + + - name: Separate modules and subworkflows + id: outputs + run: | + echo modules=$(echo '${{ steps.list.outputs.components }}' | jq -c '. | map(select(contains("modules"))) | map(gsub("modules/nf-core/"; ""))') >> $GITHUB_OUTPUT + echo subworkflows=$(echo '${{ steps.list.outputs.components }}' | jq '. | map(select(contains("subworkflows"))) | map(gsub("subworkflows/nf-core/"; ""))') >> $GITHUB_OUTPUT + - name: debug + run: | + echo ${{ steps.list.outputs.components }} + echo ${{ steps.outputs.outputs.modules }} + echo ${{ steps.outputs.outputs.subworkflows }} nf-test: runs-on: ${{ github.event.inputs.runners || 'self-hosted' }} - # NOTE I think this is the cleanest way to get them organized - # process | conda | 1 - # process | conda | 2 - # process | conda | 3 - # process | docker_self_hosted | 1 - # ... - # workflow | singularity | 3 name: "${{ matrix.profile }} | ${{ matrix.shard }}" - # TODO - # needs: get-number-of-shards - # if: ${{ fromJSON(needs.get-number-of-shards.outputs.shards) != fromJSON('["1", "0"]') }} + needs: nf-test-changes + if: ${{ needs.nf-test-changes.outputs.modules != '[]' || needs.nf-test-changes.outputs.subworkflows != '[]' }} strategy: fail-fast: false matrix: - # NOTE We could split these, but there's probably going to be more process tests than workflow tests, so we're just going to combine them all and bump up the shards for now - # NOTE The name of the test would be name: "${{ matrix.filter }} | ${{ matrix.profile }} | ${{ matrix.shard }}" - # filter: [process, workflow] - profile: [conda, docker_self_hosted, singularity] shard: [1, 2, 3, 4, 5] + profile: [conda, docker_self_hosted, singularity] env: - # FIXME Bumping them up to make the transition smooth, then we can throttle them back + NXF_ANSI_LOG: false TOTAL_SHARDS: 5 - SENTIEON_LICENSE_MESSAGE: ${{ secrets.SENTIEON_LICENSE_MESSAGE }} - SENTIEON_ENCRYPTION_KEY: ${{ secrets.SENTIEON_ENCRYPTION_KEY }} + steps: - - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner - run: | - ls -la ./ - rm -rf ./* || true - rm -rf ./.??* || true - ls -la ./ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 with: fetch-depth: 0 - name: Run nf-test Action uses: ./.github/actions/nf-test-action + env: + SENTIEON_ENCRYPTION_KEY: ${{ secrets.SENTIEON_ENCRYPTION_KEY }} + SENTIEON_LICENSE_MESSAGE: ${{ secrets.SENTIEON_LICENSE_MESSAGE }} + SENTIEON_LICSRVR_IP: ${{ secrets.SENTIEON_LICSRVR_IP }} + SENTIEON_AUTH_MECH: "GitHub Actions - token" with: profile: ${{ matrix.profile }} shard: ${{ matrix.shard }} total_shards: ${{ env.TOTAL_SHARDS }} + paths: "${{ join(fromJson(needs.nf-test-changes.outputs.paths), ' ') }}" + + confirm-pass: + runs-on: ubuntu-latest + needs: [nf-test] + if: always() + steps: + - name: All tests ok + if: ${{ success() || !contains(needs.*.result, 'failure') }} + run: exit 0 + - name: One or more tests failed + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: debug-print + if: always() + run: | + echo "toJSON(needs) = ${{ toJSON(needs) }}" + echo "toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test index 4cd13dcd33b..d039f367c15 100644 --- a/modules/nf-core/gffread/tests/main.nf.test +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -23,6 +23,7 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) ] input[1] = [] + """ } } @@ -220,4 +221,4 @@ nextflow_process { } -} +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf b/subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf new file mode 100644 index 00000000000..c0cc67b83c7 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf @@ -0,0 +1,163 @@ +include { BWAMETH_ALIGN } from '../../../modules/nf-core/bwameth/align/main' +include { PARABRICKS_FQ2BAMMETH } from '../../../modules/nf-core/parabricks/fq2bammeth/main' +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGNMENTS } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_DEDUPLICATED } from '../../../modules/nf-core/samtools/index/main' +include { METHYLDACKEL_EXTRACT } from '../../../modules/nf-core/methyldackel/extract/main' +include { METHYLDACKEL_MBIAS } from '../../../modules/nf-core/methyldackel/mbias/main' + +workflow FASTQ_ALIGN_DEDUP_BWAMETH { + + take: + ch_reads // channel: [ val(meta), [ reads ] ] + ch_fasta // channel: [ val(meta), [ fasta ] ] + ch_fasta_index // channel: [ val(meta), [ fasta index ] ] + ch_bwameth_index // channel: [ val(meta), [ bwameth index ] ] + skip_deduplication // boolean: whether to deduplicate alignments + + main: + + ch_alignment = Channel.empty() + ch_alignment_index = Channel.empty() + ch_samtools_flagstat = Channel.empty() + ch_samtools_stats = Channel.empty() + ch_methydackel_extract_bedgraph = Channel.empty() + ch_methydackel_extract_methylkit = Channel.empty() + ch_methydackel_mbias = Channel.empty() + ch_picard_metrics = Channel.empty() + ch_multiqc_files = Channel.empty() + ch_versions = Channel.empty() + + /* + * Align with bwameth + */ + if (params.use_gpu) { + /* + * Align with parabricks GPU enabled fq2bammeth implementation of bwameth + */ + PARABRICKS_FQ2BAMMETH ( + ch_reads, + ch_fasta, + ch_bwameth_index, + [] // known sites + ) + ch_alignment = PARABRICKS_FQ2BAMMETH.out.bam + ch_versions = ch_versions.mix(PARABRICKS_FQ2BAMMETH.out.versions) + } else { + /* + * Align with CPU version of bwameth + */ + BWAMETH_ALIGN ( + ch_reads, + ch_fasta, + ch_bwameth_index + ) + ch_alignment = BWAMETH_ALIGN.out.bam + ch_versions = BWAMETH_ALIGN.out.versions + } + + /* + * Sort raw output BAM + */ + SAMTOOLS_SORT ( + ch_alignment, + [[:],[]] // [ [meta], [fasta]] + ) + ch_alignment = SAMTOOLS_SORT.out.bam + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) + + /* + * Run samtools index on alignment + */ + SAMTOOLS_INDEX_ALIGNMENTS ( + ch_alignment + ) + ch_alignment_index = SAMTOOLS_INDEX_ALIGNMENTS.out.bai + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_ALIGNMENTS.out.versions) + + /* + * Run samtools flagstat + */ + SAMTOOLS_FLAGSTAT ( + ch_alignment.join(ch_alignment_index) + ) + ch_samtools_flagstat = SAMTOOLS_FLAGSTAT.out.flagstat + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + /* + * Run samtools stats + */ + SAMTOOLS_STATS ( + ch_alignment.join(ch_alignment_index), + [[:],[]] // [ [meta], [fasta]] + ) + ch_samtools_stats = SAMTOOLS_STATS.out.stats + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + + if (!skip_deduplication) { + /* + * Run Picard MarkDuplicates + */ + PICARD_MARKDUPLICATES ( + ch_alignment, + ch_fasta, + ch_fasta_index + ) + /* + * Run samtools index on deduplicated alignment + */ + SAMTOOLS_INDEX_DEDUPLICATED ( + PICARD_MARKDUPLICATES.out.bam + ) + ch_alignment = PICARD_MARKDUPLICATES.out.bam + ch_alignment_index = SAMTOOLS_INDEX_DEDUPLICATED.out.bai + ch_picard_metrics = PICARD_MARKDUPLICATES.out.metrics + ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_DEDUPLICATED.out.versions) + } + + /* + * Extract per-base methylation and plot methylation bias + */ + + METHYLDACKEL_EXTRACT ( + ch_alignment.join(ch_alignment_index), + ch_fasta.map{ meta, fasta_file -> fasta_file }, + ch_fasta_index.map{ meta, fasta_index -> fasta_index } + ) + ch_methydackel_extract_bedgraph = METHYLDACKEL_EXTRACT.out.bedgraph + ch_methydackel_extract_methylkit = METHYLDACKEL_EXTRACT.out.methylkit + ch_versions = ch_versions.mix(METHYLDACKEL_EXTRACT.out.versions) + + METHYLDACKEL_MBIAS ( + ch_alignment.join(ch_alignment_index), + ch_fasta.map{ meta, fasta_file -> fasta_file }, + ch_fasta_index.map{ meta, fasta_index -> fasta_index } + ) + ch_methydackel_mbias = METHYLDACKEL_MBIAS.out.txt + ch_versions = ch_versions.mix(METHYLDACKEL_MBIAS.out.versions) + + /* + * Collect MultiQC inputs + */ + ch_multiqc_files = ch_picard_metrics.collect{ meta, metrics -> metrics } + .mix(ch_samtools_flagstat.collect{ meta, flagstat -> flagstat }) + .mix(ch_samtools_stats.collect{ meta, stats -> stats }) + .mix(ch_methydackel_extract_bedgraph.collect{ meta, bedgraph -> bedgraph }) + .mix(ch_methydackel_mbias.collect{ meta, txt -> txt }) + + emit: + bam = ch_alignment // channel: [ val(meta), [ bam ] ] + bai = ch_alignment_index // channel: [ val(meta), [ bai ] ] + samtools_flagstat = ch_samtools_flagstat // channel: [ val(meta), [ flagstat ] ] + samtools_stats = ch_samtools_stats // channel: [ val(meta), [ stats ] ] + methydackel_extract_bedgraph = ch_methydackel_extract_bedgraph // channel: [ val(meta), [ bedgraph ] ] + methydackel_extract_methylkit = ch_methydackel_extract_methylkit // channel: [ val(meta), [ methylkit ] ] + methydackel_mbias = ch_methydackel_mbias // channel: [ val(meta), [ mbias ] ] + picard_metrics = ch_picard_metrics // channel: [ val(meta), [ metrics ] ] + multiqc = ch_multiqc_files // channel: [ *{html,txt} ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/meta.yml b/subworkflows/nf-core/fastq_align_dedup_bwameth/meta.yml new file mode 100644 index 00000000000..a66ea024e84 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/meta.yml @@ -0,0 +1,116 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_align_dedup_bwameth" +description: Performs alignment of BS-Seq reads using bwameth or parabricks/fq2bammeth, sort and deduplicate +keywords: + - bwameth + - alignment + - 3-letter genome + - map + - methylation + - 5mC + - methylseq + - bisulphite + - bisulfite + - fastq + - bam +components: + - bwameth/align + - parabricks/fq2bammeth + - samtools/sort + - samtools/index + - samtools/flagstat + - samtools/stats + - picard/markduplicates + - samtools/index + - methyldackel/extract + - methyldackel/mbias +input: + - ch_reads: + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + Structure: [ val(meta), [ path(reads) ] ] + pattern: "*.{fastq,fastq.gz}" + - ch_fasta: + type: file + description: | + Structure: [ val(meta), path(fasta) ] + pattern: "*.{fa,fa.gz}" + - ch_fasta_index: + type: file + description: | + Structure: [ val(meta), path(fasta index) ] + - ch_bwameth_index: + description: | + Bismark genome index files + Structure: [ val(meta), path(index) ] + pattern: "BismarkIndex" + - skip_deduplication: + type: boolean + description: | + Skip deduplication of aligned reads +output: + - bam: + type: file + description: | + Channel containing BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" + - bai: + type: file + description: | + Channel containing indexed BAM (BAI) files + Structure: [ val(meta), path(bai) ] + pattern: "*.bai" + - samtools_flagstat: + type: file + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + pattern: "*.flagstat" + - samtools_stats: + type: file + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + pattern: "*.{stats}" + - methydackel_extract_bedgraph: + type: file + description: | + bedGraph file, containing per-base methylation metrics + Structure: [ val(meta), path(bedgraph) ] + pattern: "*.bedGraph" + - methydackel_extract_methylkit: + type: file + description: | + methylKit file, containing per-base methylation metrics + Structure: [ val(meta), path(methylKit) ] + pattern: "*.methylKit" + - methydackel_mbias: + type: file + description: | + Text file containing methylation bias + Structure: [ val(meta), path(mbias) ] + pattern: "*.{txt}" + - picard_metrics: + type: file + description: | + Duplicate metrics file generated by picard + Structure: [ val(meta), path(metrics) ] + pattern: "*.{metrics.txt}" + - multiqc: + type: file + description: | + Channel containing MultiQC report aggregating results across samples. + Structure: [ val(meta), path(multiqc_report.html) ] + pattern: "*.html" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@sateeshperi" +maintainers: + - "@sateeshperi" diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test new file mode 100644 index 00000000000..b156de6bc47 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test @@ -0,0 +1,132 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_ALIGN_DEDUP_BWAMETH" + script "../main.nf" + workflow "FASTQ_ALIGN_DEDUP_BWAMETH" + config "./nextflow.config" + + tag "gpu" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_align_dedup_bwameth" + tag "bwameth/align" + tag "parabricks/fq2bammeth" + tag "samtools/sort" + tag "samtools/index" + tag "samtools/flagstat" + tag "samtools/stats" + tag "picard/markduplicates" + tag "samtools/index" + tag "methyldackel/extract" + tag "methyldackel/mbias" + tag "untar" + + setup { + run("UNTAR") { + script "../../../../modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/Bwameth_Index.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + test("Params: parabricks/fq2bammeth single-end | use_gpu") { + + when { + params { + skip_deduplication = false + use_gpu = true + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } + + test("Params: parabricks/fq2bammeth single-end | use_gpu | skip_deduplication") { + + when { + params { + skip_deduplication = true + use_gpu = true + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } + +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test.snap b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test.snap new file mode 100644 index 00000000000..a1602b3074a --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test.snap @@ -0,0 +1,149 @@ +{ + "Params: parabricks/fq2bammeth single-end | use_gpu": { + "content": [ + [ + "a7f7ca7b5eb503ab58790d64a0273ed6" + ], + [ + "test.markdup.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,897d500a710a56a7098172167fa71108" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,9aac964b859fda8239aa0eae16382d56" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.markdup.sorted_CpG.bedGraph:md5,f2fe02f180456f5f4922a2a8aa559fca" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,fce04d733e066d0b933cedc602e2af81" + ] + ], + [ + "test.markdup.sorted.MarkDuplicates.metrics.txt" + ], + [ + "test.flagstat", + "test.markdup.sorted.MarkDuplicates.metrics.txt", + "test.markdup.sorted_CpG.bedGraph", + "test.mbias.txt", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,45239309d0c40b5f0a56eba4347f09be", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15", + "versions.yml:md5,ddbe480ff81df55c6d95f911e7b6dc8a", + "versions.yml:md5,e9602257141b65a907ad9036e8a32a83" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T06:56:47.211389" + }, + "Params: parabricks/fq2bammeth single-end | use_gpu | skip_deduplication": { + "content": [ + [ + "a7f7ca7b5eb503ab58790d64a0273ed6" + ], + [ + "test.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,897d500a710a56a7098172167fa71108" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,9aac964b859fda8239aa0eae16382d56" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted_CpG.bedGraph:md5,b0cb426020f8beb45b4e8f09b9a17bfa" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,fce04d733e066d0b933cedc602e2af81" + ] + ], + [ + + ], + [ + "test.flagstat", + "test.mbias.txt", + "test.sorted_CpG.bedGraph", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15", + "versions.yml:md5,e9602257141b65a907ad9036e8a32a83" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T06:57:34.41912" + } +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test new file mode 100644 index 00000000000..6b20d545684 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test @@ -0,0 +1,179 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_ALIGN_DEDUP_BWAMETH" + script "../main.nf" + workflow "FASTQ_ALIGN_DEDUP_BWAMETH" + config "./nextflow.config" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_align_dedup_bwameth" + tag "bwameth/align" + tag "parabricks/fq2bammeth" + tag "samtools/sort" + tag "samtools/index" + tag "samtools/flagstat" + tag "samtools/stats" + tag "picard/markduplicates" + tag "samtools/index" + tag "methyldackel/extract" + tag "methyldackel/mbias" + tag "untar" + + setup { + run("UNTAR") { + script "../../../../modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/Bwameth_Index.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + test("Params: bwameth single-end | default") { + + when { + params { + skip_deduplication = false + use_gpu = false + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } + + test("Params: bwameth paired-end | default") { + + when { + params { + skip_deduplication = false + use_gpu = false + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R1.fastq.gz', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R2.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } + + test("Params: bwameth paired-end | skip_deduplication") { + + when { + params { + skip_deduplication = true + use_gpu = false + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R1.fastq.gz', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R2.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test.snap new file mode 100644 index 00000000000..90c9601506b --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test.snap @@ -0,0 +1,224 @@ +{ + "Params: bwameth single-end | default": { + "content": [ + [ + "37ec1c6338cc3fee7ab1cb2d48dba38" + ], + [ + "test.markdup.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,897d500a710a56a7098172167fa71108" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,9aac964b859fda8239aa0eae16382d56" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.markdup.sorted_CpG.bedGraph:md5,f2fe02f180456f5f4922a2a8aa559fca" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,fce04d733e066d0b933cedc602e2af81" + ] + ], + [ + "test.markdup.sorted.MarkDuplicates.metrics.txt" + ], + [ + "test.flagstat", + "test.markdup.sorted.MarkDuplicates.metrics.txt", + "test.markdup.sorted_CpG.bedGraph", + "test.mbias.txt", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,45239309d0c40b5f0a56eba4347f09be", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,8edf3166176c863b88ba488f8b715aa3", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15", + "versions.yml:md5,ddbe480ff81df55c6d95f911e7b6dc8a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T05:42:12.81856451" + }, + "Params: bwameth paired-end | skip_deduplication": { + "content": [ + [ + "cf25656fffc044f2bb7d9f1b3686ecb4" + ], + [ + "test.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4ff87d121ca174953734723938c99081" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,c753c72eb4e1c32f74afb1fbd932fe1f" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted_CpG.bedGraph:md5,285e492823182f5705bf0817e2d088b8" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,c1fda203c1b19aca2498efe0fd4cc9e3" + ] + ], + [ + + ], + [ + "test.flagstat", + "test.mbias.txt", + "test.sorted_CpG.bedGraph", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,8edf3166176c863b88ba488f8b715aa3", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T05:10:43.907134648" + }, + "Params: bwameth paired-end | default": { + "content": [ + [ + "cf25656fffc044f2bb7d9f1b3686ecb4" + ], + [ + "test.markdup.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4ff87d121ca174953734723938c99081" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,c753c72eb4e1c32f74afb1fbd932fe1f" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.markdup.sorted_CpG.bedGraph:md5,c6c73e5abba70ac799500f592fec5c29" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,c1fda203c1b19aca2498efe0fd4cc9e3" + ] + ], + [ + "test.markdup.sorted.MarkDuplicates.metrics.txt" + ], + [ + "test.flagstat", + "test.markdup.sorted.MarkDuplicates.metrics.txt", + "test.markdup.sorted_CpG.bedGraph", + "test.mbias.txt", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,45239309d0c40b5f0a56eba4347f09be", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,8edf3166176c863b88ba488f8b715aa3", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15", + "versions.yml:md5,ddbe480ff81df55c6d95f911e7b6dc8a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T05:42:39.183331191" + } +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/nextflow.config b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/nextflow.config new file mode 100644 index 00000000000..55385ec0621 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/nextflow.config @@ -0,0 +1,14 @@ +process { + withName: 'PARABRICKS_FQ2BAMMETH' { + ext.args = '--low-memory' + } + + withName: 'SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.sorted" } + } + + withName: 'PICARD_MARKDUPLICATES' { + ext.args = "--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --PROGRAM_RECORD_ID 'null' --TMP_DIR tmp" + ext.prefix = { "${meta.id}.markdup.sorted" } + } +}