diff --git a/CHANGELOG.md b/CHANGELOG.md index e4d169db..750a7028 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `--save_align_intermeds` parameter that publishes BAM files to the output directory (for `starsolo`, `cellranger` and `cellranger multi`) ([#384](https://github.com/nf-core/scrnaseq/issues/384)) - Added support for pre-built indexes in `genomes.config` file for `cellranger`, `cellranger-arc`, `simpleaf` and `simpleaf txp2gene` ([#371](https://github.com/nf-core/scrnaseq/issues/371)) +- Cleanup and fix bugs in matrix conversion code, and change to use anndataR for conversions, and cellbender for emptydrops call. ([#369](https://github.com/nf-core/scrnaseq/pull/369)) +- Fix problem with `test_full` that was not running out of the box, since code was trying to overwrite parameters in the workflow, which is not possible ([#366](https://github.com/nf-core/scrnaseq/issues/366)) ## v2.7.1 - 2024-08-13 diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py deleted file mode 100755 index 43ea071a..00000000 --- a/bin/concat_h5ad.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python - -# Set numba chache dir to current working directory (which is a writable mount also in containers) -import os - -os.environ["NUMBA_CACHE_DIR"] = "." 
- -import scanpy as sc, anndata as ad, pandas as pd -from pathlib import Path -import argparse - - -def read_samplesheet(samplesheet): - df = pd.read_csv(samplesheet) - df.set_index("sample") - - # samplesheet may contain replicates, when it has, - # group information from replicates and collapse with commas - # only keep unique values using set() - df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column))) - - return df - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet") - - parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv") - parser.add_argument("-o", "--out", dest="out", help="Output path.") - parser.add_argument( - "-s", - "--suffix", - dest="suffix", - help="Suffix of matrices to remove and get sample name", - ) - - args = vars(parser.parse_args()) - - # Open samplesheet as dataframe - df_samplesheet = read_samplesheet(args["input"]) - - # find all h5ad and append to dict - dict_of_h5ad = {str(path).replace(args["suffix"], ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")} - - # concat h5ad files - adata = ad.concat(dict_of_h5ad, label="sample", merge="unique", index_unique="_") - - # merge with data.frame, on sample information - adata.obs = adata.obs.join(df_samplesheet, on="sample") - adata.write_h5ad(args["out"], compression="gzip") - - print("Wrote h5ad file to {}".format(args["out"])) diff --git a/bin/emptydrops_cell_calling.R b/bin/emptydrops_cell_calling.R deleted file mode 100755 index 23a45267..00000000 --- a/bin/emptydrops_cell_calling.R +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env Rscript -library("DropletUtils") -library("Matrix") - -args <- commandArgs(trailingOnly=TRUE) - -fn_mtx <- args[1] -fn_barcodes <- args[2] -fn_genes <- args[3] -outdir <- args[4] -aligner <- args[5] - -# Read matrix/barcodes/genes -genes <- read.table(fn_genes,sep='\t') -barcodes <- read.table(fn_barcodes,sep='\t') -mtx <- 
readMM(fn_mtx) - -get_name <- function(file) { - name <- as.character(basename(file)) - name <- gsub('\\.gz$', '', name) - return(name) -} - -# transpose matrices when required -# based on code of 'mtx_to_seurat.R', only the data from kallisto and alevin would require transposition -print("Only kallisto and alevin have transposed matrices.") -if (aligner %in% c( "kallisto", "alevin" )) { - is_transposed <- TRUE - mtx<-t(mtx) -} else { - is_transposed <- FALSE -} - - -# Call empty drops -e.out <- emptyDrops(mtx) -is.cell <- e.out$FDR <= 0.01 - -# Slice matrix and barcodes -mtx_filtered <-mtx[,which(is.cell),drop=FALSE] -barcodes_filtered<-barcodes[which(is.cell),] - -# If matrix was transposed early, need to transpose back -if (is_transposed){ - mtx_filtered<-t(mtx_filtered) - print('Transposing back matrix.') -} - -# Write output -writeMM(mtx_filtered,file.path(outdir,get_name(fn_mtx))) -write.table(barcodes_filtered,file=file.path(outdir,get_name(fn_barcodes)),col.names=FALSE,row.names=FALSE,sep='\t',quote=FALSE) -write.table(genes,file=file.path(outdir,get_name(fn_genes)),col.names=FALSE,row.names=FALSE,sep='\t',quote=FALSE) diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py deleted file mode 100755 index 2190245d..00000000 --- a/bin/mtx_to_h5ad.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python - -# Set numba chache dir to current working directory (which is a writable mount also in containers) -import os - -os.environ["NUMBA_CACHE_DIR"] = "." 
- -import scanpy as sc -import pandas as pd -import argparse -from scipy import io -from anndata import AnnData - - -def _10x_h5_to_adata(mtx_h5: str, sample: str): - adata = sc.read_10x_h5(mtx_h5) - adata.var["gene_symbols"] = adata.var_names - adata.var.set_index("gene_ids", inplace=True) - adata.obs["sample"] = sample - - # reorder columns for 10x mtx files - adata.var = adata.var[["gene_symbols", "feature_types", "genome"]] - - return adata - - -def _mtx_to_adata( - mtx_file: str, - barcode_file: str, - feature_file: str, - sample: str, - aligner: str, -): - adata = sc.read_mtx(mtx_file) - # for some reason star matrix comes transposed and doesn't fit when values are appended directly - # also true for cellranger files ( this is only used when running with the custom emptydrops_filtered files ) - # otherwise, it uses the cellranger .h5 files - if aligner in [ - "cellranger", - "cellrangermulti", - "star", - ]: - adata = adata.transpose() - - adata.obs_names = pd.read_csv(barcode_file, header=None, sep="\t")[0].values - adata.var_names = pd.read_csv(feature_file, header=None, sep="\t")[0].values - adata.obs["sample"] = sample - - return adata - - -def input_to_adata( - input_data: str, - barcode_file: str, - feature_file: str, - sample: str, - aligner: str, - txp2gene: str, - star_index: str, - verbose: bool = True, -): - if verbose and (txp2gene or star_index): - print("Reading in {}".format(input_data)) - - # - # open main data - # - if aligner == "cellranger" and input_data.lower().endswith('.h5'): - adata = _10x_h5_to_adata(input_data, sample) - else: - adata = _mtx_to_adata(input_data, barcode_file, feature_file, sample, aligner) - - # - # open gene information - # - if verbose and (txp2gene or star_index): - print("Reading in {}".format(txp2gene)) - - if aligner == "cellranger" and not input_data.lower().endswith('.h5'): - # - # for cellranger workflow, we do not have a txp2gene file, so, when using this normal/manual function for empty drops - # we need 
to provide this information coming directly from the features.tsv file - # by not using the .h5 file for conversion, we loose the two col information: feature_types and genome - # - t2g = pd.read_table(feature_file, header=None, names=["gene_id", "gene_symbol", "feature_types"], usecols=[0, 1, 2]) - else: - if txp2gene: - t2g = pd.read_table(txp2gene, header=None, names=["gene_id", "gene_symbol"], usecols=[1, 2]) - elif star_index: - t2g = pd.read_table( - f"{star_index}/geneInfo.tab", header=None, skiprows=1, names=["gene_id", "gene_symbol"], usecols=[0, 1] - ) - - if txp2gene or star_index or (aligner == "cellranger" and not input_data.lower().endswith('.h5')): - t2g = t2g.drop_duplicates(subset="gene_id").set_index("gene_id") - adata.var["gene_symbol"] = t2g["gene_symbol"] - - return adata - - -def write_counts( - adata: AnnData, - out: str, - verbose: bool = False, -): - pd.DataFrame(adata.obs.index).to_csv(os.path.join(out, "barcodes.tsv"), sep="\t", index=False, header=None) - pd.DataFrame(adata.var).to_csv(os.path.join(out, "features.tsv"), sep="\t", index=True, header=None) - io.mmwrite(os.path.join(out, "matrix.mtx"), adata.X.T, field="integer") - - if verbose: - print("Wrote features.tsv, barcodes.tsv, and matrix.mtx files to {}".format(args["out"])) - - -def dump_versions(task_process): - import pkg_resources - - with open("versions.yml", "w") as f: - f.write(f"{task_process}:\n\t") - f.write("\n\t".join([f"{pkg.key}: {pkg.version}" for pkg in pkg_resources.working_set])) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.") - - parser.add_argument("-i", "--input_data", dest="input_data", help="Path to either mtx or mtx h5 file.") - parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False) - parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.", nargs="?", const="") - parser.add_argument("-b", "--barcode", dest="barcode", 
help="Path to barcode file.", nargs="?", const="") - parser.add_argument("-s", "--sample", dest="sample", help="Sample name") - parser.add_argument("-o", "--out", dest="out", help="Output path.") - parser.add_argument("-a", "--aligner", dest="aligner", help="Which aligner has been used?") - parser.add_argument("--task_process", dest="task_process", help="Task process name.") - parser.add_argument("--txp2gene", dest="txp2gene", help="Transcript to gene (t2g) file.", nargs="?", const="") - parser.add_argument( - "--star_index", dest="star_index", help="Star index folder containing geneInfo.tab.", nargs="?", const="" - ) - - args = vars(parser.parse_args()) - - # create the directory with the sample name - os.makedirs(os.path.dirname(args["out"]), exist_ok=True) - - adata = input_to_adata( - input_data=args["input_data"], - barcode_file=args["barcode"], - feature_file=args["feature"], - sample=args["sample"], - aligner=args["aligner"], - txp2gene=args["txp2gene"], - star_index=args["star_index"], - verbose=args["verbose"], - ) - - write_counts(adata=adata, out=args["sample"], verbose=args["verbose"]) - - adata.write_h5ad(args["out"], compression="gzip") - - print("Wrote h5ad file to {}".format(args["out"])) - - dump_versions(task_process=args["task_process"]) diff --git a/bin/mtx_to_seurat.R b/bin/mtx_to_seurat.R deleted file mode 100755 index 7cacccf7..00000000 --- a/bin/mtx_to_seurat.R +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env Rscript -library(Seurat) - -args <- commandArgs(trailingOnly=TRUE) - -mtx_file <- args[1] -barcode_file <- args[2] -feature_file <- args[3] -out.file <- args[4] -aligner <- args[5] -is_emptydrops <- args[6] - -if (is_emptydrops == "--is_emptydrops") { - is_emptydrops <- TRUE -} else{ - is_emptydrops <- FALSE -} - -if (aligner %in% c( "kallisto", "alevin" )) { - print("1") - # for kallisto and alevin, the features file contains only one column and matrix needs to be transposed - expression.matrix <- ReadMtx( - mtx = mtx_file, features = 
feature_file, cells = barcode_file, feature.column = 1, mtx.transpose = TRUE - ) -} else { - if (aligner %in% c( "cellranger", "cellrangermulti", "star" ) && is_emptydrops) { - print("2") - expression.matrix <- ReadMtx( - mtx = mtx_file, features = feature_file, cells = barcode_file, feature.column = 1 - ) - } else{ - print("3") - expression.matrix <- ReadMtx( - mtx = mtx_file, features = feature_file, cells = barcode_file - ) - } -} - - -seurat.object <- CreateSeuratObject(counts = expression.matrix) - -dir.create(basename(dirname(out.file)), showWarnings = FALSE) - -saveRDS(seurat.object, file = out.file) - - -yaml::write_yaml( -list( - 'MTX_TO_SEURAT'=list( - 'Seurat' = paste(packageVersion('Seurat'), collapse='.') - ) -), -"versions.yml" -) diff --git a/conf/modules.config b/conf/modules.config index d00278a6..7c09d8df 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -31,25 +31,35 @@ process { } if (!params.skip_emptydrops) { - withName: EMPTYDROPS_CELL_CALLING { + withName: 'CELLBENDER_REMOVEBACKGROUND' { publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, + path: { "${params.outdir}/${params.aligner}/${meta.id}/emptydrops_filter" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'ADATA_BARCODES' { + ext.prefix = { "${meta.id}_${meta.input_type}_matrix" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mtx_conversions/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> - if ( params.aligner == 'cellranger' ) "count/${meta.id}/${filename}" - else if ( params.aligner == 'kallisto' ) "${meta.id}.count/${filename}" - else "${meta.id}/${filename}" - } + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } } - withName: 'MTX_TO_H5AD|CONCAT_H5AD|MTX_TO_SEURAT' { + withName: 'MTX_TO_H5AD|CONCAT_H5AD|ANNDATAR_CONVERT' { publishDir = [ path: { "${params.outdir}/${params.aligner}/mtx_conversions" }, - mode: params.publish_dir_mode + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.equals('versions.yml')) { null } + else if (!filename.contains('combined_')) { "${meta.id}/${filename}" } + else filename + } ] } + withName: 'GTF_GENE_FILTER' { publishDir = [ path: { "${params.outdir}/gtf_filter" }, @@ -73,13 +83,15 @@ if(params.aligner == "cellranger") { withName: CELLRANGER_MKREF { publishDir = [ path: "${params.outdir}/${params.aligner}/mkref", - mode: params.publish_dir_mode + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: CELLRANGER_COUNT { publishDir = [ path: "${params.outdir}/${params.aligner}/count", - mode: params.publish_dir_mode + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = {"--chemistry ${meta.chemistry} --create-bam ${params.save_align_intermeds}" + " " + (meta.expected_cells ? "--expect-cells ${meta.expected_cells}" : '')} time = { 240.h * task.attempt } @@ -160,8 +172,9 @@ if (params.aligner == "alevin") { } withName: 'SIMPLEAF_QUANT' { publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode + path: { "${params.outdir}/${params.aligner}/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = "-r cr-like" } @@ -182,7 +195,8 @@ if (params.aligner == "star") { publishDir = [ path: { "${params.outdir}/${params.aligner}/genome_generate" }, mode: params.publish_dir_mode, - enabled: params.save_reference + enabled: params.save_reference, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } @@ -213,14 +227,16 @@ if (params.aligner == 'kallisto') { publishDir = [ path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode, - enabled: params.save_reference + enabled: params.save_reference, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: KALLISTOBUSTOOLS_COUNT { def kb_filter = (params.kb_filter) ? '--filter' : '' publishDir = [ path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] ext.args = "--workflow ${params.kb_workflow} ${kb_filter}" } @@ -259,7 +275,8 @@ if (params.aligner == 'cellrangermulti') { withName: CELLRANGER_MKVDJREF { publishDir = [ path: "${params.outdir}/${params.aligner}/mkvdjref", - mode: params.publish_dir_mode + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } } diff --git a/conf/test_cellranger_multi.config b/conf/test_cellranger_multi.config index f10550ae..0a229f9c 100644 --- a/conf/test_cellranger_multi.config +++ b/conf/test_cellranger_multi.config @@ -10,6 +10,14 @@ ---------------------------------------------------------------------------------------- */ +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + // shared across profiles params { config_profile_name = 'Test profile (Cellranger Multi)' diff --git a/docs/output.md b/docs/output.md index a5292336..8be54c40 100644 --- a/docs/output.md +++ b/docs/output.md @@ -19,7 +19,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Cellranger ARC](#cellranger-arc) - [Cellranger multi](#cellranger-multi) - [UniverSC](#universc) - - [Custom emptydrops filter](#custom-emptydrops-filter) + - [Cellbender emptydrops filter](#cellbender-emptydrops-filter) - [Other output data](#other-output-data) - [MultiQC](#multiqc) - [Pipeline 
information](#pipeline-information) @@ -141,15 +141,15 @@ Battenberg, K., Kelly, S.T., Ras, R.A., Hetherington, N.A., Hayashi, K., and Min - Contains the mapped BAM files, filtered and unfiltered HDF5 matrices and output metrics created by the open-source implementation of Cell Ranger run via UniverSC -## Custom emptydrops filter +## Cellbender emptydrops filter -The pipeline also possess a module to perform empty-drops calling and filtering with a custom-made script that uses a library called `bioconductor-dropletutils` that is available in `bioconda`. The process is simple, it takes a raw/unfiltered matrix file, and performs the empty-drops calling and filtering on it, generating another matrix file. +The pipeline also possesses a subworkflow imported from scdownstream to perform emptydrops calling and filtering using [cellbender](https://github.com/broadinstitute/CellBender). The process is simple: it takes a raw/unfiltered matrix file, and performs the emptydrops calling and filtering on it, generating another matrix file. > Users can turn it of with `--skip_emptydrops`. -**Output directory: `results/${params.aligner}/emptydrops_filtered`** +**Output directory: `results/${params.aligner}/${meta.id}/emptydrops_filter`** -- Contains the empty-drops filtered matrices results generated by the `bioconductor-dropletutils` custom script +- Contains the emptydrops filtered matrices results generated by the cellbender subworkflow. ## Other output data @@ -170,15 +170,15 @@ The pipeline also possess a module to perform empty-drops calling and filtering - `.mtx` files converted to R native data format, rds, using the [Seurat package](https://github.com/satijalab/seurat) - One per sample -Because the pipeline has both the data directly from the aligners, and from the custom empty-drops filtering module the conversion modules were modified to understand the difference between raw/filtered from the aligners itself and filtered from the custom empty-drops module. 
So, to try to avoid confusion by the user, we added "suffixes" to the generated converted files so that we have provenance from what input it came from. +Because the pipeline has both the data directly from the aligners, and from the cellbender empty-drops filtering module, the conversion modules were modified to understand the difference between raw/filtered from the aligners itself and filtered from the empty-drops module. So, to try to avoid confusion by the user, we added "suffixes" to the generated converted files so that we have provenance from what input it came from. -So, the conversion modules generate data with the following syntax: **`*_{raw,filtered,custom_emptydrops_filter}_matrix.{h5ad,rds}`**. With the following meanings: +So, the conversion modules generate data with the following syntax: **`*_{raw,filtered,emptydrops_filter}_matrix.{h5ad,rds}`**. With the following meanings: -| suffix | meaning | -| :----------------------- | :--------------------------------------------------------------------------------------------------------------------------------------- | -| raw | Conversion of the raw/unprocessed matrix generated by the tool. It is also used for tools that generate only one matrix, such as alevin. | -| filtered | Conversion of the filtered/processed matrix generated by the tool | -| custom_emptydrops_filter | Conversion of the matrix that was generated by the new custom empty drops filter module | +| suffix | meaning | +| :---------------- | :--------------------------------------------------------------------------------------------------------------------------------------- | +| raw | Conversion of the raw/unprocessed matrix generated by the tool. It is also used for tools that generate only one matrix, such as alevin. 
| +| filtered | Conversion of the filtered/processed matrix generated by the tool | +| emptydrops_filter | Conversion of the matrix that was generated by the cellbender empty drops filter module | > Some aligners, like `alevin` do not produce both raw&filtered matrices. When aligners give only one output, they are treated with the `raw` suffix. Some aligners may have an option to give both raw&filtered and only one, like `kallisto`. Be aware when using the tools. diff --git a/modules.json b/modules.json index 3caa39e7..ad713c9f 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "cellbender/removebackground": { + "branch": "master", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["modules"] + }, "cellranger/count": { "branch": "master", "git_sha": "90dad5491658049282ceb287a3d7732c1ce39837", diff --git a/modules/local/adata_barcodes.nf b/modules/local/adata_barcodes.nf new file mode 100644 index 00000000..630d90ae --- /dev/null +++ b/modules/local/adata_barcodes.nf @@ -0,0 +1,29 @@ +process ADATA_BARCODES { + + // + // Module from nf-core/scdownstream. + // This module performs the subset of the h5ad file to only contain barcodes that passed emptydrops filter with cellbender + // + + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'oras://community.wave.seqera.io/library/anndata:0.10.7--e9840a94592528c8': + 'community.wave.seqera.io/library/anndata:0.10.7--336c6c1921a0632b' }" + + input: + tuple val(meta), path(h5ad), path(barcodes_csv) + + output: + tuple val(meta), path("*.h5ad"), emit: h5ad + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + template 'barcodes.py' +} diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf index 9000d79e..777a1371 100644 --- a/modules/local/alevinqc.nf +++ b/modules/local/alevinqc.nf @@ -1,4 +1,9 @@ process ALEVINQC { + + // + // This module executes alevinfry QC reporting tool on alevin results + // + tag "$meta.id" label 'process_low' diff --git a/modules/local/anndatar_convert.nf b/modules/local/anndatar_convert.nf new file mode 100644 index 00000000..f17e0483 --- /dev/null +++ b/modules/local/anndatar_convert.nf @@ -0,0 +1,31 @@ +process ANNDATAR_CONVERT { + + // + // This module uses the anndataR package to convert h5ad files into .rds files (stub output must match the "*.rds" glob) + // + + tag "${meta.id}" + + label 'process_medium' + + container "docker.io/nfcore/anndatar:20241129" + + input: + tuple val(meta), path(h5ad) + + output: + tuple val(meta), path("${meta.id}_${meta.input_type}_matrix*.rds"), emit: rds + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + template 'anndatar_convert.R' + + stub: + """ + touch ${meta.id}_${meta.input_type}_matrix.rds + touch versions.yml + """ +} diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf index cd08cbbe..17c8d4e1 100644 --- a/modules/local/concat_h5ad.nf +++ b/modules/local/concat_h5ad.nf @@ -1,31 +1,34 @@ process CONCAT_H5AD { + + // + // This module concatenates all h5ad, per type (raw, filtered, etc.) 
files generated during pipeline execution + // + + + tag "${meta.id}" + label 'process_medium' - conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : - 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg" + container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538" input: - tuple val(input_type), path(h5ad) + tuple val(meta), path(h5ad) path samplesheet output: - path "*.h5ad", emit: h5ad + tuple val(meta), path("*.h5ad"), emit: h5ad + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - """ - concat_h5ad.py \\ - --input $samplesheet \\ - --out combined_${input_type}_matrix.h5ad \\ - --suffix "_matrix.h5ad" - """ + template 'concat_h5ad.py' stub: """ touch combined_matrix.h5ad + touch versions.yml """ } diff --git a/modules/local/emptydrops.nf b/modules/local/emptydrops.nf deleted file mode 100644 index 9457fc09..00000000 --- a/modules/local/emptydrops.nf +++ /dev/null @@ -1,101 +0,0 @@ -process EMPTYDROPS_CELL_CALLING { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::bioconductor-dropletutils" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bioconductor-dropletutils:1.18.0--r42hf17093f_1' : - 'quay.io/biocontainers/bioconductor-dropletutils:1.18.0--r42hf17093f_1' }" - - input: - // inputs from cellranger nf-core module does not come in a single sample dir - // for each sample, the sub-folders and files come directly in array. 
- tuple val(meta), path(inputs) - - output: - tuple val(meta), path("emptydrops_filtered"), emit: filtered_matrices - - when: - task.ext.when == null || task.ext.when - - script: - if (params.aligner in ["cellranger", "cellrangermulti"]) { - - matrix = "matrix.mtx.gz" - barcodes = "barcodes.tsv.gz" - features = "features.tsv.gz" - - } else if (params.aligner == "kallisto") { - - matrix = "counts_unfiltered/*.mtx" - barcodes = "counts_unfiltered/*.barcodes.txt" - features = "counts_unfiltered/*.genes.names.txt" - - // kallisto allows the following workflows: ["standard", "lamanno", "nac"] - // lamanno creates "spliced" and "unspliced" - // nac creates "nascent", "ambiguous" "mature" - // also, lamanno produces a barcodes and genes file for both spliced and unspliced - // while nac keep only one for all the different .mtx files produced - kb_non_standard_files = "" - if (params.kb_workflow == "lamanno") { - kb_non_standard_files = "spliced unspliced" - matrix = "counts_unfiltered/\${input_type}.mtx" - barcodes = "counts_unfiltered/\${input_type}.barcodes.txt" - features = "counts_unfiltered/\${input_type}.genes.txt" - } - if (params.kb_workflow == "nac") { - kb_non_standard_files = "nascent ambiguous mature" - matrix = "counts_unfiltered/*\${input_type}.mtx" - features = "counts_unfiltered/*.genes.txt" - } // barcodes tsv has same pattern as standard workflow - - } else if (params.aligner == "alevin") { - - matrix = "*_alevin_results/af_quant/alevin/quants_mat.mtx" - barcodes = "*_alevin_results/af_quant/alevin/quants_mat_rows.txt" - features = "*_alevin_results/af_quant/alevin/quants_mat_cols.txt" - - } else if (params.aligner == 'star') { - - matrix = "raw/matrix.mtx.gz" - barcodes = "raw/barcodes.tsv.gz" - features = "raw/features.tsv.gz" - - } - - // - // run script - // - if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') - """ - # convert file types - for input_type in ${kb_non_standard_files} ; do - mkdir -p 
emptydrops_filtered/\${input_type} - emptydrops_cell_calling.R \\ - ${matrix} \\ - ${barcodes} \\ - ${features} \\ - emptydrops_filtered/\${input_type} \\ - ${params.aligner} \\ - 0 - done - """ - - else - """ - mkdir emptydrops_filtered/ - emptydrops_cell_calling.R \\ - $matrix \\ - $barcodes \\ - $features \\ - emptydrops_filtered \\ - ${params.aligner} \\ - 0 - """ - - stub: - """ - mkdir emptydrops_filtered - touch emptydrops_filtered/empty_file - """ -} diff --git a/modules/local/gffread_transcriptome.nf b/modules/local/gffread_transcriptome.nf index ab573b07..671b6726 100644 --- a/modules/local/gffread_transcriptome.nf +++ b/modules/local/gffread_transcriptome.nf @@ -1,4 +1,9 @@ process GFFREAD_TRANSCRIPTOME { + + // + // This module uses gffread to filter input to generate a transcripts fasta + // + tag "${genome_fasta}" label 'process_low' diff --git a/modules/local/gtf_gene_filter.nf b/modules/local/gtf_gene_filter.nf index 063bd228..10af352b 100644 --- a/modules/local/gtf_gene_filter.nf +++ b/modules/local/gtf_gene_filter.nf @@ -1,4 +1,9 @@ process GTF_GENE_FILTER { + + // + // This module executes a custom script to filter input gtf to contain only annotations present in input genome + // + tag "$fasta" label 'process_low' diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 61e06e91..424580f6 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -1,11 +1,14 @@ process MTX_TO_H5AD { + + // + // This module executes different conversion template scripts (per aligner) for converting output mtx files into h5ad files + // + tag "$meta.id" label 'process_medium' - conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : - 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg" + container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538" input: // inputs from cellranger nf-core module does not come in a single sample dir @@ -13,127 +16,23 @@ process MTX_TO_H5AD { tuple val(meta), path(inputs) path txp2gene path star_index + val input_aligner output: - tuple val(input_type), path("${meta.id}/*h5ad") , emit: h5ad - path "versions.yml" , emit: versions + tuple val(meta), path("${meta.id}_${meta.input_type}_matrix.h5ad"), emit: h5ad + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - // Get a file to check input type. Some aligners bring arrays instead of a single file. - def input_to_check = (inputs instanceof String) ? inputs : inputs[0] - - // check input type of inputs - input_type = (input_to_check.toUriString().contains('unfiltered') || input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered' - if ( params.aligner == 'alevin' ) { input_type = 'raw' } // alevin has its own filtering methods and mostly output a single mtx, 'raw' here means, the base tool output - if (input_to_check.toUriString().contains('emptydrops')) { input_type = 'custom_emptydrops_filter' } - - // def file paths for aligners. 
Cellranger is normally converted with the .h5 files - // However, the emptydrops call, always generate .mtx files, thus, cellranger 'emptydrops' required a parsing - if (params.aligner in [ 'cellranger', 'cellrangerarc', 'cellrangermulti' ] && input_type == 'custom_emptydrops_filter') { - - aligner = 'cellranger' - txp2gene = '' - star_index = '' - mtx_matrix = "emptydrops_filtered/matrix.mtx" - barcodes_tsv = "emptydrops_filtered/barcodes.tsv" - features_tsv = "emptydrops_filtered/features.tsv" - - } else if (params.aligner == 'kallisto') { - - kb_pattern = (input_type == 'raw') ? 'un' : '' - mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : "counts_${kb_pattern}filtered" - if ((input_type == 'custom_emptydrops_filter') && (params.kb_workflow != 'standard')) { mtx_dir = 'emptydrops_filtered/\${input_type}' } // dir has subdirs for non-standard workflows - mtx_matrix = "${mtx_dir}/*.mtx" - barcodes_tsv = "${mtx_dir}/*.barcodes.txt" - features_tsv = "${mtx_dir}/*.genes.names.txt" - - // kallisto allows the following workflows: ["standard", "lamanno", "nac"] - // lamanno creates "spliced" and "unspliced" - // nac creates "nascent", "ambiguous" "mature" - // also, lamanno produces a barcodes and genes file for both spliced and unspliced - // while nac keep only one for all the different .mtx files produced - kb_non_standard_files = "" - if (params.kb_workflow == "lamanno") { - kb_non_standard_files = "spliced unspliced" - matrix = "${mtx_dir}/\${input_type}.mtx" - barcodes_tsv = "${mtx_dir}/\${input_type}.barcodes.txt" - features_tsv = "${mtx_dir}/\${input_type}.genes.txt" - } - if (params.kb_workflow == "nac") { - kb_non_standard_files = "nascent ambiguous mature" - matrix = "${mtx_dir}/*\${input_type}.mtx" - features_tsv = "${mtx_dir}/*.genes.txt" - } // barcodes tsv has same pattern as standard workflow - - } else if (params.aligner == 'alevin') { - - // alevin does not have filtered/unfiltered results - mtx_dir = (input_type == 
'custom_emptydrops_filter') ? 'emptydrops_filtered' : '*_alevin_results/af_quant/alevin' - mtx_matrix = "${mtx_dir}/quants_mat.mtx" - barcodes_tsv = "${mtx_dir}/quants_mat_rows.txt" - features_tsv = "${mtx_dir}/quants_mat_cols.txt" - - } else if (params.aligner == 'star') { + def aligner = (input_aligner in [ 'cellranger', 'cellrangerarc', 'cellrangermulti' ]) ? 'cellranger' : input_aligner - mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : "${input_type}" - suffix = (input_type == 'custom_emptydrops_filter') ? '' : '.gz' - mtx_matrix = "${mtx_dir}/matrix.mtx${suffix}" - barcodes_tsv = "${mtx_dir}/barcodes.tsv${suffix}" - features_tsv = "${mtx_dir}/features.tsv${suffix}" - - } - - // - // run script - // - if (params.aligner in [ "cellranger", "cellrangerarc", "cellrangermulti"] && input_type != 'custom_emptydrops_filter') - """ - # convert file types - mtx_to_h5ad.py \\ - --aligner cellranger \\ - --input *${input_type}_feature_bc_matrix.h5 \\ - --sample ${meta.id} \\ - --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad - """ - - else if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') - """ - # convert file types - for input_type in ${kb_non_standard_files} ; do - mtx_to_h5ad.py \\ - --aligner ${params.aligner} \\ - --sample ${meta.id} \\ - --input ${matrix} \\ - --barcode ${barcodes_tsv} \\ - --feature ${features_tsv} \\ - --txp2gene ${txp2gene} \\ - --star_index ${star_index} \\ - --out ${meta.id}/${meta.id}_\${input_type}_matrix.h5ad ; - done - """ - - else - """ - # convert file types - mtx_to_h5ad.py \\ - --task_process ${task.process} \\ - --aligner ${params.aligner} \\ - --sample ${meta.id} \\ - --input $mtx_matrix \\ - --barcode $barcodes_tsv \\ - --feature $features_tsv \\ - --txp2gene ${txp2gene} \\ - --star_index ${star_index} \\ - --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad - """ + template "mtx_to_h5ad_${aligner}.py" stub: """ - mkdir ${meta.id} - touch ${meta.id}/${meta.id}_matrix.h5ad + 
touch ${meta.id}_${meta.input_type}_matrix.h5ad touch versions.yml """ } diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf deleted file mode 100644 index 21dea175..00000000 --- a/modules/local/mtx_to_seurat.nf +++ /dev/null @@ -1,127 +0,0 @@ -process MTX_TO_SEURAT { - tag "$meta.id" - label 'process_medium' - - conda "r-seurat" - container "nf-core/seurat:4.3.0" - - input: - // inputs from cellranger nf-core module does not come in a single sample dir - // for each sample, the sub-folders and files come directly in array. - tuple val(meta), path(inputs) - - output: - path "${meta.id}/*.rds", emit: seuratObjects - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def aligner = params.aligner - - - // Get a file to check input type. Some aligners bring arrays instead of a single file. - def input_to_check = (inputs instanceof String) ? inputs : inputs[0] - - // check input type of inputs - def is_emptydrops = '0' - input_type = (input_to_check.toUriString().contains('unfiltered') || input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered' - if ( params.aligner == 'alevin' ) { input_type = 'raw' } // alevin has its own filtering methods and mostly output a single mtx, raw here means, the base tool output - if (input_to_check.toUriString().contains('emptydrops')) { - input_type = 'custom_emptydrops_filter' - is_emptydrops = '--is_emptydrops' - } - - // def file paths for aligners. Cellranger is normally converted with the .h5 files - // However, the emptydrops call, always generate .mtx files, thus, cellranger 'emptydrops' required a parsing - if (params.aligner in [ "cellranger", "cellrangerarc", "cellrangermulti" ]) { - - mtx_dir = (input_type == 'custom_emptydrops_filter') ? 
'emptydrops_filtered/' : '' - matrix = "${mtx_dir}matrix.mtx*" - barcodes = "${mtx_dir}barcodes.tsv*" - features = "${mtx_dir}features.tsv*" - - } else if (params.aligner == 'kallisto') { - - kb_pattern = (input_type == 'raw') ? 'un' : '' - mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : "counts_${kb_pattern}filtered" - if ((input_type == 'custom_emptydrops_filter') && (params.kb_workflow != 'standard')) { mtx_dir = 'emptydrops_filtered/\${input_type}' } // dir has subdirs for non-standard workflows - matrix = "${mtx_dir}/*.mtx" - barcodes = "${mtx_dir}/*.barcodes.txt" - features = "${mtx_dir}/*.genes.names.txt" - - // kallisto allows the following workflows: ["standard", "lamanno", "nac"] - // lamanno creates "spliced" and "unspliced" - // nac creates "nascent", "ambiguous" "mature" - // also, lamanno produces a barcodes and genes file for both spliced and unspliced - // while nac keep only one for all the different .mtx files produced - kb_non_standard_files = "" - if (params.kb_workflow == "lamanno") { - kb_non_standard_files = "spliced unspliced" - matrix = "${mtx_dir}/\${input_type}.mtx" - barcodes = "${mtx_dir}/\${input_type}.barcodes.txt" - features = "${mtx_dir}/\${input_type}.genes.txt" - } - if (params.kb_workflow == "nac") { - kb_non_standard_files = "nascent ambiguous mature" - matrix = "${mtx_dir}/*\${input_type}.mtx" - features = "${mtx_dir}/*.genes.txt" - } // barcodes tsv has same pattern as standard workflow - - } else if (params.aligner == "alevin") { - - mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : '*_alevin_results/af_quant/alevin' - matrix = "${mtx_dir}/quants_mat.mtx" - barcodes = "${mtx_dir}/quants_mat_rows.txt" - features = "${mtx_dir}/quants_mat_cols.txt" - - } else if (params.aligner == 'star') { - - mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : "${input_type}" - suffix = (input_type == 'custom_emptydrops_filter') ? 
'' : '.gz' - matrix = "${mtx_dir}/matrix.mtx${suffix}" - barcodes = "${mtx_dir}/barcodes.tsv${suffix}" - features = "${mtx_dir}/features.tsv${suffix}" - - } - - // - // run script - // - """ - mkdir ${meta.id} - """ - - if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') - """ - # convert file types - for input_type in ${kb_non_standard_files} ; do - mtx_to_seurat.R \\ - ${matrix} \\ - ${barcodes} \\ - ${features} \\ - ${meta.id}/${meta.id}_\${input_type}_matrix.rds \\ - ${aligner} \\ - ${is_emptydrops} - done - """ - - else - """ - mtx_to_seurat.R \\ - $matrix \\ - $barcodes \\ - $features \\ - ${meta.id}/${meta.id}_${input_type}_matrix.rds \\ - ${aligner} \\ - ${is_emptydrops} - """ - - stub: - """ - mkdir ${meta.id} - touch ${meta.id}/${meta.id}_matrix.rds - touch versions.yml - """ -} diff --git a/modules/local/parse_cellrangermulti_samplesheet.nf b/modules/local/parse_cellrangermulti_samplesheet.nf index df616995..e8f56b67 100644 --- a/modules/local/parse_cellrangermulti_samplesheet.nf +++ b/modules/local/parse_cellrangermulti_samplesheet.nf @@ -1,4 +1,9 @@ process PARSE_CELLRANGERMULTI_SAMPLESHEET { + + // + // This module contains a custom script for checking special cellranger multi samplesheet + // + label 'process_low' publishDir = [ enabled: false ] diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index 8e8bd519..5c362c99 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -1,4 +1,9 @@ process SIMPLEAF_INDEX { + + // + // This module executes simpleaf to generate alevin genome index + // + tag "$transcript_gtf" label "process_medium" diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index 53e0ccb2..cf0de3fb 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -1,4 +1,9 @@ process SIMPLEAF_QUANT { + + // + // This module executes simpleaf to perform quantification with alevin + // + tag "$meta.id" label 
'process_high' diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf index 0b26e037..001e2ccd 100644 --- a/modules/local/star_align.nf +++ b/modules/local/star_align.nf @@ -1,4 +1,9 @@ process STAR_ALIGN { + + // + // This module executes STAR align quantification + // + tag "$meta.id" label 'process_high' diff --git a/modules/local/templates/anndatar_convert.R b/modules/local/templates/anndatar_convert.R new file mode 100755 index 00000000..6f78e282 --- /dev/null +++ b/modules/local/templates/anndatar_convert.R @@ -0,0 +1,42 @@ +#!/usr/bin/env Rscript + +# to use nf variables: "${meta.id}" + +# load libraries +library(anndataR) +library(SeuratObject) +library(SingleCellExperiment) + +# read input +adata <- read_h5ad("${h5ad}") + +# convert to Seurat +obj <- adata\$to_Seurat() + +# save files +dir.create(file.path("$meta.id"), showWarnings = FALSE) +saveRDS(obj, file = "${meta.id}_${meta.input_type}_matrix.seurat.rds") + +# convert to SingleCellExperiment +obj <- adata\$to_SingleCellExperiment() + +# save files +dir.create(file.path("$meta.id"), showWarnings = FALSE) +saveRDS(obj, file = "${meta.id}_${meta.input_type}_matrix.sce.rds") + +# +# save versions file +# +versions_file <- file("versions.yml") +write( + paste( + '${task.process}:', + paste0(' r-base: "', R.Version()\$version.string, '"'), + paste0(' anndataR: "', as.character(packageVersion("anndataR")), '"'), + paste0(' SeuratObject: "', as.character(packageVersion("SeuratObject")), '"'), + paste0(' SingleCellExperiment: "', as.character(packageVersion("SingleCellExperiment")), '"'), + sep = "\\n" + ), + versions_file +) +close(versions_file) diff --git a/modules/local/templates/barcodes.py b/modules/local/templates/barcodes.py new file mode 100644 index 00000000..73b9a32a --- /dev/null +++ b/modules/local/templates/barcodes.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +"""Subset h5ad to a predefined set of barcodes""" + +import platform +import anndata as ad +import pandas as pd + +def 
format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +df = pd.read_csv("${barcodes_csv}", header=None) +adata = ad.read_h5ad("${h5ad}") + +adata = adata[df[0].values] + +adata.write_h5ad("${prefix}.h5ad") + +# Versions + +versions = { + "${task.process}": { + "python": platform.python_version(), + "anndata": ad.__version__, + "pandas": pd.__version__ + } +} + +with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) diff --git a/modules/local/templates/concat_h5ad.py b/modules/local/templates/concat_h5ad.py new file mode 100755 index 00000000..9eddfa46 --- /dev/null +++ b/modules/local/templates/concat_h5ad.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +# Set numba chache dir to current working directory (which is a writable mount also in containers) +import os + +os.environ["NUMBA_CACHE_DIR"] = "." + +import scanpy as sc, anndata as ad, pandas as pd +from pathlib import Path +import platform + + +def read_samplesheet(samplesheet): + df = pd.read_csv(samplesheet) + df.set_index("sample") + + # samplesheet may contain replicates, when it has, + # group information from replicates and collapse with commas + # only keep unique values using set() + df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column.astype(str)))) + + return df + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + Returns: + str: A string formatted as YAML. 
+ """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +def dump_versions(): + versions = { + "${task.process}": { + "python": platform.python_version(), + "scanpy": sc.__version__, + } + } + + with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) + +if __name__ == "__main__": + + # Open samplesheet as dataframe + df_samplesheet = read_samplesheet("${samplesheet}") + + # find all h5ad and append to dict + dict_of_h5ad = {str(path).replace("_matrix.h5ad", ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")} + + # concat h5ad files + adata = ad.concat(dict_of_h5ad, label="sample", merge="unique", index_unique="_") + + # merge with data.frame, on sample information + adata.obs = adata.obs.join(df_samplesheet, on="sample", how="left").astype(str) + adata.write_h5ad("combined_${meta.input_type}_matrix.h5ad") + + print("Wrote h5ad file to {}".format("combined_${meta.input_type}_matrix.h5ad")) + + # dump versions + dump_versions() diff --git a/modules/local/templates/mtx_to_h5ad_alevin.py b/modules/local/templates/mtx_to_h5ad_alevin.py new file mode 100755 index 00000000..492defd3 --- /dev/null +++ b/modules/local/templates/mtx_to_h5ad_alevin.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python + +# Set numba chache dir to current working directory (which is a writable mount also in containers) +import os + +os.environ["NUMBA_CACHE_DIR"] = "." 
+ +import scanpy as sc +import pandas as pd +import argparse +import anndata +from anndata import AnnData +import platform + +def _mtx_to_adata( + input: str, + sample: str, +): + + adata = sc.read_mtx(f"{input}/quants_mat.mtx") + adata.obs_names = pd.read_csv(f"{input}/quants_mat_rows.txt", header=None, sep="\\t")[0].values + adata.var_names = pd.read_csv(f"{input}/quants_mat_cols.txt", header=None, sep="\\t")[0].values + adata.obs["sample"] = sample + + return adata + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +def dump_versions(): + versions = { + "${task.process}": { + "python": platform.python_version(), + "scanpy": sc.__version__, + "pandas": pd.__version__, + "anndata": anndata.__version__, + } + } + + with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) + +def input_to_adata( + input_data: str, + output: str, + sample: str, +): + print(f"Reading in {input_data}") + + # open main data + adata = _mtx_to_adata(input_data, sample) + + # standard format + # index are gene IDs and symbols are a column + # TODO: how to get gene_symbols for alevin? 
+ adata.var['gene_versions'] = adata.var.index + adata.var.index = adata.var['gene_versions'].str.split('.').str[0].values + adata.var_names_make_unique() + + # write results + adata.write_h5ad(f"{output}") + print(f"Wrote h5ad file to {output}") + +# +# Run main script +# + +# create the directory with the sample name +os.makedirs("${meta.id}", exist_ok=True) + +# input_type comes from NF module +input_to_adata( + input_data="${meta.id}_alevin_results/af_quant/alevin/", + output="${meta.id}_${meta.input_type}_matrix.h5ad", + sample="${meta.id}" +) + +# dump versions +dump_versions() diff --git a/modules/local/templates/mtx_to_h5ad_cellranger.py b/modules/local/templates/mtx_to_h5ad_cellranger.py new file mode 100755 index 00000000..44d587e6 --- /dev/null +++ b/modules/local/templates/mtx_to_h5ad_cellranger.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python + +# Set numba chache dir to current working directory (which is a writable mount also in containers) +import os + +os.environ["NUMBA_CACHE_DIR"] = "." + +import scanpy as sc +import pandas as pd +import argparse +import anndata +from anndata import AnnData +import platform +import glob + +def _mtx_to_adata( + input: str, + sample: str, +): + + adata = sc.read_10x_h5(input) + adata.var["gene_symbols"] = adata.var_names + adata.var.set_index("gene_ids", inplace=True) + adata.obs["sample"] = sample + + # reorder columns for 10x mtx files + adata.var = adata.var[["gene_symbols", "feature_types", "genome"]] + + return adata + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + Returns: + str: A string formatted as YAML. 
+ """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +def dump_versions(): + versions = { + "${task.process}": { + "python": platform.python_version(), + "scanpy": sc.__version__, + "pandas": pd.__version__, + "anndata": anndata.__version__, + } + } + + with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) + +def input_to_adata( + input_data: str, + output: str, + sample: str, +): + print(f"Reading in {input_data}") + + # open main data + adata = _mtx_to_adata(input_data, sample) + + # standard format + # index are gene IDs and symbols are a column + adata.var['gene_versions'] = adata.var.index + adata.var.index = adata.var['gene_versions'].str.split('.').str[0].values + adata.var_names_make_unique() + + # write results + adata.write_h5ad(f"{output}") + print(f"Wrote h5ad file to {output}") + + # dump versions + dump_versions() + + return adata + +# +# Run main script +# + +# create the directory with the sample name +os.makedirs("${meta.id}", exist_ok=True) + +# input_type comes from NF module +adata = input_to_adata( + input_data=glob.glob("*${meta.input_type}_feature_bc_matrix.h5")[0], # cellrangermulti has 'sample_' as prefix + output="${meta.id}_${meta.input_type}_matrix.h5ad", + sample="${meta.id}" +) diff --git a/modules/local/templates/mtx_to_h5ad_kallisto.py b/modules/local/templates/mtx_to_h5ad_kallisto.py new file mode 100755 index 00000000..905f3d8a --- /dev/null +++ b/modules/local/templates/mtx_to_h5ad_kallisto.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python + +# Set numba chache dir to current working directory (which is a writable mount also in containers) +import os + +os.environ["NUMBA_CACHE_DIR"] = "." 
+ +import scanpy as sc +import pandas as pd +import anndata +from anndata import AnnData, concat as concat_ad +from scipy.sparse import csr_matrix +import platform +import glob +import numpy as np + + +def _mtx_to_adata( + matrix: str, + barcodes: str, + features: str, +): + """Load kallisto-formatted mtx files into AnnData""" + adata = sc.read_mtx(matrix) + adata.obs_names = pd.read_csv(barcodes, header=None, sep="\\t")[0].values + adata.var_names = pd.read_csv(features, header=None, sep="\\t")[0].values + return adata + + +def _add_metadata(adata: AnnData, t2g: str, sample: str): + """Add var and obs metadata""" + adata.obs["sample"] = sample + + txp2gene = pd.read_table( + t2g, header=None, names=["gene_id", "gene_symbol"], usecols=[1, 2] + ) + txp2gene = txp2gene.drop_duplicates(subset="gene_id").set_index("gene_id") + adata.var = adata.var.join(txp2gene, how="left") + + # sanitize gene IDs into standard format + # index are gene IDs and symbols are a column + adata.var["gene_versions"] = adata.var.index + adata.var.index = adata.var["gene_versions"].str.split(".").str[0].values + adata.var_names_make_unique() # in case user does not use ensembl references, names might not be unique + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + Returns: + str: A string formatted as YAML. 
+ """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + + +def dump_versions(): + versions = { + "${task.process}": { + "python": platform.python_version(), + "scanpy": sc.__version__, + "pandas": pd.__version__, + "anndata": anndata.__version__, + } + } + + with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) + + +if __name__ == "__main__": + # create the directory with the sample name + os.makedirs("${meta.id}", exist_ok=True) + + # input_type comes from NF module + if "${params.kb_workflow}" == "standard": + adata = _mtx_to_adata( + matrix=glob.glob("${inputs}/*.mtx")[0], + barcodes=glob.glob("${inputs}/*.barcodes.txt")[0], + features=glob.glob("${inputs}/*.genes.txt")[0], + ) + + else: + spliced = _mtx_to_adata( + matrix=glob.glob("${inputs}/spliced*.mtx")[0], + barcodes=glob.glob("${inputs}/spliced*.barcodes.txt")[0], + features=glob.glob("${inputs}/spliced*.genes.txt")[0], + ) + unspliced = _mtx_to_adata( + matrix=glob.glob("${inputs}/unspliced*.mtx")[0], + barcodes=glob.glob("${inputs}/unspliced*.barcodes.txt")[0], + features=glob.glob("${inputs}/unspliced*.genes.txt")[0], + ) + + # The barcodes of spliced / non-spliced are not necessarily the same. 
+ # We fill the missing barcodes with zeros + all_barcodes = list(set(unspliced.obs_names) | set(spliced.obs_names)) + missing_spliced = list(set(unspliced.obs_names) - set(spliced.obs_names)) + missing_unspliced = list(set(spliced.obs_names) - set(unspliced.obs_names)) + ad_missing_spliced = AnnData( + X=csr_matrix((len(missing_spliced), spliced.shape[1])), + obs=pd.DataFrame(index=missing_spliced), + var=spliced.var, + ) + ad_missing_unspliced = AnnData( + X=csr_matrix((len(missing_unspliced), spliced.shape[1])), + obs=pd.DataFrame(index=missing_unspliced), + var=unspliced.var, + ) + + spliced = concat_ad([spliced, ad_missing_spliced], join="outer")[ + all_barcodes, : + ] + unspliced = concat_ad([unspliced, ad_missing_unspliced], join="outer")[ + all_barcodes, : + ] + + assert np.all(spliced.var_names == unspliced.var_names) + + adata = AnnData( + X=spliced.X + unspliced.X, + layers={"unspliced": unspliced.X, "spliced": spliced.X}, + obs=pd.DataFrame(index=all_barcodes), + var=pd.DataFrame(index=spliced.var_names), + ) + + # out of the conditional: snippet for both standard and non-standard workflows + + # finalize generated adata object + _add_metadata(adata, t2g="${txp2gene}", sample="${meta.id}") + adata.write_h5ad("${meta.id}_${meta.input_type}_matrix.h5ad") + + # dump versions + dump_versions() diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/templates/mtx_to_h5ad_star.py new file mode 100755 index 00000000..d90a7d50 --- /dev/null +++ b/modules/local/templates/mtx_to_h5ad_star.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python + +# Set numba chache dir to current working directory (which is a writable mount also in containers) +import os + +os.environ["NUMBA_CACHE_DIR"] = "." 
+ +import scanpy as sc +import pandas as pd +import argparse +import anndata +from anndata import AnnData +import platform + +def _mtx_to_adata( + input: str, + sample: str, +): + adata = sc.read_10x_mtx(input) + adata.obs["sample"] = sample + + return adata + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +def dump_versions(): + versions = { + "${task.process}": { + "python": platform.python_version(), + "scanpy": sc.__version__, + "pandas": pd.__version__, + "anndata": anndata.__version__, + } + } + + with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) + +def input_to_adata( + input_data: str, + output: str, + sample: str, +): + print(f"Reading in {input_data}") + + # open main data + adata = _mtx_to_adata(input_data, sample) + + # standard format + # index are gene IDs and symbols are a column + adata.var["gene_symbol"] = adata.var.index + adata.var['gene_versions'] = adata.var["gene_ids"] + adata.var.index = adata.var['gene_versions'].str.split('.').str[0].values + adata.var_names_make_unique() # in case user does not use ensembl references, names might not be unique + + # write results + adata.write_h5ad(f"{output}") + print(f"Wrote h5ad file to {output}") + +# +# Run main script +# + +# create the directory with the sample name +os.makedirs("${meta.id}", exist_ok=True) + +# input_type comes from NF module +input_to_adata( + input_data="${meta.input_type}", + output="${meta.id}_${meta.input_type}_matrix.h5ad", + sample="${meta.id}" +) + +# dump versions +dump_versions() diff --git 
a/modules/nf-core/cellbender/removebackground/environment.yml b/modules/nf-core/cellbender/removebackground/environment.yml new file mode 100644 index 00000000..a157c522 --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cellbender=0.3.0 diff --git a/modules/nf-core/cellbender/removebackground/main.nf b/modules/nf-core/cellbender/removebackground/main.nf new file mode 100644 index 00000000..f3cfd1ff --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/main.nf @@ -0,0 +1,65 @@ +process CELLBENDER_REMOVEBACKGROUND { + tag "$meta.id" + label 'process_medium' + label 'process_gpu' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://community.wave.seqera.io/library/cellbender:0.3.0--c4addb97ab2d83fe': + 'community.wave.seqera.io/library/cellbender:0.3.0--41318a055fc3aacb' }" + + input: + tuple val(meta), path(h5ad) + + output: + tuple val(meta), path("${prefix}.h5") , emit: h5 + tuple val(meta), path("${prefix}_filtered.h5") , emit: filtered_h5 + tuple val(meta), path("${prefix}_posterior.h5") , emit: posterior_h5 + tuple val(meta), path("${prefix}_cell_barcodes.csv"), emit: barcodes + tuple val(meta), path("${prefix}_metrics.csv") , emit: metrics + tuple val(meta), path("${prefix}_report.html") , emit: report + tuple val(meta), path("${prefix}.pdf") , emit: pdf + tuple val(meta), path("${prefix}.log") , emit: log + tuple val(meta), path("ckpt.tar.gz") , emit: checkpoint + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + args = task.ext.args ?: "" + use_gpu = task.ext.use_gpu ? "--cuda" : "" + """ + TMPDIR=. 
cellbender remove-background \ + ${args} \ + --cpu-threads ${task.cpus} \ + ${use_gpu} \ + --input ${h5ad} \ + --output ${prefix}.h5 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellbender: \$(cellbender --version) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch "${prefix}.h5" + touch "${prefix}_filtered.h5" + touch "${prefix}_posterior.h5" + touch "${prefix}_cell_barcodes.csv" + touch "${prefix}_metrics.csv" + touch "${prefix}_report.html" + touch "${prefix}.pdf" + touch "${prefix}.log" + touch "ckpt.tar.gz" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellbender: \$(cellbender --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cellbender/removebackground/meta.yml b/modules/nf-core/cellbender/removebackground/meta.yml new file mode 100644 index 00000000..d70fa3fd --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/meta.yml @@ -0,0 +1,75 @@ +name: cellbender_removebackground +description: Module to use CellBender to estimate ambient RNA from single-cell RNA-seq data +keywords: + - single-cell + - scRNA-seq + - ambient RNA removal +tools: + - cellbender: + description: CellBender is a software package for eliminating technical artifacts from high-throughput single-cell RNA sequencing (scRNA-seq) data. + documentation: https://cellbender.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/broadinstitute/CellBender + licence: ["BSD-3-Clause"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - h5ad: + type: file + description: AnnData file containing unfiltered data (with empty droplets) + pattern: "*.h5ad" +output: + - h5: + type: file + description: Full count matrix as an h5 file, with background RNA removed. This file contains all the original droplet barcodes. + pattern: "*.h5" + - filtered_h5: + type: file + description: | + Full count matrix as an h5 file, with background RNA removed. 
This file contains only the droplet barcodes which were determined to have a > 50% posterior probability of containing cells. + pattern: "*.h5" + - posterior_h5: + type: file + description: | + The full posterior probability of noise counts. This is not normally used downstream. + pattern: "*.h5" + - barcodes: + type: file + description: | + CSV file containing all the droplet barcodes which were determined to have a > 50% posterior probability of containing cells. | + Barcodes are written in plain text. This information is also contained in each of the above outputs, | + but is included as a separate output for convenient use in certain downstream applications. + pattern: "*.csv" + - metrics: + type: file + description: | + Metrics describing the run, potentially to be used to flag problematic runs | + when using CellBender as part of a large-scale automated pipeline. + pattern: "*.csv" + - report: + type: file + description: | + HTML report including plots and commentary, along with any warnings or suggestions for improved parameter settings. + pattern: "*.html" + - pdf: + type: file + description: PDF file that provides a standard graphical summary of the inference procedure. + pattern: "*.pdf" + - log: + type: file + description: Log file produced by the cellbender remove-background run. + pattern: "*.log" + - checkpoint: + type: file + description: Checkpoint file which contains the trained model and the full posterior. 
+ pattern: "*.ckpt" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@nictru" +maintainers: + - "@nictru" diff --git a/modules/nf-core/cellbender/removebackground/tests/epochs.config b/modules/nf-core/cellbender/removebackground/tests/epochs.config new file mode 100644 index 00000000..96282b07 --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/tests/epochs.config @@ -0,0 +1,6 @@ + +process { + withName: CELLBENDER_REMOVEBACKGROUND { + ext.args = '--epochs 20' + } +} diff --git a/modules/nf-core/cellbender/removebackground/tests/main.nf.test b/modules/nf-core/cellbender/removebackground/tests/main.nf.test new file mode 100644 index 00000000..1afa6f3b --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + name 'Test Process CELLBENDER_REMOVEBACKGROUND' + script '../main.nf' + process 'CELLBENDER_REMOVEBACKGROUND' + + tag "modules" + tag "modules_nfcore" + tag "cellbender/removebackground" + tag "cellbender" + + test("test_cellbender_removebackground") { + config './epochs.config' + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix_5k.h5ad", checkIfExists: true) + ] + """ + } + } + then { + assertAll( + {assert process.success}, + {assert file(process.out.h5.get(0).get(1)).exists()}, + {assert file(process.out.filtered_h5.get(0).get(1)).exists()}, + {assert file(process.out.posterior_h5.get(0).get(1)).exists()}, + {assert snapshot(process.out.barcodes).match("cellbender_removebackground_barcodes")}, + {assert snapshot(process.out.metrics).match("cellbender_removebackground_metrics")}, + {assert file(process.out.report.get(0).get(1)).exists()}, + {assert file(process.out.pdf.get(0).get(1)).exists()}, + {assert file(process.out.log.get(0).get(1)).exists()}, + {assert 
snapshot(process.out.versions).match("cellbender_removebackground_versions")} + ) + } + } + + test("test_cellbender_removebackground - stub") { + options '-stub' + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix_5k.h5ad", checkIfExists: true) + ] + """ + } + } + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out.h5).match("cellbender_removebackground_h5_stub")}, + {assert snapshot(process.out.filtered_h5).match("cellbender_removebackground_filtered_h5_stub")}, + {assert snapshot(process.out.posterior_h5).match("cellbender_removebackground_posterior_h5_stub")}, + {assert snapshot(process.out.barcodes).match("cellbender_removebackground_barcodes_stub")}, + {assert snapshot(process.out.metrics).match("cellbender_removebackground_metrics_stub")}, + {assert snapshot(process.out.report).match("cellbender_removebackground_report_stub")}, + {assert snapshot(process.out.pdf).match("cellbender_removebackground_pdf_stub")}, + {assert snapshot(process.out.log).match("cellbender_removebackground_log_stub")}, + {assert snapshot(process.out.versions).match("cellbender_removebackground_versions_stub")} + ) + } + } +} diff --git a/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap b/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap new file mode 100644 index 00000000..fdb51d66 --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap @@ -0,0 +1,196 @@ +{ + "cellbender_removebackground_versions": { + "content": [ + [ + "versions.yml:md5,b236ac7595dfa6cd4d51ac73e51cb05a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:09.33127881" + }, + "cellbender_removebackground_filtered_h5_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_filtered.h5:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + 
"meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.833598082" + }, + "cellbender_removebackground_pdf_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.891829278" + }, + "cellbender_removebackground_metrics": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_metrics.csv:md5,88272bde1c157528b0b0ab2abe5ad26f" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:09.327155805" + }, + "cellbender_removebackground_versions_stub": { + "content": [ + [ + "versions.yml:md5,b236ac7595dfa6cd4d51ac73e51cb05a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.904614838" + }, + "cellbender_removebackground_h5_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.h5:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.829304361" + }, + "cellbender_removebackground_metrics_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_metrics.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.870469733" + }, + "cellbender_removebackground_log_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.899293304" + }, + "cellbender_removebackground_barcodes": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_cell_barcodes.csv:md5,c8e8df9d0f9aea976d6f6aa36d329429" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:09.316098811" 
+ }, + "cellbender_removebackground_report_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.885307244" + }, + "cellbender_removebackground_posterior_h5_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_posterior.h5:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.838032754" + }, + "cellbender_removebackground_barcodes_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_cell_barcodes.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.861284979" + } +} \ No newline at end of file diff --git a/modules/nf-core/cellbender/removebackground/tests/tags.yml b/modules/nf-core/cellbender/removebackground/tests/tags.yml new file mode 100644 index 00000000..d935083b --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/tests/tags.yml @@ -0,0 +1,2 @@ +cellbender/removebackground: + - modules/nf-core/cellbender/removebackground/** diff --git a/nextflow_schema.json b/nextflow_schema.json index 935d4277..c09875be 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -90,7 +90,7 @@ }, "skip_emptydrops": { "type": "boolean", - "description": "Skip custom empty drops filter module" + "description": "Skip cellbender empty drops filter subworkflow" } } }, diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index 764c08f8..ae98cc85 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -1,11 +1,11 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ -include { GFFREAD_TRANSCRIPTOME } from '../../modules/local/gffread_transcriptome' -include { ALEVINQC } from '../../modules/local/alevinqc' -include { SIMPLEAF_INDEX } from 
'../../modules/local/simpleaf_index' -include { SIMPLEAF_QUANT } from '../../modules/local/simpleaf_quant' +include { GFFREAD_TRANSCRIPTOME } from '../../modules/local/gffread_transcriptome' +include { ALEVINQC } from '../../modules/local/alevinqc' +include { SIMPLEAF_INDEX } from '../../modules/local/simpleaf_index' +include { SIMPLEAF_QUANT } from '../../modules/local/simpleaf_quant' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ -include { GUNZIP } from '../../modules/nf-core/gunzip/main' +include { GUNZIP } from '../../modules/nf-core/gunzip/main' include { GFFREAD as GFFREAD_TXP2GENE } from '../../modules/nf-core/gffread/main' def multiqc_report = [] @@ -44,8 +44,6 @@ workflow SCRNASEQ_ALEVIN { } } - - /* * Perform quantification with salmon alevin */ @@ -66,6 +64,6 @@ workflow SCRNASEQ_ALEVIN { emit: ch_versions - alevin_results = SIMPLEAF_QUANT.out.alevin_results - alevinqc = ALEVINQC.out.report + alevin_results = SIMPLEAF_QUANT.out.alevin_results.map{ meta, files -> [meta + [input_type: 'raw'], files] } + alevinqc = ALEVINQC.out.report } diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 2461373b..d787e0f0 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -2,9 +2,9 @@ * Alignment with Cellranger */ -include {CELLRANGER_MKGTF} from "../../modules/nf-core/cellranger/mkgtf/main.nf" -include {CELLRANGER_MKREF} from "../../modules/nf-core/cellranger/mkref/main.nf" -include {CELLRANGER_COUNT} from "../../modules/nf-core/cellranger/count/main.nf" +include { CELLRANGER_MKGTF } from "../../modules/nf-core/cellranger/mkgtf/main.nf" +include { CELLRANGER_MKREF } from "../../modules/nf-core/cellranger/mkref/main.nf" +include { CELLRANGER_COUNT } from "../../modules/nf-core/cellranger/count/main.nf" // Define workflow to subset and index a genome region fasta file workflow CELLRANGER_ALIGN { @@ -49,7 +49,7 @@ workflow CELLRANGER_ALIGN { mtx_files.each{ if ( 
it.toString().contains("raw_feature_bc_matrix") ) { desired_files.add( it ) } } - [ meta, desired_files ] + [ meta + [input_type: 'raw'], desired_files ] } ch_matrices_filtered = @@ -58,12 +58,13 @@ workflow CELLRANGER_ALIGN { mtx_files.each{ if ( it.toString().contains("filtered_feature_bc_matrix") ) { desired_files.add( it ) } } - [ meta, desired_files ] + [ meta + [input_type: 'filtered'], desired_files ] } emit: ch_versions - cellranger_out = CELLRANGER_COUNT.out.outs - cellranger_matrices = ch_matrices_raw.mix( ch_matrices_filtered ) - star_index = cellranger_index + cellranger_out = CELLRANGER_COUNT.out.outs + cellranger_matrices_raw = ch_matrices_raw + cellranger_matrices_filtered = ch_matrices_filtered + star_index = cellranger_index } diff --git a/subworkflows/local/align_cellrangermulti.nf b/subworkflows/local/align_cellrangermulti.nf index 977bf478..53313b2f 100644 --- a/subworkflows/local/align_cellrangermulti.nf +++ b/subworkflows/local/align_cellrangermulti.nf @@ -204,8 +204,9 @@ workflow CELLRANGER_MULTI_ALIGN { emit: ch_versions - cellrangermulti_out = CELLRANGER_MULTI.out.outs - cellrangermulti_mtx = ch_matrices_raw.mix( ch_matrices_filtered ) + cellrangermulti_out = CELLRANGER_MULTI.out.outs + cellrangermulti_mtx_raw = ch_matrices_raw + cellrangermulti_mtx_filtered = ch_matrices_filtered } def parse_demultiplexed_output_channels(in_ch, pattern) { @@ -218,6 +219,7 @@ def parse_demultiplexed_output_channels(in_ch, pattern) { .transpose() // transpose for handling one meta/file pair at a time .map { meta, mtx_files -> def meta_clone = meta.clone() + meta_clone.input_type = pattern.contains('raw_') ? 
'raw' : 'filtered' // add metadata for conversion workflow if ( mtx_files.toString().contains("per_sample_outs") ) { def demultiplexed_sample_id = mtx_files.toString().split('/per_sample_outs/')[1].split('/')[0] meta_clone.id = demultiplexed_sample_id.toString() diff --git a/subworkflows/local/emptydrops_removal.nf b/subworkflows/local/emptydrops_removal.nf new file mode 100644 index 00000000..7d63e86f --- /dev/null +++ b/subworkflows/local/emptydrops_removal.nf @@ -0,0 +1,37 @@ +include { CELLBENDER_REMOVEBACKGROUND } from '../../modules/nf-core/cellbender/removebackground' +include { ADATA_BARCODES } from '../../modules/local/adata_barcodes' + +// +// TODO: Make it a nf-core subworkflow to be shared by scrnaseq and scdownstream pipelines. +// + +workflow EMPTY_DROPLET_REMOVAL { + take: + ch_unfiltered + + main: + ch_versions = Channel.empty() + + CELLBENDER_REMOVEBACKGROUND(ch_unfiltered) + ch_versions = ch_versions.mix(CELLBENDER_REMOVEBACKGROUND.out.versions) + + ch_combined = + ch_unfiltered + .join(CELLBENDER_REMOVEBACKGROUND.out.barcodes) + .map { meta, h5ad, csv -> + def meta_clone = meta.clone() + meta_clone.input_type = meta['input_type'].toString().replaceAll('raw', 'emptydrops_filter') + + [ meta_clone, h5ad, csv ] + } + + ADATA_BARCODES(ch_combined) + ch_versions = ch_versions.mix(ADATA_BARCODES.out.versions) + + ch_h5ad = ADATA_BARCODES.out.h5ad + + emit: + h5ad = ch_h5ad + + versions = ch_versions +} diff --git a/subworkflows/local/h5ad_conversion.nf b/subworkflows/local/h5ad_conversion.nf new file mode 100644 index 00000000..f832a7cf --- /dev/null +++ b/subworkflows/local/h5ad_conversion.nf @@ -0,0 +1,48 @@ +/* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ +include { CONCAT_H5AD } from '../../modules/local/concat_h5ad.nf' +include { ANNDATAR_CONVERT } from '../../modules/local/anndatar_convert' + +workflow H5AD_CONVERSION { + + take: + ch_h5ads + samplesheet + + main: + ch_versions = Channel.empty() + + // + // Concat sample-specific h5ad in one + // + 
ch_concat_h5ad_input = ch_h5ads.groupTuple() // gather all sample-specific files / per type + if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') { + // when having spliced / unspliced matrices, the collected tuple has two levels ( [[mtx_1, mtx_2]] ) + // which nextflow break because it is not a valid 'path' thus, we have to remove one level + // making it as [ mtx_1, mtx_2 ] + ch_concat_h5ad_input = ch_concat_h5ad_input.map{ type, matrices -> [ type, matrices.flatten().toList() ] } + } + + CONCAT_H5AD ( + ch_concat_h5ad_input, + samplesheet + ) + ch_h5ad_concat = CONCAT_H5AD.out.h5ad.map{ meta, file -> + def meta_clone = meta.clone() + meta_clone.id = 'combined' // maintain output prefix + [ meta_clone, file ] + } + ch_versions = ch_versions.mix(CONCAT_H5AD.out.versions.first()) + + // + // MODULE: Convert to Rds with AnndataR package + // + ANNDATAR_CONVERT ( + ch_h5ads.mix( ch_h5ad_concat ) + ) + ch_versions = ch_versions.mix(ANNDATAR_CONVERT.out.versions.first()) + + emit: + ch_versions + h5ads = ch_h5ads + +} diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 3deee2c5..5f8d9bcc 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -55,20 +55,20 @@ workflow KALLISTO_BUSTOOLS { // get raw/filtered counts ch_raw_counts = KALLISTOBUSTOOLS_COUNT.out.count.map{ meta, kb_dir -> if (file("${kb_dir.toUriString()}/counts_unfiltered").exists()) { - [meta, file("${kb_dir.toUriString()}/counts_unfiltered")] + [meta + [input_type: 'raw'], file("${kb_dir.toUriString()}/counts_unfiltered")] } } ch_filtered_counts = KALLISTOBUSTOOLS_COUNT.out.count.map{ meta, kb_dir -> if (file("${kb_dir.toUriString()}/counts_filtered").exists()) { - [meta, file("${kb_dir.toUriString()}/counts_filtered")] + [meta + [input_type: 'filtered'], file("${kb_dir.toUriString()}/counts_filtered")] } } emit: ch_versions counts = KALLISTOBUSTOOLS_COUNT.out.count - raw_counts = ch_raw_counts 
- filtered_counts = ch_filtered_counts + counts_raw = ch_raw_counts + counts_filtered = ch_filtered_counts txp2gene = txp2gene.collect() } diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf deleted file mode 100644 index 98e49a2e..00000000 --- a/subworkflows/local/mtx_conversion.nf +++ /dev/null @@ -1,65 +0,0 @@ -/* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ -include { MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' -include { CONCAT_H5AD } from '../../modules/local/concat_h5ad.nf' -include { MTX_TO_SEURAT } from '../../modules/local/mtx_to_seurat.nf' - -workflow MTX_CONVERSION { - - take: - mtx_matrices - samplesheet - txp2gene - star_index - - main: - ch_versions = Channel.empty() - - // Cellranger module output contains too many files which cause path collisions, we filter to the ones we need. - // Keeping backwards compatibility with cellranger-arc. - // TODO: Adapt cellranger-arc subworkflow like cellranger to remove this snippet here. - if (params.aligner in [ 'cellrangerarc' ]) { - mtx_matrices = mtx_matrices.map { meta, mtx_files -> - [ meta, mtx_files.findAll { it.toString().contains("filtered_feature_bc_matrix") } ] - } - .filter { meta, mtx_files -> mtx_files } // Remove any that are missing the relevant files - } - - // - // Convert matrix to h5ad - // - MTX_TO_H5AD ( - mtx_matrices, - txp2gene, - star_index - ) - - // - // Concat sample-specific h5ad in one - // - ch_concat_h5ad_input = MTX_TO_H5AD.out.h5ad.groupTuple() // gather all sample-specific files / per type - if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') { - // when having spliced / unspliced matrices, the collected tuple has two levels ( [[mtx_1, mtx_2]] ) - // which nextflow break because it is not a valid 'path' thus, we have to remove one level - // making it as [ mtx_1, mtx_2 ] - ch_concat_h5ad_input = ch_concat_h5ad_input.map{ type, matrices -> [ type, matrices.flatten().toList() ] } - } - CONCAT_H5AD ( - 
ch_concat_h5ad_input, - samplesheet - ) - - // - // Convert matrix do seurat - // - MTX_TO_SEURAT ( - mtx_matrices - ) - - //TODO CONCAT h5ad and MTX to h5ad should also have versions.yaml output - ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions, MTX_TO_SEURAT.out.versions) - - emit: - ch_versions - // counts = MTX_TO_H5AD.out.counts was this ever used? - -} diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf index 0c11acd1..aadda6b6 100644 --- a/subworkflows/local/starsolo.nf +++ b/subworkflows/local/starsolo.nf @@ -1,5 +1,6 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ -include { STAR_ALIGN } from '../../modules/local/star_align' +include { STAR_ALIGN } from '../../modules/local/star_align' +include { MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/gunzip/main' @@ -53,14 +54,12 @@ workflow STARSOLO { ) ch_versions = ch_versions.mix(STAR_ALIGN.out.versions) - emit: ch_versions // get rid of meta for star index - star_index = star_index.map{ meta, index -> index } - star_result = STAR_ALIGN.out.tab - star_counts = STAR_ALIGN.out.counts - raw_counts = STAR_ALIGN.out.raw_counts - filtered_counts = STAR_ALIGN.out.filtered_counts - for_multiqc = STAR_ALIGN.out.log_final.map{ meta, it -> it } + star_result = STAR_ALIGN.out.tab + star_counts = STAR_ALIGN.out.counts + raw_counts = STAR_ALIGN.out.raw_counts.map{ meta, files -> [meta + [input_type: 'raw'], files] } + filtered_counts = STAR_ALIGN.out.filtered_counts.map{ meta, files -> [meta + [input_type: 'filtered'], files] } + for_multiqc = STAR_ALIGN.out.log_final.map{ meta, it -> it } } diff --git a/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf index 4124a9b1..7ae2ed9a 100644 --- a/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf @@ -199,9 
+199,12 @@ def getGenomeAttribute(attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { if (params.genomes[ params.genome ].containsKey(attribute)) { return params.genomes[ params.genome ][ attribute ] + } else { + return null } + } else { + return null } - return null } // diff --git a/tests/main_pipeline_alevin.nf.test b/tests/main_pipeline_alevin.nf.test index be04fb76..398e98d2 100644 --- a/tests/main_pipeline_alevin.nf.test +++ b/tests/main_pipeline_alevin.nf.test @@ -25,12 +25,12 @@ nextflow_pipeline { {assert workflow.success}, // How many tasks were executed? - {assert workflow.trace.tasks().size() == 14}, + {assert workflow.trace.tasks().size() == 17}, // How many results were produced? {assert path("${outputDir}/results_alevin").list().size() == 5}, - {assert path("${outputDir}/results_alevin/alevin").list().size() == 4}, - {assert path("${outputDir}/results_alevin/alevin/mtx_conversions").list().size() == 4}, + {assert path("${outputDir}/results_alevin/alevin").list().size() == 3}, + {assert path("${outputDir}/results_alevin/alevin/mtx_conversions").list().size() == 5}, {assert path("${outputDir}/results_alevin/alevinqc").list().size() == 2}, {assert path("${outputDir}/results_alevin/fastqc").list().size() == 12}, {assert path("${outputDir}/results_alevin/multiqc").list().size() == 3}, @@ -45,14 +45,16 @@ nextflow_pipeline { // Check if files are the same // {assert snapshot( - path( "${outputDir}/results_alevin/alevin/Sample_X_alevin_results/af_quant/alevin/quants_mat_cols.txt" ), - path( "${outputDir}/results_alevin/alevin/Sample_X_alevin_results/af_quant/alevin/quants_mat.mtx" ), - path( "${outputDir}/results_alevin/alevin/Sample_X_alevin_results/af_quant/alevin/quants_mat_rows.txt" ), - path( "${outputDir}/results_alevin/alevin/Sample_Y_alevin_results/af_quant/alevin/quants_mat_cols.txt" ), - path( "${outputDir}/results_alevin/alevin/Sample_Y_alevin_results/af_quant/alevin/quants_mat.mtx" ), - path( 
"${outputDir}/results_alevin/alevin/Sample_Y_alevin_results/af_quant/alevin/quants_mat_rows.txt" ), - path( "${outputDir}/results_alevin/alevin/mtx_conversions/Sample_X/Sample_X_raw_matrix.rds" ), - path( "${outputDir}/results_alevin/alevin/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.rds" ) + path( "${outputDir}/results_alevin/alevin/Sample_X/Sample_X_alevin_results/af_quant/alevin/quants_mat_cols.txt" ), + path( "${outputDir}/results_alevin/alevin/Sample_X/Sample_X_alevin_results/af_quant/alevin/quants_mat.mtx" ), + path( "${outputDir}/results_alevin/alevin/Sample_X/Sample_X_alevin_results/af_quant/alevin/quants_mat_rows.txt" ), + path( "${outputDir}/results_alevin/alevin/Sample_Y/Sample_Y_alevin_results/af_quant/alevin/quants_mat_cols.txt" ), + path( "${outputDir}/results_alevin/alevin/Sample_Y/Sample_Y_alevin_results/af_quant/alevin/quants_mat.mtx" ), + path( "${outputDir}/results_alevin/alevin/Sample_Y/Sample_Y_alevin_results/af_quant/alevin/quants_mat_rows.txt" ), + path( "${outputDir}/results_alevin/alevin/mtx_conversions/Sample_X/Sample_X_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_alevin/alevin/mtx_conversions/Sample_X/Sample_X_raw_matrix.sce.rds" ), + path( "${outputDir}/results_alevin/alevin/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_alevin/alevin/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.sce.rds" ) ).match()} ) // end of assertAll() diff --git a/tests/main_pipeline_alevin.nf.test.snap b/tests/main_pipeline_alevin.nf.test.snap index 58976c6a..4138b7e5 100644 --- a/tests/main_pipeline_alevin.nf.test.snap +++ b/tests/main_pipeline_alevin.nf.test.snap @@ -7,13 +7,15 @@ "quants_mat_cols.txt:md5,e9868982c17a330392e38c2a5933cf97", "quants_mat.mtx:md5,54cd12666016adce94c025b2e07f4b02", "quants_mat_rows.txt:md5,6b458a7777260ba90eccbe7919df934b", - "Sample_X_raw_matrix.rds:md5,ad35ee66bf2fc3d5d4656c19a7e64e2b", - "Sample_Y_raw_matrix.rds:md5,baf584142205b1d42bb6fdab1f22a06a" + 
"Sample_X_raw_matrix.seurat.rds:md5,708ec66ee15c31c1a09cbaee035a6508", + "Sample_X_raw_matrix.sce.rds:md5,3bed89cd187a3f5385636fd5196ef42d", + "Sample_Y_raw_matrix.seurat.rds:md5,3f031ff7c50ee2a2e13ea86319892ee5", + "Sample_Y_raw_matrix.sce.rds:md5,c83e8e04b8fd4bb4d03313a1348686dc" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.10.2" }, - "timestamp": "2024-02-14T14:49:46.831540515" + "timestamp": "2024-11-29T10:17:18.33882659" } -} +} \ No newline at end of file diff --git a/tests/main_pipeline_cellranger.nf.test b/tests/main_pipeline_cellranger.nf.test index d1eb4191..f16dbed6 100644 --- a/tests/main_pipeline_cellranger.nf.test +++ b/tests/main_pipeline_cellranger.nf.test @@ -25,13 +25,13 @@ nextflow_pipeline { {assert workflow.success}, // How many tasks were executed? - {assert workflow.trace.tasks().size() == 18}, + {assert workflow.trace.tasks().size() == 24}, // How many results were produced? {assert path("${outputDir}/results_cellranger").list().size() == 4}, {assert path("${outputDir}/results_cellranger/cellranger").list().size() == 4}, - {assert path("${outputDir}/results_cellranger/cellranger/mtx_conversions").list().size() == 5}, - {assert path("${outputDir}/results_cellranger/cellranger/count").list().size() == 3}, + {assert path("${outputDir}/results_cellranger/cellranger/mtx_conversions").list().size() == 8}, + {assert path("${outputDir}/results_cellranger/cellranger/count").list().size() == 2}, {assert path("${outputDir}/results_cellranger/fastqc").list().size() == 12}, {assert path("${outputDir}/results_cellranger/multiqc").list().size() == 3}, @@ -49,20 +49,24 @@ nextflow_pipeline { {assert snapshot( path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/filtered_feature_bc_matrix/barcodes.tsv.gz" ), path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/filtered_feature_bc_matrix/features.tsv.gz" ), - path( 
"${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/filtered_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/filtered_feature_bc_matrix/matrix.mtx.gz" ), path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/filtered_feature_bc_matrix/barcodes.tsv.gz" ), path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/filtered_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/filtered_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/raw_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/raw_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/raw_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/raw_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/raw_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/raw_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_X/Sample_X_raw_matrix.rds" ), - path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.rds" ), - path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_X/Sample_X_filtered_matrix.rds" ), - path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_Y/Sample_Y_filtered_matrix.rds" ) + path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/filtered_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/raw_feature_bc_matrix/barcodes.tsv.gz" ), + path( 
"${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/raw_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/raw_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/raw_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/raw_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/raw_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_X/Sample_X_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_X/Sample_X_raw_matrix.sce.rds" ), + path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.sce.rds" ), + path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_X/Sample_X_filtered_matrix.seurat.rds" ), + path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_X/Sample_X_filtered_matrix.sce.rds" ), + path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_Y/Sample_Y_filtered_matrix.seurat.rds" ), + path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_Y/Sample_Y_filtered_matrix.sce.rds" ) ).match()} ) // end of assertAll() diff --git a/tests/main_pipeline_cellranger.nf.test.snap b/tests/main_pipeline_cellranger.nf.test.snap index 6d276d82..1379aff8 100644 --- a/tests/main_pipeline_cellranger.nf.test.snap +++ b/tests/main_pipeline_cellranger.nf.test.snap @@ -13,15 +13,19 @@ "barcodes.tsv.gz:md5,081f72b5252ccaf5ffd535ffbd235c4c", "features.tsv.gz:md5,99e453cb1443a3e43e99405184e51a5e", "matrix.mtx.gz:md5,58182db2706d532ec970526de3d3b70f", - "Sample_X_raw_matrix.rds:md5,306a5477ace4d43d851b8389fdfeaf1f", - 
"Sample_Y_raw_matrix.rds:md5,74b31532da4cae5a8197d690021d77fc", - "Sample_X_filtered_matrix.rds:md5,f9191ba575a3ab79ada4807715f18573", - "Sample_Y_filtered_matrix.rds:md5,7be3f7b29d668dcf7e951b9f4d371a5e" + "Sample_X_raw_matrix.seurat.rds:md5,155faccf5164a5c56819b267dee0ebb1", + "Sample_X_raw_matrix.sce.rds:md5,4bfef42037307e73f0135abb2373a21e", + "Sample_Y_raw_matrix.seurat.rds:md5,3f4a3e6529b10c646fd08173d5baa339", + "Sample_Y_raw_matrix.sce.rds:md5,061ab8ba3ed28e6312c0367a2a9dfeb3", + "Sample_X_filtered_matrix.seurat.rds:md5,847448239100d08e3ca44017f93ca05d", + "Sample_X_filtered_matrix.sce.rds:md5,797244c2cd63f2b814f2a0cd6c3f080e", + "Sample_Y_filtered_matrix.seurat.rds:md5,50e765e4559c94edd23b123f9232075f", + "Sample_Y_filtered_matrix.sce.rds:md5,dcf9ce35fba58c2b04ca72703b483804" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.10.2" }, - "timestamp": "2024-04-16T09:43:45.32298954" + "timestamp": "2024-11-29T07:53:03.653246538" } -} +} \ No newline at end of file diff --git a/tests/main_pipeline_cellrangermulti.nf.test b/tests/main_pipeline_cellrangermulti.nf.test index 9cf4b413..263e486a 100644 --- a/tests/main_pipeline_cellrangermulti.nf.test +++ b/tests/main_pipeline_cellrangermulti.nf.test @@ -18,6 +18,7 @@ nextflow_pipeline { gtf = 'https://ftp.ensembl.org/pub/release-110/gtf/homo_sapiens/Homo_sapiens.GRCh38.110.gtf.gz' aligner = 'cellrangermulti' protocol = 'auto' + skip_emptydrops = true } } @@ -33,12 +34,12 @@ nextflow_pipeline { {assert workflow.success}, // How many tasks were executed? - {assert workflow.trace.tasks().size() == 55}, + {assert workflow.trace.tasks().size() == 85}, // How many results were produced? 
{assert path("${outputDir}/results_cellrangermulti").list().size() == 4}, {assert path("${outputDir}/results_cellrangermulti/cellrangermulti").list().size() == 5}, - {assert path("${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions").list().size() == 13}, + {assert path("${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions").list().size() == 16}, {assert path("${outputDir}/results_cellrangermulti/cellrangermulti/count").list().size() == 4}, {assert path("${outputDir}/results_cellrangermulti/fastqc").list().size() == 48}, {assert path("${outputDir}/results_cellrangermulti/multiqc").list().size() == 3}, @@ -70,86 +71,111 @@ nextflow_pipeline { // {assert snapshot( // barcodes.tsv.gz files - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/multi/count/raw_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/count/sample_raw_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/count/sample_raw_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/count/sample_raw_feature_bc_matrix/barcodes.tsv.gz" ), - path( 
"${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/count/sample_raw_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/multi/count/raw_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/per_sample_outs/PBMC_10K/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/multi/count/raw_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/multi/count/raw_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/count/sample_raw_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/count/sample_raw_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/count/sample_raw_feature_bc_matrix/barcodes.tsv.gz" ), + path( 
"${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/count/sample_raw_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/multi/count/raw_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/per_sample_outs/PBMC_10K/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/multi/count/raw_feature_bc_matrix/barcodes.tsv.gz" ), path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/per_sample_outs/PBMC_10K_CMO_PBMCs_human_1/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/per_sample_outs/PBMC_10K_CMO_PBMCs_human_2/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/multi/count/raw_feature_bc_matrix/barcodes.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/per_sample_outs/PBMC_10K_CMV/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/multi/count/raw_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/per_sample_outs/PBMC_10K_CMV/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz" ), // features.tsv.gz files path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/multi/count/raw_feature_bc_matrix/features.tsv.gz" ), - path( 
"${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/count/sample_filtered_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/count/sample_raw_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/count/sample_filtered_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/count/sample_raw_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/count/sample_filtered_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/count/sample_raw_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/count/sample_filtered_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/count/sample_raw_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/multi/count/raw_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/per_sample_outs/PBMC_10K/count/sample_filtered_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/multi/count/raw_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/count/sample_filtered_feature_bc_matrix/features.tsv.gz" ), + path( 
"${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/count/sample_raw_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/count/sample_filtered_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/count/sample_raw_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/count/sample_filtered_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/count/sample_raw_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/count/sample_filtered_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/count/sample_raw_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/multi/count/raw_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/per_sample_outs/PBMC_10K/count/sample_filtered_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/multi/count/raw_feature_bc_matrix/features.tsv.gz" ), path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/per_sample_outs/PBMC_10K_CMO_PBMCs_human_1/count/sample_filtered_feature_bc_matrix/features.tsv.gz" ), path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/per_sample_outs/PBMC_10K_CMO_PBMCs_human_2/count/sample_filtered_feature_bc_matrix/features.tsv.gz" 
), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/multi/count/raw_feature_bc_matrix/features.tsv.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/per_sample_outs/PBMC_10K_CMV/count/sample_filtered_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/multi/count/raw_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/per_sample_outs/PBMC_10K_CMV/count/sample_filtered_feature_bc_matrix/features.tsv.gz" ), // matrix.mtx.gz files path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/multi/count/raw_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/count/sample_raw_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/count/sample_raw_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/count/sample_raw_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), - path( 
"${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/count/sample_raw_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/multi/count/raw_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/per_sample_outs/PBMC_10K/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/multi/count/raw_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/count/sample_raw_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/count/sample_raw_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/count/sample_raw_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/count/sample_raw_feature_bc_matrix/matrix.mtx.gz" ), + path( 
"${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/multi/count/raw_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/per_sample_outs/PBMC_10K/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/multi/count/raw_feature_bc_matrix/matrix.mtx.gz" ), path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/per_sample_outs/PBMC_10K_CMO_PBMCs_human_1/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/per_sample_outs/PBMC_10K_CMO_PBMCs_human_2/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/multi/count/raw_feature_bc_matrix/matrix.mtx.gz" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/per_sample_outs/PBMC_10K_CMV/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/multi/count/raw_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/per_sample_outs/PBMC_10K_CMV/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz" ), // metrics_summary.csv files - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/metrics_summary.csv" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/metrics_summary.csv" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/metrics_summary.csv" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/metrics_summary.csv" ), 
- path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/per_sample_outs/PBMC_10K/metrics_summary.csv" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Colorectal_BC3/metrics_summary.csv" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Liver_BC1/metrics_summary.csv" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Ovarian_BC2/metrics_summary.csv" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/4PLEX_HUMAN/outs/per_sample_outs/Pancreas_BC4/metrics_summary.csv" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K/outs/per_sample_outs/PBMC_10K/metrics_summary.csv" ), path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/per_sample_outs/PBMC_10K_CMO_PBMCs_human_1/metrics_summary.csv" ), path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMO/outs/per_sample_outs/PBMC_10K_CMO_PBMCs_human_2/metrics_summary.csv" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/per_sample_outs/PBMC_10K_CMV/metrics_summary.csv" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/count/PBMC_10K_CMV/outs/per_sample_outs/PBMC_10K_CMV/metrics_summary.csv" ), // .rds files - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/4PLEX_HUMAN/4PLEX_HUMAN_raw_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Colorectal_BC3/Colorectal_BC3_raw_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Colorectal_BC3/Colorectal_BC3_filtered_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Liver_BC1/Liver_BC1_raw_matrix.rds" ), - path( 
"${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Liver_BC1/Liver_BC1_filtered_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Ovarian_BC2/Ovarian_BC2_raw_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Ovarian_BC2/Ovarian_BC2_filtered_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K/PBMC_10K_raw_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K/PBMC_10K_filtered_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMO/PBMC_10K_CMO_raw_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMO_PBMCs_human_1/PBMC_10K_CMO_PBMCs_human_1_filtered_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMO_PBMCs_human_2/PBMC_10K_CMO_PBMCs_human_2_filtered_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMV/PBMC_10K_CMV_raw_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMV/PBMC_10K_CMV_filtered_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Pancreas_BC4/Pancreas_BC4_raw_matrix.rds" ), - path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Pancreas_BC4/Pancreas_BC4_filtered_matrix.rds" ) + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/4PLEX_HUMAN/4PLEX_HUMAN_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/4PLEX_HUMAN/4PLEX_HUMAN_raw_matrix.sce.rds" ), + + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Colorectal_BC3/Colorectal_BC3_raw_matrix.seurat.rds" ), + path( 
"${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Colorectal_BC3/Colorectal_BC3_raw_matrix.sce.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Colorectal_BC3/Colorectal_BC3_filtered_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Colorectal_BC3/Colorectal_BC3_filtered_matrix.sce.rds" ), + + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Liver_BC1/Liver_BC1_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Liver_BC1/Liver_BC1_raw_matrix.sce.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Liver_BC1/Liver_BC1_filtered_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Liver_BC1/Liver_BC1_filtered_matrix.sce.rds" ), + + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Ovarian_BC2/Ovarian_BC2_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Ovarian_BC2/Ovarian_BC2_raw_matrix.sce.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Ovarian_BC2/Ovarian_BC2_filtered_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Ovarian_BC2/Ovarian_BC2_filtered_matrix.sce.rds" ), + + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K/PBMC_10K_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K/PBMC_10K_raw_matrix.sce.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K/PBMC_10K_filtered_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K/PBMC_10K_filtered_matrix.sce.rds" ), + + path( 
"${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMO/PBMC_10K_CMO_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMO/PBMC_10K_CMO_raw_matrix.sce.rds" ), + + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMO_PBMCs_human_1/PBMC_10K_CMO_PBMCs_human_1_filtered_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMO_PBMCs_human_1/PBMC_10K_CMO_PBMCs_human_1_filtered_matrix.sce.rds" ), + + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMO_PBMCs_human_2/PBMC_10K_CMO_PBMCs_human_2_filtered_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMO_PBMCs_human_2/PBMC_10K_CMO_PBMCs_human_2_filtered_matrix.sce.rds" ), + + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMV/PBMC_10K_CMV_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMV/PBMC_10K_CMV_raw_matrix.sce.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMV/PBMC_10K_CMV_filtered_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/PBMC_10K_CMV/PBMC_10K_CMV_filtered_matrix.sce.rds" ), + + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Pancreas_BC4/Pancreas_BC4_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Pancreas_BC4/Pancreas_BC4_raw_matrix.sce.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Pancreas_BC4/Pancreas_BC4_filtered_matrix.seurat.rds" ), + path( "${outputDir}/results_cellrangermulti/cellrangermulti/mtx_conversions/Pancreas_BC4/Pancreas_BC4_filtered_matrix.sce.rds" ) ).match()} diff --git 
a/tests/main_pipeline_cellrangermulti.nf.test.snap b/tests/main_pipeline_cellrangermulti.nf.test.snap index 1594f01e..1b2f1bdd 100644 --- a/tests/main_pipeline_cellrangermulti.nf.test.snap +++ b/tests/main_pipeline_cellrangermulti.nf.test.snap @@ -57,27 +57,43 @@ "metrics_summary.csv:md5,d14d385df3e8e61c924a30cd5b959b86", "metrics_summary.csv:md5,bcd7e1d1854e31a4968d5876eab7d64a", "metrics_summary.csv:md5,98d9d9617c2ca4f614614568e4371d44", - "4PLEX_HUMAN_raw_matrix.rds:md5,16fbbce025edf14080d9652f0b3b7994", - "Colorectal_BC3_raw_matrix.rds:md5,bb6da39a69fac14757be2ef9c1759b24", - "Colorectal_BC3_filtered_matrix.rds:md5,92f5b4a8340a5180dc7ad232a4994ace", - "Liver_BC1_raw_matrix.rds:md5,def4320e149344bf1852304feae7a137", - "Liver_BC1_filtered_matrix.rds:md5,573da7ab560533d6e0930c3b3822f90f", - "Ovarian_BC2_raw_matrix.rds:md5,e43ac66c2bce57d94152e30e5a1af721", - "Ovarian_BC2_filtered_matrix.rds:md5,4368fe37be8ca85850689e70a0a9a5f6", - "PBMC_10K_raw_matrix.rds:md5,d9e9a5d35e6ead35ba5a0d2a9bfdb06d", - "PBMC_10K_filtered_matrix.rds:md5,1866ab65fdfb8dd704485bae222823d2", - "PBMC_10K_CMO_raw_matrix.rds:md5,fb5bc19943ebaf58ffcfbbe796c4baf2", - "PBMC_10K_CMO_PBMCs_human_1_filtered_matrix.rds:md5,725b5310cc5ea04bcce6fd69d92ec36a", - "PBMC_10K_CMO_PBMCs_human_2_filtered_matrix.rds:md5,5a1c0412f9058487f5f0798c28b87d39", - "PBMC_10K_CMV_raw_matrix.rds:md5,a7ee013dcf87bea1e6d589f51ace64ff", - "PBMC_10K_CMV_filtered_matrix.rds:md5,c0b74395b5ecc54dcb321a12f0601777", - "Pancreas_BC4_raw_matrix.rds:md5,0d9640458250fd53c54031382b3b19dc", - "Pancreas_BC4_filtered_matrix.rds:md5,1df6ddb6fc29427fc6240c197447d146" + "4PLEX_HUMAN_raw_matrix.seurat.rds:md5,acc22e948a2250907897f79e008ff3ea", + "4PLEX_HUMAN_raw_matrix.sce.rds:md5,084ac812ebb69ca3152abd2c7226d739", + "Colorectal_BC3_raw_matrix.seurat.rds:md5,0d6a6222daf2b03cba426cb80b86914c", + "Colorectal_BC3_raw_matrix.sce.rds:md5,b00d8244f06eef3e5d0fae39c9e14196", + 
"Colorectal_BC3_filtered_matrix.seurat.rds:md5,554e8c02aabc9935f7f60b4ccd95790d", + "Colorectal_BC3_filtered_matrix.sce.rds:md5,d18364082406728404deefd7e163e758", + "Liver_BC1_raw_matrix.seurat.rds:md5,826973ce82225e8942a823e18dbe01fd", + "Liver_BC1_raw_matrix.sce.rds:md5,4806e7f7e00af77c9eb0e2666cedc96c", + "Liver_BC1_filtered_matrix.seurat.rds:md5,958cb86208fa8241f84452b92061b534", + "Liver_BC1_filtered_matrix.sce.rds:md5,315bee1c2cfbebd58fbe070756fa578e", + "Ovarian_BC2_raw_matrix.seurat.rds:md5,f107fd3e315a04aec3e6e53650c456f8", + "Ovarian_BC2_raw_matrix.sce.rds:md5,e30869371d77941b1d009fd6983b5c43", + "Ovarian_BC2_filtered_matrix.seurat.rds:md5,c2f00ae90a958938197666f2642697f1", + "Ovarian_BC2_filtered_matrix.sce.rds:md5,0852d41a76b05cae1b6ee8359b7fcb15", + "PBMC_10K_raw_matrix.seurat.rds:md5,97d5d0bc88db1df05b7effcb9dbb31cc", + "PBMC_10K_raw_matrix.sce.rds:md5,4b3fe0fbda8a80eafc372bd895c26ec0", + "PBMC_10K_filtered_matrix.seurat.rds:md5,dc780d878e2abae61b10eba5218116b8", + "PBMC_10K_filtered_matrix.sce.rds:md5,1f2f3072ede853dd5f6c47821fc39543", + "PBMC_10K_CMO_raw_matrix.seurat.rds:md5,079695fa7ca1190a8467c40ea906ab55", + "PBMC_10K_CMO_raw_matrix.sce.rds:md5,1b5f531b29b6f35a2fc17e8e679b1f38", + "PBMC_10K_CMO_PBMCs_human_1_filtered_matrix.seurat.rds:md5,8dc6983ed48e114e997111bb9a3cb08d", + "PBMC_10K_CMO_PBMCs_human_1_filtered_matrix.sce.rds:md5,343dff9ef666aceafc5bc3f5da4dfb67", + "PBMC_10K_CMO_PBMCs_human_2_filtered_matrix.seurat.rds:md5,d1d8205885be044a721295205e34aeea", + "PBMC_10K_CMO_PBMCs_human_2_filtered_matrix.sce.rds:md5,a6598288ca1afd8590e25ea3095de929", + "PBMC_10K_CMV_raw_matrix.seurat.rds:md5,7b731e32655cace681ace140e3ef9af3", + "PBMC_10K_CMV_raw_matrix.sce.rds:md5,75baf36779b04ed941b97b644125a2ff", + "PBMC_10K_CMV_filtered_matrix.seurat.rds:md5,4dfa3f7aa87706e23a04248349292dc1", + "PBMC_10K_CMV_filtered_matrix.sce.rds:md5,cb78864bde8833c2e6323ff01eef3c15", + "Pancreas_BC4_raw_matrix.seurat.rds:md5,7f972f40b05824a3cff5449a8f9f8b61", + 
"Pancreas_BC4_raw_matrix.sce.rds:md5,4af114f068fc9bab8cceb3f25a9b6d9a", + "Pancreas_BC4_filtered_matrix.seurat.rds:md5,83b0c41b147c45ce0204eb14e6bca9d9", + "Pancreas_BC4_filtered_matrix.sce.rds:md5,b22101dc4bd007f03d5254e0211961fd" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.10.2" }, - "timestamp": "2024-05-06T11:40:54.060867657" + "timestamp": "2024-11-29T14:57:39.288233009" } -} +} \ No newline at end of file diff --git a/tests/main_pipeline_kallisto.nf.test b/tests/main_pipeline_kallisto.nf.test index cb331986..50ea4b8f 100644 --- a/tests/main_pipeline_kallisto.nf.test +++ b/tests/main_pipeline_kallisto.nf.test @@ -25,12 +25,12 @@ nextflow_pipeline { {assert workflow.success}, // How many tasks were executed? - {assert workflow.trace.tasks().size() == 12}, + {assert workflow.trace.tasks().size() == 15}, // How many results were produced? {assert path("${outputDir}/results_kallisto").list().size() == 4}, - {assert path("${outputDir}/results_kallisto/kallisto").list().size() == 4}, - {assert path("${outputDir}/results_kallisto/kallisto/mtx_conversions").list().size() == 4}, + {assert path("${outputDir}/results_kallisto/kallisto").list().size() == 3}, + {assert path("${outputDir}/results_kallisto/kallisto/mtx_conversions").list().size() == 5}, {assert path("${outputDir}/results_kallisto/kallisto/Sample_X.count").list().size() == 9}, {assert path("${outputDir}/results_kallisto/kallisto/Sample_Y.count").list().size() == 9}, {assert path("${outputDir}/results_kallisto/fastqc").list().size() == 12}, @@ -47,13 +47,15 @@ nextflow_pipeline { // {assert snapshot( path( "${outputDir}/results_kallisto/kallisto/Sample_X.count/counts_unfiltered/cells_x_genes.barcodes.txt" ), - path( "${outputDir}/results_kallisto/kallisto/Sample_X.count/counts_unfiltered/cells_x_genes.genes.txt" ), - path( "${outputDir}/results_kallisto/kallisto/Sample_X.count/counts_unfiltered/cells_x_genes.mtx" ), + path( 
"${outputDir}/results_kallisto/kallisto/Sample_X.count/counts_unfiltered/cells_x_genes.genes.txt" ), + path( "${outputDir}/results_kallisto/kallisto/Sample_X.count/counts_unfiltered/cells_x_genes.mtx" ), path( "${outputDir}/results_kallisto/kallisto/Sample_Y.count/counts_unfiltered/cells_x_genes.barcodes.txt" ), - path( "${outputDir}/results_kallisto/kallisto/Sample_Y.count/counts_unfiltered/cells_x_genes.genes.txt" ), - path( "${outputDir}/results_kallisto/kallisto/Sample_Y.count/counts_unfiltered/cells_x_genes.mtx" ), - path( "${outputDir}/results_kallisto/kallisto/mtx_conversions/Sample_X/Sample_X_raw_matrix.rds" ), - path( "${outputDir}/results_kallisto/kallisto/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.rds" ) + path( "${outputDir}/results_kallisto/kallisto/Sample_Y.count/counts_unfiltered/cells_x_genes.genes.txt" ), + path( "${outputDir}/results_kallisto/kallisto/Sample_Y.count/counts_unfiltered/cells_x_genes.mtx" ), + path( "${outputDir}/results_kallisto/kallisto/mtx_conversions/Sample_X/Sample_X_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_kallisto/kallisto/mtx_conversions/Sample_X/Sample_X_raw_matrix.sce.rds" ), + path( "${outputDir}/results_kallisto/kallisto/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_kallisto/kallisto/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.sce.rds" ) ).match()} ) // end of assertAll() diff --git a/tests/main_pipeline_kallisto.nf.test.snap b/tests/main_pipeline_kallisto.nf.test.snap index 82580178..6629f459 100644 --- a/tests/main_pipeline_kallisto.nf.test.snap +++ b/tests/main_pipeline_kallisto.nf.test.snap @@ -7,13 +7,15 @@ "cells_x_genes.barcodes.txt:md5,a8cf7ea4b2d075296a94bf066a64b7a4", "cells_x_genes.genes.txt:md5,acd9d00120f52031974b2add3e7521b6", "cells_x_genes.mtx:md5,abd83de117204d0a77df3c92d00cc025", - "Sample_X_raw_matrix.rds:md5,0938f4189b7a7fd1030abfcee798741c", - "Sample_Y_raw_matrix.rds:md5,93c12abe283ab37c5f37e5cd3cb25302" + 
"Sample_X_raw_matrix.seurat.rds:md5,6dba7ab652441df6a2b0712c7529053b", + "Sample_X_raw_matrix.sce.rds:md5,3fef29fea599561551a06caa82811e2b", + "Sample_Y_raw_matrix.seurat.rds:md5,a9e9ac3d1bf83f4e791d6f0c3f6540de", + "Sample_Y_raw_matrix.sce.rds:md5,6818392c6b8b65d762521406aa963b2a" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.10.2" }, - "timestamp": "2024-03-18T14:51:42.040931572" + "timestamp": "2024-11-29T09:54:53.033167911" } -} +} \ No newline at end of file diff --git a/tests/main_pipeline_star.nf.test b/tests/main_pipeline_star.nf.test index 21eb1955..ef314358 100644 --- a/tests/main_pipeline_star.nf.test +++ b/tests/main_pipeline_star.nf.test @@ -25,12 +25,12 @@ nextflow_pipeline { {assert workflow.success}, // How many tasks were executed? - {assert workflow.trace.tasks().size() == 17}, + {assert workflow.trace.tasks().size() == 23}, // How many results were produced? {assert path("${outputDir}/results_star").list().size() == 4}, {assert path("${outputDir}/results_star/star").list().size() == 3}, - {assert path("${outputDir}/results_star/star/mtx_conversions").list().size() == 5}, + {assert path("${outputDir}/results_star/star/mtx_conversions").list().size() == 8}, {assert path("${outputDir}/results_star/fastqc").list().size() == 12}, {assert path("${outputDir}/results_star/multiqc").list().size() == 3}, @@ -56,10 +56,14 @@ nextflow_pipeline { path( "${outputDir}/results_star/star/Sample_Y/Sample_Y.Solo.out/Gene/filtered/matrix.mtx.gz" ), path( "${outputDir}/results_star/star/Sample_Y/Sample_Y.Solo.out/Gene/filtered/features.tsv.gz" ), path( "${outputDir}/results_star/star/Sample_Y/Sample_Y.Solo.out/Gene/filtered/barcodes.tsv.gz" ), - path( "${outputDir}/results_star/star/mtx_conversions/Sample_X/Sample_X_raw_matrix.rds" ), - path( "${outputDir}/results_star/star/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.rds" ), - path( 
"${outputDir}/results_star/star/mtx_conversions/Sample_X/Sample_X_filtered_matrix.rds" ), - path( "${outputDir}/results_star/star/mtx_conversions/Sample_Y/Sample_Y_filtered_matrix.rds" ), + path( "${outputDir}/results_star/star/mtx_conversions/Sample_X/Sample_X_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_star/star/mtx_conversions/Sample_X/Sample_X_raw_matrix.sce.rds" ), + path( "${outputDir}/results_star/star/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.seurat.rds" ), + path( "${outputDir}/results_star/star/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.sce.rds" ), + path( "${outputDir}/results_star/star/mtx_conversions/Sample_X/Sample_X_filtered_matrix.seurat.rds" ), + path( "${outputDir}/results_star/star/mtx_conversions/Sample_X/Sample_X_filtered_matrix.sce.rds" ), + path( "${outputDir}/results_star/star/mtx_conversions/Sample_Y/Sample_Y_filtered_matrix.seurat.rds" ), + path( "${outputDir}/results_star/star/mtx_conversions/Sample_Y/Sample_Y_filtered_matrix.sce.rds" ) ).match()} ) // end of assertAll() diff --git a/tests/main_pipeline_star.nf.test.snap b/tests/main_pipeline_star.nf.test.snap index 88739a7b..e13a570a 100644 --- a/tests/main_pipeline_star.nf.test.snap +++ b/tests/main_pipeline_star.nf.test.snap @@ -11,15 +11,19 @@ "matrix.mtx.gz:md5,0ae080bd0002e350531a5816e159345e", "features.tsv.gz:md5,99e453cb1443a3e43e99405184e51a5e", "barcodes.tsv.gz:md5,9b695b0b91bcb146ec9c4688ca10a690", - "Sample_X_raw_matrix.rds:md5,31604db3e7846acc8d9a60b1a171ce78", - "Sample_Y_raw_matrix.rds:md5,1a52c823e91acce2b29621c8c99c8c72", - "Sample_X_filtered_matrix.rds:md5,aa2d36dd8507aba864347c88e4ce0d27", - "Sample_Y_filtered_matrix.rds:md5,d459af8f99258bcc88b80b2f7c58e911" + "Sample_X_raw_matrix.seurat.rds:md5,51a863d56f6d4c9df7161d574fecfd33", + "Sample_X_raw_matrix.sce.rds:md5,2135e075bfb5043b78841de9bd261a3c", + "Sample_Y_raw_matrix.seurat.rds:md5,f177854d779169f6f0e1c628f154a656", + "Sample_Y_raw_matrix.sce.rds:md5,86fc59316bc9083c510aa0d3a0a24ffb", + 
"Sample_X_filtered_matrix.seurat.rds:md5,a035c9ead72baa36f2ef298dc02d5e1b", + "Sample_X_filtered_matrix.sce.rds:md5,8fd8b3a602a00578e5a72f1fd6792e05", + "Sample_Y_filtered_matrix.seurat.rds:md5,42823b941a1c375473f454980eafbf0b", + "Sample_Y_filtered_matrix.sce.rds:md5,254fd8a73aa0e0ca4bee57855c9cde30" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.10.2" }, - "timestamp": "2024-02-14T16:30:25.7971791" + "timestamp": "2024-11-29T15:32:12.524479228" } -} +} \ No newline at end of file diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 0d230cbb..375ad79d 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -3,25 +3,27 @@ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_scrnaseq_pipeline' -include { getGenomeAttribute } from '../subworkflows/local/utils_nfcore_scrnaseq_pipeline' -include { FASTQC_CHECK } from '../subworkflows/local/fastqc' -include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' -include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' -include { STARSOLO } from '../subworkflows/local/starsolo' -include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" -include { CELLRANGER_MULTI_ALIGN } from "../subworkflows/local/align_cellrangermulti" -include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellrangerarc" -include { UNIVERSC_ALIGN } from "../subworkflows/local/align_universc" -include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" -include { GTF_GENE_FILTER } 
from '../modules/local/gtf_gene_filter' -include { EMPTYDROPS_CELL_CALLING } from '../modules/local/emptydrops' -include { GUNZIP as GUNZIP_FASTA } from '../modules/nf-core/gunzip/main' -include { GUNZIP as GUNZIP_GTF } from '../modules/nf-core/gunzip/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_scrnaseq_pipeline' +include { getGenomeAttribute } from '../subworkflows/local/utils_nfcore_scrnaseq_pipeline' +include { FASTQC_CHECK } from '../subworkflows/local/fastqc' +include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' +include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' +include { STARSOLO } from '../subworkflows/local/starsolo' +include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" +include { CELLRANGER_MULTI_ALIGN } from "../subworkflows/local/align_cellrangermulti" +include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellrangerarc" +include { UNIVERSC_ALIGN } from "../subworkflows/local/align_universc" +include { MTX_TO_H5AD } from '../modules/local/mtx_to_h5ad' +include { H5AD_CONVERSION } from '../subworkflows/local/h5ad_conversion' +include { H5AD_CONVERSION as EMPTYDROPS_H5AD_CONVERSION } from '../subworkflows/local/h5ad_conversion' +include { EMPTY_DROPLET_REMOVAL } from '../subworkflows/local/emptydrops_removal.nf' +include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' +include { GUNZIP as GUNZIP_FASTA } from '../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_GTF } from '../modules/nf-core/gunzip/main' @@ -38,71 +40,69 @@ workflow SCRNASEQ { } // collect paths from genome attributes file (e.g. 
iGenomes.config; optional) - params.fasta = getGenomeAttribute('fasta') - params.gtf = getGenomeAttribute('gtf') - params.star_index = getGenomeAttribute('star') - params.salmon_index = getGenomeAttribute('simpleaf') - params.txp2gene = getGenomeAttribute('simpleaf_tx2pgene') + // we cannot overwrite params in the workflow (they stay null, as set in the config file) + def genome_fasta = params.fasta ?: getGenomeAttribute('fasta') + def gtf = params.gtf ?: getGenomeAttribute('gtf') + def star_index = params.star_index // ?: getGenomeAttribute('star') TODO: Currently not fetching iGenomes star index due to version incompatibility + def salmon_index = params.salmon_index ?: getGenomeAttribute('simpleaf') + def txp2gene = params.txp2gene ?: getGenomeAttribute('simpleaf_tx2pgene') // Make cellranger or cellranger-arc index conditional + def cellranger_index = [] if (params.aligner in ["cellranger", "cellrangermulti"]){ - params.cellranger_index = getGenomeAttribute('cellranger') + cellranger_index = params.cellranger_index ?: getGenomeAttribute('cellranger') } else if (params.aligner == "cellrangerarc") { - params.cellranger_index = getGenomeAttribute('cellrangerarc') + cellranger_index = params.cellranger_index ?: getGenomeAttribute('cellrangerarc') } - ch_genome_fasta = params.fasta ? file(params.fasta, checkIfExists: true) : [] - ch_gtf = params.gtf ? file(params.gtf, checkIfExists: true) : [] + ch_genome_fasta = genome_fasta ? file(genome_fasta, checkIfExists: true) : [] + ch_gtf = gtf ? file(gtf, checkIfExists: true) : [] // general input and params ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): [] ch_motifs = params.motifs ? file(params.motifs) : [] ch_cellrangerarc_config = params.cellrangerarc_config ? file(params.cellrangerarc_config) : [] - ch_txp2gene = params.txp2gene ? file(params.txp2gene) : [] + ch_txp2gene = txp2gene ? 
file(txp2gene, checkIfExists: true) : [] ch_multiqc_files = Channel.empty() if (params.barcode_whitelist) { - ch_barcode_whitelist = file(params.barcode_whitelist) + ch_barcode_whitelist = file(params.barcode_whitelist, checkIfExists: true) } else if (protocol_config.containsKey("whitelist")) { - ch_barcode_whitelist = file("$projectDir/${protocol_config['whitelist']}") + ch_barcode_whitelist = file("$projectDir/${protocol_config['whitelist']}", checkIfExists: true) } else { ch_barcode_whitelist = [] } - //kallisto params - ch_kallisto_index = params.kallisto_index ? file(params.kallisto_index) : [] - kb_workflow = params.kb_workflow - kb_t1c = params.kb_t1c ? file(params.kb_t1c) : [] - kb_t2c = params.kb_t2c ? file(params.kb_t2c) : [] - // samplesheet - this is passed to the MTX conversion functions to add metadata to the // AnnData objects. ch_input = file(params.input) //kallisto params - ch_kallisto_index = params.kallisto_index ? file(params.kallisto_index) : [] + ch_kallisto_index = params.kallisto_index ? file(params.kallisto_index, checkIfExists: true) : [] kb_workflow = params.kb_workflow + kb_t1c = params.kb_t1c ? file(params.kb_t1c, checkIfExists: true) : [] + kb_t2c = params.kb_t2c ? file(params.kb_t2c, checkIfExists: true) : [] //salmon params - ch_salmon_index = params.salmon_index ? file(params.salmon_index) : [] + ch_salmon_index = salmon_index ? file(salmon_index, checkIfExists: true) : [] //star params - star_index = params.star_index ? file(params.star_index, checkIfExists: true) : null - ch_star_index = star_index ? [[id: star_index.baseName], star_index] : [] + star_index = star_index ? file(star_index, checkIfExists: true) : null + ch_star_index = star_index ? Channel.of( [[id: star_index.baseName], star_index] ) : [] star_feature = params.star_feature //cellranger params - ch_cellranger_index = params.cellranger_index ? file(params.cellranger_index) : [] + ch_cellranger_index = cellranger_index ? 
file(cellranger_index, checkIfExists: true) : [] //universc params - ch_universc_index = params.universc_index ? file(params.universc_index) : [] + ch_universc_index = params.universc_index ? file(params.universc_index, checkIfExists: true) : [] //cellrangermulti params cellranger_vdj_index = params.cellranger_vdj_index ? file(params.cellranger_vdj_index, checkIfExists: true) : [] ch_multi_samplesheet = params.cellranger_multi_barcodes ? file(params.cellranger_multi_barcodes, checkIfExists: true) : [] empty_file = file("$projectDir/assets/EMPTY", checkIfExists: true) - ch_versions = Channel.empty() + ch_versions = Channel.empty() ch_mtx_matrices = Channel.empty() // Run FastQC @@ -115,24 +115,24 @@ workflow SCRNASEQ { // // Uncompress genome fasta file if required // - if (params.fasta) { - if (params.fasta.endsWith('.gz')) { - ch_genome_fasta = GUNZIP_FASTA ( [ [:], file(params.fasta) ] ).gunzip.map { it[1] } + if (genome_fasta) { + if (genome_fasta.endsWith('.gz')) { + ch_genome_fasta = GUNZIP_FASTA ( [ [:], ch_genome_fasta ] ).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) } else { - ch_genome_fasta = Channel.value( file(params.fasta) ) + ch_genome_fasta = Channel.value( ch_genome_fasta ) } } // // Uncompress GTF annotation file or create from GFF3 if required // - if (params.gtf) { - if (params.gtf.endsWith('.gz')) { - ch_gtf = GUNZIP_GTF ( [ [:], file(params.gtf) ] ).gunzip.map { it[1] } + if (gtf) { + if (gtf.endsWith('.gz')) { + ch_gtf = GUNZIP_GTF ( [ [:], ch_gtf ] ).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) } else { - ch_gtf = Channel.value( file(params.gtf) ) + ch_gtf = Channel.value( ch_gtf ) } } @@ -153,7 +153,7 @@ workflow SCRNASEQ { ch_fastq ) ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(KALLISTO_BUSTOOLS.out.raw_counts, KALLISTO_BUSTOOLS.out.filtered_counts) + ch_mtx_matrices = ch_mtx_matrices.mix( 
KALLISTO_BUSTOOLS.out.counts_raw, KALLISTO_BUSTOOLS.out.counts_filtered ) ch_txp2gene = KALLISTO_BUSTOOLS.out.txp2gene } @@ -171,7 +171,7 @@ workflow SCRNASEQ { ) ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions) ch_multiqc_files = ch_multiqc_files.mix(SCRNASEQ_ALEVIN.out.alevin_results.map{ meta, it -> it }) - ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results) + ch_mtx_matrices = ch_mtx_matrices.mix( SCRNASEQ_ALEVIN.out.alevin_results ) } // Run STARSolo pipeline @@ -187,9 +187,8 @@ workflow SCRNASEQ { protocol_config.get('extra_args', ""), ) ch_versions = ch_versions.mix(STARSOLO.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(STARSOLO.out.raw_counts, STARSOLO.out.filtered_counts) - ch_star_index = STARSOLO.out.star_index ch_multiqc_files = ch_multiqc_files.mix(STARSOLO.out.for_multiqc) + ch_mtx_matrices = ch_mtx_matrices.mix( STARSOLO.out.raw_counts, STARSOLO.out.filtered_counts ) } // Run cellranger pipeline @@ -202,9 +201,8 @@ workflow SCRNASEQ { protocol_config['protocol'] ) ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_matrices) - ch_star_index = CELLRANGER_ALIGN.out.star_index - ch_multiqc_files = ch_multiqc_files.mix(CELLRANGER_ALIGN.out.cellranger_out.map{ + ch_mtx_matrices = ch_mtx_matrices.mix( CELLRANGER_ALIGN.out.cellranger_matrices_raw, CELLRANGER_ALIGN.out.cellranger_matrices_filtered ) + ch_multiqc_files = ch_multiqc_files.mix(CELLRANGER_ALIGN.out.cellranger_out.map { meta, outs -> outs.findAll{ it -> it.name == "web_summary.html"} }) } @@ -301,43 +299,47 @@ workflow SCRNASEQ { ch_multiqc_files = ch_multiqc_files.mix( CELLRANGER_MULTI_ALIGN.out.cellrangermulti_out.map{ meta, outs -> outs.findAll{ it -> it.name == "web_summary.html" } }) - ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx) + ch_mtx_matrices = ch_mtx_matrices.mix( CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx_raw, 
CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx_filtered ) } - // Run emptydrops calling module + // + // MODULE: Convert mtx matrices to h5ad + // + MTX_TO_H5AD ( + ch_mtx_matrices, + ch_txp2gene, + star_index ? ch_star_index.map{it[1]} : [], + params.aligner + ) + ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first()) + + // + // SUBWORKFLOW: Run h5ad conversion and concatenation + // + ch_emptydrops = Channel.empty() + H5AD_CONVERSION ( + MTX_TO_H5AD.out.h5ad, + ch_input + ) + ch_versions = ch_versions.mix(H5AD_CONVERSION.out.ch_versions) + + // + // SUBWORKFLOW: Run cellbender emptydrops filter + // if ( !params.skip_emptydrops && !(params.aligner in ['cellrangerarc']) ) { - // // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it - // - if ( params.aligner in [ 'cellranger', 'cellrangermulti', 'kallisto', 'star' ] ) { - ch_mtx_matrices_for_emptydrops = - ch_mtx_matrices.filter { meta, mtx_files -> - mtx_files.toString().contains("raw_feature_bc_matrix") || // cellranger - mtx_files.toString().contains("counts_unfiltered") || // kallisto - mtx_files.toString().contains("raw") // star - } - } else { - ch_mtx_matrices_for_emptydrops = ch_mtx_matrices - } - - EMPTYDROPS_CELL_CALLING( ch_mtx_matrices_for_emptydrops ) - ch_mtx_matrices = ch_mtx_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices ) + EMPTY_DROPLET_REMOVAL ( + H5AD_CONVERSION.out.h5ads.filter { meta, mtx_files -> meta.input_type.contains('raw') } + ) + EMPTYDROPS_H5AD_CONVERSION ( + EMPTY_DROPLET_REMOVAL.out.h5ad, + ch_input + ) } - // Run mtx to h5ad conversion subworkflow - MTX_CONVERSION ( - ch_mtx_matrices, - ch_input, - ch_txp2gene, - ch_star_index - ) - - //Add Versions from MTX Conversion workflow too - ch_versions.mix(MTX_CONVERSION.out.ch_versions) - // // Collate and save software versions //