diff --git a/subworkflows/local/align_cellrangermulti.nf b/subworkflows/local/align_cellrangermulti.nf
index 8d8b04ab..b3895d9a 100644
--- a/subworkflows/local/align_cellrangermulti.nf
+++ b/subworkflows/local/align_cellrangermulti.nf
@@ -57,9 +57,74 @@ workflow CELLRANGER_MULTI_ALIGN {
     ch_vdj_primer_index = params.vdj_inner_enrichment_primers ? file(params.vdj_inner_enrichment_primers) : empty_file
     ch_beam_antigen_panel_csv = empty_file // currently not implemented
     ch_beam_control_panel_csv = empty_file // currently not implemented
-    // TODO: Add cellranger_multi_barcodes samplesheet parsing
-    ch_frna_sample_csv = params.frna_sample_csv ? file(params.frna_sample_csv) : empty_file
-    ch_cmo_barcode_csv = params.cmo_barcode_csv ? file(params.cmo_barcode_csv) : empty_file
+
+    // Parse the FRNA / CMO barcode information given in the optional
+    // cellranger_multi_barcodes samplesheet.
+    if (params.cellranger_multi_barcodes) {
+        // NOTE(review): .branch sends each row to the first matching condition only, so a
+        // samplesheet holding both 'cmo_ids' and 'probe_barcode_ids' columns would route every
+        // row to 'cmo' - confirm that this is the intended behaviour.
+        Channel.fromPath( params.cellranger_multi_barcodes )
+            .splitCsv( header: true )
+            .map { row ->
+                // Fail early, pointing to the docs, when a mandatory column is missing
+                assert row.containsKey('sample'), "The ${params.cellranger_multi_barcodes} samplesheet for cellranger multi must contain a 'sample' column. Please, check the docs."
+                assert row.containsKey('multiplexed_sample_id'), "The ${params.cellranger_multi_barcodes} samplesheet for cellranger multi must contain a 'multiplexed_sample_id' column. Please, check the docs."
+                assert row.containsKey('description'), "The ${params.cellranger_multi_barcodes} samplesheet for cellranger multi must contain a 'description' column. Please, check the docs."
+                assert row.containsKey('probe_barcode_ids') || row.containsKey('cmo_ids'), "The ${params.cellranger_multi_barcodes} samplesheet for cellranger multi must contain at least one, or both, of 'cmo_ids' or 'probe_barcode_ids' columns. Please, check the docs."
+
+                row
+            }
+            .branch { row ->
+                cmo  : row.containsKey('cmo_ids')
+                frna : row.containsKey('probe_barcode_ids')
+            }
+            .set { ch_cellrangermulti_barcodes }
+
+        //
+        // Here, we parse the received cellranger multi barcodes samplesheet.
+        // The rows are grouped by their 'sample' value and joined, by sample name, with the GEX fastqs.
+        // The GEX fastqs are selected because samples are always expected to have at least GEX data.
+        // Then, from the rows joined to each sample (its "additional barcode information"), we
+        // generate the cmo / frna samplesheet to be used by that sample. Samples without any row
+        // are kept by 'remainder: true' and get the empty placeholder file instead.
+        //
+        // Here, because of the .join(), we take advantage of the "FIFO"-rule so that the data used in the
+        // module is from the same sample as in the "normal" samplesheet.
+        //
+
+        // CMOs
+        ch_cmo_barcode_csv =
+            ch_grouped_fastq.gex.map{ it[0].id }
+            .join( ch_cellrangermulti_barcodes.cmo.map{ row -> [ row.sample, row ] }.groupTuple(by:0), remainder: true )
+            .map{ sample, rows ->
+                if (rows) {
+                    // 'def' keeps these variables local to each invocation of the closure
+                    def lines = [ 'sample,cmo_ids,description' ] + rows.collect{ row -> "${row.sample},${row.cmo_ids},${row.description}" }
+                    def cmos  = file( "${sample}_cmo_samplesheet.csv" )
+                    cmos.text = lines.join("\n").trim().toString()
+                    cmos
+                } else { empty_file }
+            }
+
+        // FRNAs
+        ch_frna_sample_csv =
+            ch_grouped_fastq.gex.map{ it[0].id }
+            .join( ch_cellrangermulti_barcodes.frna.map{ row -> [ row.sample, row ] }.groupTuple(by:0), remainder: true )
+            .map{ sample, rows ->
+                if (rows) {
+                    // Written to its own file so that it never overwrites the CMO samplesheet of the sample
+                    def lines = [ 'sample,probe_barcode_ids,description' ] + rows.collect{ row -> "${row.sample},${row.probe_barcode_ids},${row.description}" }
+                    def frna  = file( "${sample}_frna_samplesheet.csv" )
+                    frna.text = lines.join("\n").trim().toString()
+                    frna
+                } else { empty_file }
+            }
+
+    } else {
+        ch_cmo_barcode_csv = empty_file
+        ch_frna_sample_csv = empty_file
+    }
 
     //
     // Prepare GTF