Skip to content

Commit

Permalink
add a parser for frna/cmo data from customised, unified, barcodes sam…
Browse files Browse the repository at this point in the history
…plesheet
  • Loading branch information
fmalmeida committed Apr 10, 2024
1 parent d044f51 commit c8ac4cf
Showing 1 changed file with 62 additions and 3 deletions.
65 changes: 62 additions & 3 deletions subworkflows/local/align_cellrangermulti.nf
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,68 @@ workflow CELLRANGER_MULTI_ALIGN {
// Optional auxiliary inputs for cellranger multi. `empty_file` is used as the placeholder
// whenever an input is not provided.
ch_vdj_primer_index = params.vdj_inner_enrichment_primers ? file(params.vdj_inner_enrichment_primers) : empty_file
ch_beam_antigen_panel_csv = empty_file // currently not implemented
ch_beam_control_panel_csv = empty_file // currently not implemented
// ch_frna_sample_csv and ch_cmo_barcode_csv are intentionally not initialised here: the
// `params.cellranger_multi_barcodes` if/else that follows assigns both of them in each of its
// branches, so any value set at this point would be overwritten before it could be used.

//
// Parse FRNA / CMO barcode information from the unified cellranger multi barcodes samplesheet.
//
// Result: `ch_cmo_barcode_csv` and `ch_frna_sample_csv` are either channels emitting one CSV file
// (or `empty_file`) per joined sample id, or plain `empty_file` when no barcodes sheet was given.
//
if (params.cellranger_multi_barcodes) {

    //
    // Read the barcodes samplesheet. With `header: true`, splitCsv emits one map per row, keyed by
    // the column names, so the asserts below validate the sheet's header on every row.
    //
    Channel.fromPath( params.cellranger_multi_barcodes )
        .splitCsv( header: true )
        .map { row ->
            assert row.containsKey('sample'), "The ${params.cellranger_multi_barcodes} samplesheet for cellranger multi must contain a 'sample' column. Please, check the docs."
            assert row.containsKey('multiplexed_sample_id'), "The ${params.cellranger_multi_barcodes} samplesheet for cellranger multi must contain a 'multiplexed_sample_id' column. Please, check the docs."
            assert row.containsKey('description'), "The ${params.cellranger_multi_barcodes} samplesheet for cellranger multi must contain a 'description' column. Please, check the docs."
            assert row.containsKey('probe_barcode_ids') || row.containsKey('cmo_ids'), "The ${params.cellranger_multi_barcodes} samplesheet for cellranger multi must contain at least one, or both, of 'cmo_ids' or 'probe_barcode_ids' columns. Please, check the docs."

            row
        }
        .set { ch_cellrangermulti_barcodes }

    //
    // For each barcode type we:
    //   1. keep only the rows that actually carry a value for that type. Two independent filters are
    //      used instead of `.branch`, because `.branch` forwards an item to the FIRST matching branch
    //      only, which would starve the FRNA channel whenever the sheet has both columns;
    //   2. group the rows by their own `sample` value, so every sample gets its own rows;
    //   3. join them with the ids of the GEX fastqs (`remainder: true` keeps ids without barcode rows,
    //      which then receive `empty_file`);
    //   4. write one small CSV per sample.
    //
    // The selection of the GEX fastqs is because samples are always expected to have at least GEX data.
    //
    // NOTE(review): the per-sample CSVs are written relative to the current working directory.
    // NOTE(review): 'multiplexed_sample_id' is validated above but not written to the generated
    //               CSVs; confirm whether cellranger multi expects it in place of 'sample'.
    // NOTE(review): the original comment relied on the "FIFO" rule to pair these files with the
    //               fastqs of the same sample downstream; that pairing is not visible here -- confirm.
    //

    // CMOs
    ch_cmo_barcode_csv =
        ch_grouped_fastq.gex
            .map { it[0].id }
            .join(
                ch_cellrangermulti_barcodes
                    .filter { it.cmo_ids?.trim() }
                    .map { [ it.sample, it ] }
                    .groupTuple( by: 0 ),
                remainder: true
            )
            .map { sample, rows ->
                if (rows) {
                    // `def` keeps these variables local to the closure; undeclared variables would
                    // be script-global and shared between items.
                    def lines = [ 'sample,cmo_ids,description' ] +
                        rows.collect { row -> "${row.sample},${row.cmo_ids},${row.description}" }
                    def cmo_csv = file( "${sample}_cmo_samplesheet.csv" )
                    cmo_csv.text = lines.join("\n").trim()
                    cmo_csv
                } else {
                    empty_file
                }
            }

    // FRNAs
    ch_frna_sample_csv =
        ch_grouped_fastq.gex
            .map { it[0].id }
            .join(
                ch_cellrangermulti_barcodes
                    .filter { it.probe_barcode_ids?.trim() }
                    .map { [ it.sample, it ] }
                    .groupTuple( by: 0 ),
                remainder: true
            )
            .map { sample, rows ->
                if (rows) {
                    def lines = [ 'sample,probe_barcode_ids,description' ] +
                        rows.collect { row -> "${row.sample},${row.probe_barcode_ids},${row.description}" }
                    // Distinct file name so the FRNA sheet never overwrites the CMO sheet of the same sample.
                    def frna_csv = file( "${sample}_frna_samplesheet.csv" )
                    frna_csv.text = lines.join("\n").trim()
                    frna_csv
                } else {
                    empty_file
                }
            }

} else {
    // No barcodes samplesheet supplied: use the placeholder for both inputs.
    ch_cmo_barcode_csv = empty_file
    ch_frna_sample_csv = empty_file
}

//
// Prepare GTF
Expand Down

0 comments on commit c8ac4cf

Please sign in to comment.