Skip to content

Commit

Permalink
Merge pull request #40 from theislab/main
Browse files Browse the repository at this point in the history
Merge from main
  • Loading branch information
wxicu authored Feb 5, 2024
2 parents d5c739d + f62de8d commit a9a11a2
Show file tree
Hide file tree
Showing 14 changed files with 162 additions and 86 deletions.
7 changes: 4 additions & 3 deletions bin/bff.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ parser$add_argument("--methodsForConsensus", help='By default, a consensus call
parser$add_argument("--cellbarcodeWhitelist", help='A vector of expected cell barcodes. This allows reporting on the total set of expected barcodes, not just those in the filtered count matrix',default=NULL)
parser$add_argument("--metricsFile", help='If provided, summary metrics will be written to this file.', default="metrics_cell_hash_r.csv")
parser$add_argument("--doTSNE", help='If true, tSNE will be run on the resulting hashing calls after each caller.', default=TRUE)
parser$add_argument("--preprocess", help='If true, the PreProcess function by CellHashR is executed', default=FALSE)
parser$add_argument("--preprocess_bff", help='If given, the PreProcess function by CellHashR is executed', action="store_true")
parser$add_argument("--barcodeWhitelist", help='A vector of barcode names to retain, used for preprocessing step', default=NULL)
parser$add_argument("--doHeatmap", help='f true, Seurat::HTOHeatmap will be run on the results of each caller.', default=TRUE)
parser$add_argument("--perCellSaturation", help='An optional dataframe with the columns cellbarcode and saturation',default=NULL)
Expand All @@ -31,6 +31,7 @@ parser$add_argument("--assignmentOutBff", help="Prefix name for the file contain
parser$add_argument("--outputdir", help='Output directory')
args <- parser$parse_args()

print(args$preprocess_bff)
#Parameters originally Null
methodsForConsensus <- args$methodsForConsensus
if(is.null(methodsForConsensus)){
Expand Down Expand Up @@ -62,7 +63,7 @@ Argument <- c("HTO-File", "methods", "methodsForConsensus", "cellbarcodeWhitelis
Value <- c(args$fileHto, args$methods, methodsForConsensus, cellbarcodeWhitelist, args$metricsFile, perCellSaturation, majorityConsensusThreshold, callerDisagreementThreshold, args$doTSNE, args$doHeatmap,args$chemistry)
params <- data.frame(Argument, Value)

if(as.logical(args$preprocess)){
if(args$preprocess_bff == TRUE){
print("Preprocessing activated")
print(args$preprocess)
#get barcodes
Expand Down Expand Up @@ -90,7 +91,7 @@ if (!is.null(args$methodsForConsensus)) {
perCell_args <- args$perCellSaturation
perCell <- ifelse(perCell_args == "null" || perCell_args == "Null", NULL, perCell_args)

if(args$methodsForConsensus=="bff_raw" || args$methodsForConsensus=="bff_cluster" || args$methodsForConsensus=="bff_raw,bff_cluster" || is.null(args$methodsForConsensus) )
if(args$methodsForConsensus=="bff_raw" || args$methodsForConsensus=="bff_cluster" || args$methodsForConsensus=="bff_raw,bff_cluster" || args$methodsForConsensus=="bff_cluster,bff_raw"|| is.null(args$methodsForConsensus) )
#Only Bff in its different variations is available
if (args$methods == "bff_raw") {
print("Executing BFF raw")
Expand Down
168 changes: 114 additions & 54 deletions bin/summary_hash.py

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions modules/hash_demulti/bff.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,14 @@ process bff {

script:

def run_preprocess = preprocess_bff != 'False' ? " --preprocess_bff" : ''
"""
mkdir bff_${sampleId}
bff.R --fileHto hto_data --methods $methods --methodsForConsensus $methodsForConsensus \
--cellbarcodeWhitelist $cellbarcodeWhitelist --metricsFile bff_${sampleId}_$metricsFile \
--doTSNE $doTSNE --doHeatmap $doHeatmap --perCellSaturation $perCellSaturation --majorityConsensusThreshold $majorityConsensusThreshold \
--chemistry $chemistry --callerDisagreementThreshold $callerDisagreementThreshold --outputdir bff_${sampleId} \
--assignmentOutBff $assignmentOutBff --preprocess $preprocess_bff --barcodeWhitelist $barcodeWhitelist
--cellbarcodeWhitelist $cellbarcodeWhitelist --metricsFile bff_${sampleId}_$metricsFile \
--doTSNE $doTSNE --doHeatmap $doHeatmap --perCellSaturation $perCellSaturation --majorityConsensusThreshold $majorityConsensusThreshold \
--chemistry $chemistry --callerDisagreementThreshold $callerDisagreementThreshold --outputdir bff_${sampleId} \
--assignmentOutBff $assignmentOutBff ${run_preprocess} --barcodeWhitelist $barcodeWhitelist
"""
}

Expand Down
4 changes: 2 additions & 2 deletions modules/hash_demulti/gmm_demux.nf
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ process gmm_demux{

workflow gmm_demux_hashing{
take:
hto_matrix
input_list
main:
summary = params.summary
report_gmm = params.report_gmm
Expand All @@ -70,7 +70,7 @@ take:
threshold_gmm = params.threshold_gmm
ambiguous = params.ambiguous

gmm_demux(hto_matrix,summary,report_gmm,mode,extract,threshold_gmm,ambiguous)
gmm_demux(input_list,summary,report_gmm,mode,extract,threshold_gmm,ambiguous)

emit:
gmm_demux.out.collect()
Expand Down
8 changes: 6 additions & 2 deletions modules/hash_demultiplexing.nf
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,12 @@ workflow hash_demultiplexing {
if (params.gmmDemux == 'True') {
Channel.fromPath(params.multi_input) \
| splitCsv(header:true) \
| map { row-> tuple(row.sampleId, params.hto_matrix_gmm_demux == 'raw' ? row.hto_matrix_raw : row.hto_matrix_filtered, row.hto_name_gmm) }
| gmm_demux_hashing

| map { row-> tuple(row.sampleId,
params.hto_matrix_gmm_demux == "raw" ? row.hto_matrix_raw : row.hto_matrix_filtered,
row.hto_name_gmm )}
| set {input_list_gmm_demux}
| gmm_demux_hashing(input_list_gmm_demux)
gmmDemux_out = gmm_demux_hashing.out
}
else {
Expand Down
3 changes: 2 additions & 1 deletion modules/single/hash_demulti/bff.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,15 @@ process bff {
path "bff_${task.index}"

script:
def run_preprocess = preprocess_bff != 'False' ? " --preprocess_bff" : ''

"""
mkdir bff_${task.index}
bff.R --fileHto hto_data --methods $methods --methodsForConsensus $methodsForConsensus \
--cellbarcodeWhitelist $cellbarcodeWhitelist --metricsFile bff_${task.index}_$metricsFile \
--doTSNE $doTSNE --doHeatmap $doHeatmap --perCellSaturation $perCellSaturation --majorityConsensusThreshold $majorityConsensusThreshold \
--chemistry $chemistry --callerDisagreementThreshold $callerDisagreementThreshold --outputdir bff_${task.index} --assignmentOutBff $assignmentOutBff \
--preprocess $preprocess_bff --barcodeWhitelist $barcodeWhitelist
${run_preprocess} --barcodeWhitelist $barcodeWhitelist
"""
}

Expand Down
42 changes: 24 additions & 18 deletions modules/single/hash_demulti/gmm_demux.nf
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2

process gmm_demux {
publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti/gmm_demux", mode:'copy'

process gmm_demux{
publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/gmm_demux", mode:'copy'

label 'small_mem'
conda "$projectDir/conda/gmm_demux.yml"

input:
tuple val(sampleId), path(filtered_hto_matrix_dir), val(hto_name_gmm)
path filtered_hto_matrix_dir
//HTO names as a single comma-separated string
//val hto_name_gmm
val hto_name_gmm
//mode 2
//needs an estimate of the number of cells in the single-cell assay
//obligatory
Expand All @@ -26,29 +28,31 @@ process gmm_demux {
val ambiguous

output:
path "gmm_demux_${sampleId}"

path "gmm_demux_${task.index}"


script:
def extract_droplets = extract != 'None' ? " -x ${extract}" : ''
def ambiguous_droplets = extract != 'None' ? " --ambiguous ${ambiguous}" : ''

if (mode_GMM == 'csv') {
"""
mkdir gmm_demux_${sampleId}
touch gmm_demux_${sampleId}_$report_gmm
GMM-demux -c $filtered_hto_matrix_dir $hto_name_gmm -u $summary --report gmm_demux_${sampleId}_$report_gmm --full gmm_demux_${sampleId} $extract_droplets -t $threshold_gmm
gmm_demux_params.py --path_hto $filtered_hto_matrix_dir --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${sampleId}_$report_gmm --mode $mode_GMM $extract_droplets --threshold_gmm $threshold_gmm $ambiguous_droplets --outputdir gmm_demux_${sampleId}
mkdir gmm_demux_${task.index}
touch gmm_demux_${task.index}_$report_gmm
GMM-demux -c $filtered_hto_matrix_dir $hto_name_gmm -u $summary --report gmm_demux_${task.index}_$report_gmm --full gmm_demux_${task.index} $extract_droplets -t $threshold_gmm
gmm_demux_params.py --path_hto $filtered_hto_matrix_dir --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${task.index}_$report_gmm --mode $mode_GMM $extract_droplets --threshold_gmm $threshold_gmm $ambiguous_droplets --outputdir gmm_demux_${task.index}
"""
}else {
"""
mkdir gmm_demux_${sampleId}
touch gmm_demux_${sampleId}_$report_gmm
GMM-demux $filtered_hto_matrix_dir $hto_name_gmm -u $summary -r gmm_demux_${sampleId}_$report_gmm --full gmm_demux_${sampleId} -o gmm_demux_${sampleId} $extract_droplets -t $threshold_gmm
gmm_demux_params.py --path_hto $filtered_hto_matrix_dir --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${sampleId}_$report_gmm --mode $mode_GMM $extract_droplets --threshold_gmm $threshold_gmm $ambiguous_droplets --outputdir gmm_demux_${sampleId}
mkdir gmm_demux_${task.index}
touch gmm_demux_${task.index}_$report_gmm
GMM-demux $filtered_hto_matrix_dir $hto_name_gmm -u $summary -r gmm_demux_${task.index}_$report_gmm --full gmm_demux_${task.index} -o gmm_demux_${task.index} $extract_droplets -t $threshold_gmm
gmm_demux_params.py --path_hto $filtered_hto_matrix_dir --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${task.index}_$report_gmm --mode $mode_GMM $extract_droplets --threshold_gmm $threshold_gmm $ambiguous_droplets --outputdir gmm_demux_${task.index}
"""
}
}
Expand All @@ -57,15 +61,17 @@ workflow gmm_demux_hashing {
take:
hto_matrix
main:
hto_name_gmm = params.hto_name_gmm
summary = params.summary
report_gmm = params.report_gmm
mode = params.mode_GMM
extract = params.extract
threshold_gmm = params.threshold_gmm
ambiguous = params.ambiguous

gmm_demux(hto_matrix, summary, report_gmm, mode, extract, threshold_gmm, ambiguous)

gmm_demux(hto_matrix,hto_name_gmm,summary,report_gmm,mode,extract,threshold_gmm,ambiguous)

emit:
gmm_demux.out.collect()
}
Expand Down
7 changes: 5 additions & 2 deletions test.config
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
params {
outdir = "result_test"
// input for hashing-based deconvolution
hto_matrix_raw = "$projectDir/test_data/hto"
hto_matrix_filtered = "$projectDir/test_data/hto"
hto_matrix_raw = "$projectDir/test_data/hto_gz"
hto_matrix_filtered = "$projectDir/test_data/hto_gz"
rna_matrix_raw = "$projectDir/test_data/rna"
rna_matrix_filtered = "$projectDir/test_data/rna"

Expand All @@ -23,4 +23,7 @@ params {
// the donor genotype file provided by popscle doesn't work with souporcell
use_known_genotype = "False"
ignore = "True"
//gmm_demux input
hto_name_gmm = "hto_1,hto_2"

}
Binary file added test_data/hto_gz/barcodes.tsv.gz
Binary file not shown.
Binary file added test_data/hto_gz/features.tsv.gz
Binary file not shown.
Binary file added test_data/hto_gz/matrix.mtx.gz
Binary file not shown.
Binary file added test_data/rna_gz/barcodes.tsv.gz
Binary file not shown.
Binary file added test_data/rna_gz/genes.tsv.gz
Binary file not shown.
Binary file added test_data/rna_gz/matrix.mtx.gz
Binary file not shown.

0 comments on commit a9a11a2

Please sign in to comment.