Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge from main #40

Merged
merged 15 commits into from
Feb 5, 2024
7 changes: 4 additions & 3 deletions bin/bff.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ parser$add_argument("--methodsForConsensus", help='By default, a consensus call
parser$add_argument("--cellbarcodeWhitelist", help='A vector of expected cell barcodes. This allows reporting on the total set of expected barcodes, not just those in the filtered count matrix',default=NULL)
parser$add_argument("--metricsFile", help='If provided, summary metrics will be written to this file.', default="metrics_cell_hash_r.csv")
parser$add_argument("--doTSNE", help='If true, tSNE will be run on the resulting hashing calls after each caller.', default=TRUE)
parser$add_argument("--preprocess", help='If true, the PreProcess function by CellHashR is executed', default=FALSE)
parser$add_argument("--preprocess_bff", help='If given, the PreProcess function by CellHashR is executed', action="store_true")
parser$add_argument("--barcodeWhitelist", help='A vector of barcode names to retain, used for preprocessing step', default=NULL)
parser$add_argument("--doHeatmap", help='f true, Seurat::HTOHeatmap will be run on the results of each caller.', default=TRUE)
parser$add_argument("--perCellSaturation", help='An optional dataframe with the columns cellbarcode and saturation',default=NULL)
Expand All @@ -31,6 +31,7 @@ parser$add_argument("--assignmentOutBff", help="Prefix name for the file contain
parser$add_argument("--outputdir", help='Output directory')
args <- parser$parse_args()

print(args$preprocess_bff)
#Parameters originally Null
methodsForConsensus <- args$methodsForConsensus
if(is.null(methodsForConsensus)){
Expand Down Expand Up @@ -62,7 +63,7 @@ Argument <- c("HTO-File", "methods", "methodsForConsensus", "cellbarcodeWhitelis
Value <- c(args$fileHto, args$methods, methodsForConsensus, cellbarcodeWhitelist, args$metricsFile, perCellSaturation, majorityConsensusThreshold, callerDisagreementThreshold, args$doTSNE, args$doHeatmap,args$chemistry)
params <- data.frame(Argument, Value)

if(as.logical(args$preprocess)){
if(args$preprocess_bff == TRUE){
print("Preprocessing activated")
print(args$preprocess)
#get barcodes
Expand Down Expand Up @@ -90,7 +91,7 @@ if (!is.null(args$methodsForConsensus)) {
perCell_args <- args$perCellSaturation
perCell <- ifelse(perCell_args == "null" || perCell_args == "Null", NULL, perCell_args)

if(args$methodsForConsensus=="bff_raw" || args$methodsForConsensus=="bff_cluster" || args$methodsForConsensus=="bff_raw,bff_cluster" || is.null(args$methodsForConsensus) )
if(args$methodsForConsensus=="bff_raw" || args$methodsForConsensus=="bff_cluster" || args$methodsForConsensus=="bff_raw,bff_cluster" || args$methodsForConsensus=="bff_cluster,bff_raw"|| is.null(args$methodsForConsensus) )
#Only Bff in its different variations is available
if (args$methods == "bff_raw") {
print("Executing BFF raw")
Expand Down
168 changes: 114 additions & 54 deletions bin/summary_hash.py

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions modules/hash_demulti/bff.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,14 @@ process bff {

script:

def run_preprocess = preprocess_bff != 'False' ? " --preprocess_bff" : ''
"""
mkdir bff_${sampleId}
bff.R --fileHto hto_data --methods $methods --methodsForConsensus $methodsForConsensus \
--cellbarcodeWhitelist $cellbarcodeWhitelist --metricsFile bff_${sampleId}_$metricsFile \
--doTSNE $doTSNE --doHeatmap $doHeatmap --perCellSaturation $perCellSaturation --majorityConsensusThreshold $majorityConsensusThreshold \
--chemistry $chemistry --callerDisagreementThreshold $callerDisagreementThreshold --outputdir bff_${sampleId} \
--assignmentOutBff $assignmentOutBff --preprocess $preprocess_bff --barcodeWhitelist $barcodeWhitelist
--cellbarcodeWhitelist $cellbarcodeWhitelist --metricsFile bff_${sampleId}_$metricsFile \
--doTSNE $doTSNE --doHeatmap $doHeatmap --perCellSaturation $perCellSaturation --majorityConsensusThreshold $majorityConsensusThreshold \
--chemistry $chemistry --callerDisagreementThreshold $callerDisagreementThreshold --outputdir bff_${sampleId} \
--assignmentOutBff $assignmentOutBff ${run_preprocess} --barcodeWhitelist $barcodeWhitelist
"""
}

Expand Down
4 changes: 2 additions & 2 deletions modules/hash_demulti/gmm_demux.nf
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ process gmm_demux{

workflow gmm_demux_hashing{
take:
hto_matrix
input_list
main:
summary = params.summary
report_gmm = params.report_gmm
Expand All @@ -70,7 +70,7 @@ take:
threshold_gmm = params.threshold_gmm
ambiguous = params.ambiguous

gmm_demux(hto_matrix,summary,report_gmm,mode,extract,threshold_gmm,ambiguous)
gmm_demux(input_list,summary,report_gmm,mode,extract,threshold_gmm,ambiguous)

emit:
gmm_demux.out.collect()
Expand Down
8 changes: 6 additions & 2 deletions modules/hash_demultiplexing.nf
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,12 @@ workflow hash_demultiplexing {
if (params.gmmDemux == 'True') {
Channel.fromPath(params.multi_input) \
| splitCsv(header:true) \
| map { row-> tuple(row.sampleId, params.hto_matrix_gmm_demux == 'raw' ? row.hto_matrix_raw : row.hto_matrix_filtered, row.hto_name_gmm) }
| gmm_demux_hashing

| map { row-> tuple(row.sampleId,
params.hto_matrix_gmm_demux == "raw" ? row.hto_matrix_raw : row.hto_matrix_filtered,
row.hto_name_gmm )}
| set {input_list_gmm_demux}
| gmm_demux_hashing(input_list_gmm_demux)
gmmDemux_out = gmm_demux_hashing.out
}
else {
Expand Down
3 changes: 2 additions & 1 deletion modules/single/hash_demulti/bff.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,15 @@ process bff {
path "bff_${task.index}"

script:
def run_preprocess = preprocess_bff != 'False' ? " --preprocess_bff" : ''

"""
mkdir bff_${task.index}
bff.R --fileHto hto_data --methods $methods --methodsForConsensus $methodsForConsensus \
--cellbarcodeWhitelist $cellbarcodeWhitelist --metricsFile bff_${task.index}_$metricsFile \
--doTSNE $doTSNE --doHeatmap $doHeatmap --perCellSaturation $perCellSaturation --majorityConsensusThreshold $majorityConsensusThreshold \
--chemistry $chemistry --callerDisagreementThreshold $callerDisagreementThreshold --outputdir bff_${task.index} --assignmentOutBff $assignmentOutBff \
--preprocess $preprocess_bff --barcodeWhitelist $barcodeWhitelist
${run_preprocess} --barcodeWhitelist $barcodeWhitelist
"""
}

Expand Down
42 changes: 24 additions & 18 deletions modules/single/hash_demulti/gmm_demux.nf
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2

process gmm_demux {
publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti/gmm_demux", mode:'copy'

process gmm_demux{
publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/gmm_demux", mode:'copy'

label 'small_mem'
conda "$projectDir/conda/gmm_demux.yml"

input:
tuple val(sampleId), path(filtered_hto_matrix_dir), val(hto_name_gmm)
path filtered_hto_matrix_dir
//HTO names as a single comma-separated string
//val hto_name_gmm
val hto_name_gmm
//mode 2
//needs an estimate of the number of cells in the single-cell assay
//obligatory
Expand All @@ -26,29 +28,31 @@ process gmm_demux {
val ambiguous

output:
path "gmm_demux_${sampleId}"

path "gmm_demux_${task.index}"


script:
def extract_droplets = extract != 'None' ? " -x ${extract}" : ''
def ambiguous_droplets = extract != 'None' ? " --ambiguous ${ambiguous}" : ''

if (mode_GMM == 'csv') {
"""
mkdir gmm_demux_${sampleId}
touch gmm_demux_${sampleId}_$report_gmm

GMM-demux -c $filtered_hto_matrix_dir $hto_name_gmm -u $summary --report gmm_demux_${sampleId}_$report_gmm --full gmm_demux_${sampleId} $extract_droplets -t $threshold_gmm
gmm_demux_params.py --path_hto $filtered_hto_matrix_dir --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${sampleId}_$report_gmm --mode $mode_GMM $extract_droplets --threshold_gmm $threshold_gmm $ambiguous_droplets --outputdir gmm_demux_${sampleId}

mkdir gmm_demux_${task.index}
touch gmm_demux_${task.index}_$report_gmm
GMM-demux -c $filtered_hto_matrix_dir $hto_name_gmm -u $summary --report gmm_demux_${task.index}_$report_gmm --full gmm_demux_${task.index} $extract_droplets -t $threshold_gmm
gmm_demux_params.py --path_hto $filtered_hto_matrix_dir --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${task.index}_$report_gmm --mode $mode_GMM $extract_droplets --threshold_gmm $threshold_gmm $ambiguous_droplets --outputdir gmm_demux_${task.index}
"""
}else {
"""
mkdir gmm_demux_${sampleId}
touch gmm_demux_${sampleId}_$report_gmm

GMM-demux $filtered_hto_matrix_dir $hto_name_gmm -u $summary -r gmm_demux_${sampleId}_$report_gmm --full gmm_demux_${sampleId} -o gmm_demux_${sampleId} $extract_droplets -t $threshold_gmm
gmm_demux_params.py --path_hto $filtered_hto_matrix_dir --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${sampleId}_$report_gmm --mode $mode_GMM $extract_droplets --threshold_gmm $threshold_gmm $ambiguous_droplets --outputdir gmm_demux_${sampleId}

mkdir gmm_demux_${task.index}
touch gmm_demux_${task.index}_$report_gmm
GMM-demux $filtered_hto_matrix_dir $hto_name_gmm -u $summary -r gmm_demux_${task.index}_$report_gmm --full gmm_demux_${task.index} -o gmm_demux_${task.index} $extract_droplets -t $threshold_gmm
gmm_demux_params.py --path_hto $filtered_hto_matrix_dir --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${task.index}_$report_gmm --mode $mode_GMM $extract_droplets --threshold_gmm $threshold_gmm $ambiguous_droplets --outputdir gmm_demux_${task.index}
"""
}
}
Expand All @@ -57,15 +61,17 @@ workflow gmm_demux_hashing {
take:
hto_matrix
main:
hto_name_gmm = params.hto_name_gmm
summary = params.summary
report_gmm = params.report_gmm
mode = params.mode_GMM
extract = params.extract
threshold_gmm = params.threshold_gmm
ambiguous = params.ambiguous

gmm_demux(hto_matrix, summary, report_gmm, mode, extract, threshold_gmm, ambiguous)

gmm_demux(hto_matrix,hto_name_gmm,summary,report_gmm,mode,extract,threshold_gmm,ambiguous)

emit:
gmm_demux.out.collect()
}
Expand Down
7 changes: 5 additions & 2 deletions test.config
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
params {
outdir = "result_test"
// input for hashing-based deconvolution
hto_matrix_raw = "$projectDir/test_data/hto"
hto_matrix_filtered = "$projectDir/test_data/hto"
hto_matrix_raw = "$projectDir/test_data/hto_gz"
hto_matrix_filtered = "$projectDir/test_data/hto_gz"
rna_matrix_raw = "$projectDir/test_data/rna"
rna_matrix_filtered = "$projectDir/test_data/rna"

Expand All @@ -23,4 +23,7 @@ params {
// donor genotype file provided by popscle doesn't work with souporcell
use_known_genotype = "False"
ignore = "True"
//gmm_demux input
hto_name_gmm = "hto_1,hto_2"

}
Binary file added test_data/hto_gz/barcodes.tsv.gz
Binary file not shown.
Binary file added test_data/hto_gz/features.tsv.gz
Binary file not shown.
Binary file added test_data/hto_gz/matrix.mtx.gz
Binary file not shown.
Binary file added test_data/rna_gz/barcodes.tsv.gz
Binary file not shown.
Binary file added test_data/rna_gz/genes.tsv.gz
Binary file not shown.
Binary file added test_data/rna_gz/matrix.mtx.gz
Binary file not shown.
Loading