From 749f8be933d35c95b177af9d52760362dff8439e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Mon, 6 Nov 2023 19:24:41 +0100
Subject: [PATCH 01/32] correction to merged files

---
 bin/bff.R           | 13 -------------
 bin/demuxem.py      |  4 ++++
 bin/summary_hash.py |  5 ++---
 3 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/bin/bff.R b/bin/bff.R
index e4a4606..6154dcf 100755
--- a/bin/bff.R
+++ b/bin/bff.R
@@ -89,19 +89,6 @@ if (!is.null(args$methodsForConsensus)) {
 
 perCell_args <- args$perCellSaturation
 perCell <- ifelse(perCell_args == "null" || perCell_args == "Null", NULL, perCell_args)
-<<<<<<< HEAD
-<<<<<<< HEAD
-print("---------------------")
-print(perCell)
-print("---------------------")
-=======
-print(perCell)
-
->>>>>>> c781241 (bff re-added problematic parameter)
-=======
-print("---------------------")
-print(perCell)
-print("---------------------")
 
 if(args$methodsForConsensus=="bff_raw" || args$methodsForConsensus=="bff_cluster" || args$methodsForConsensus=="bff_raw,bff_cluster" || is.null(args$methodsForConsensus)  )
   #Only Bff in its different variations is available
diff --git a/bin/demuxem.py b/bin/demuxem.py
index 02b57a8..e6e8702 100755
--- a/bin/demuxem.py
+++ b/bin/demuxem.py
@@ -41,6 +41,10 @@
     # Extract rna and hashing data
     #rna_data = data.get_data(modality="rna")
     #hashing_data = data.get_data(modality="hashing")
+    rna = args.rna_matrix_dir
+    print("-------------------")
+    print(rna)
+    print("-------------------")
     filter = ""
     if args.filter_demuxem.lower() in ['true', 't', 'yes', 'y', '1']:
         filter = True
diff --git a/bin/summary_hash.py b/bin/summary_hash.py
index f43603a..bdf8582 100755
--- a/bin/summary_hash.py
+++ b/bin/summary_hash.py
@@ -450,13 +450,13 @@ def bff_summary(bff_res,raw_adata, raw_mudata):
                     dt_assign = dt_assign.drop([column], axis=1)
             dt_assign.loc[dt_assign["consensuscall"] == "Singlet", "consensuscall"] = "singlet"
             dt_assign.loc[dt_assign["consensuscall"] == "Doublet", "consensuscall"] = "doublet"
+            dt_assign.loc[dt_assign["consensuscall"] == "Negative", "consensuscall"] = "negative"
+            dt_assign['consensuscall'] = dt_assign['consensuscall'].astype('category')
             dt_assign = dt_assign.rename(columns={"cellbarcode": "Barcode", "consensuscall": os.path.basename(x)})
             assign.append(dt_assign)
             if raw_adata is not None:
                 adata = raw_adata.copy()
-
                 adata.obs = adata.obs.merge(dt_assign, left_index=True, right_index=True, how='left')
-
                 adata.obs.rename(columns={adata.obs.columns[0]: 'donor'}, inplace=True)
                 adata.obs.donor = adata.obs.donor.fillna("negative")
                 adata.obs.donor = adata.obs.donor.astype(str)
@@ -465,7 +465,6 @@ def bff_summary(bff_res,raw_adata, raw_mudata):
 
             if raw_mudata is not None:
                 mudata = raw_mudata.copy()
-
                 mudata['rna'].obs = mudata['rna'].obs.merge(dt_assign, left_index=True, right_index=True, how='left')
                 mudata['rna'].obs.rename(columns={mudata['rna'].obs.columns[0]: 'donor'}, inplace=True)
                 mudata['rna'].obs.donor = mudata['rna'].obs.donor.fillna("negative")

From e5810bc2f9537a48aa983873c8106542c171eb89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Mon, 6 Nov 2023 22:55:48 +0100
Subject: [PATCH 02/32] correction merged files

---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index afecaa3..bd056af 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -448,7 +448,7 @@ process {
         conda = "$projectDir/conda/demuxmix.yml"
     }
     withName:bff {
-        conda = './conda/bff.yml'
+        conda = "$projectDir/conda/dropletutils.yml"
     }
     withName:matchDonor {
         conda = "$projectDir/conda/donor_match.yml"

From 2393ad3cbc92a88156cca6abaf31f9fde807984d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Mon, 6 Nov 2023 23:00:24 +0100
Subject: [PATCH 03/32] correction merged filed

---
 modules/single/hash_demulti/gmm_demux.nf | 64 ++++++++++--------------
 1 file changed, 27 insertions(+), 37 deletions(-)

diff --git a/modules/single/hash_demulti/gmm_demux.nf b/modules/single/hash_demulti/gmm_demux.nf
index 0f0ec38..990b340 100644
--- a/modules/single/hash_demulti/gmm_demux.nf
+++ b/modules/single/hash_demulti/gmm_demux.nf
@@ -2,32 +2,31 @@
 nextflow.enable.dsl=2
 
 process gmm_demux{
-    publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/gmm_demux", mode:'copy'
+    publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti/gmm_demux", mode:'copy'
     label 'small_mem'
     input:
-        path path_hto
+        tuple val(sampleId), path(filtered_hto_matrix_dir), val(hto_name_gmm)
         //HTO names as string separated by commas
-        each hto_name_gmm
-        //5 cases are available for the tool it all depends on the vars given
+        //val hto_name_gmm
         //mode 2
         //need estimate number of cells in the single cell assay
         //obligatory
-        each summary
+        val summary
         //need to be combined with summary to get a report as file
-        each report_gmm 
+        val report_gmm 
         //mode 4
         // write csv or tsv - type of input
-        each mode_GMM
+        val mode_GMM
         //case 5
-        each extract 
+        val extract 
         //float between 0 and 1
-        each threshold_gmm
-        each ambiguous
+        val threshold_gmm
+        val ambiguous
         
         
     
     output:
-        path "gmm_demux_${task.index}"
+        path "gmm_demux_${sampleId}"
         
     script:
         def extract_droplets = extract != 'None' ? " -x ${extract}" : ''
@@ -35,20 +34,20 @@ process gmm_demux{
 
         if(mode_GMM=="csv"){
             """
-            mkdir gmm_demux_${task.index}
-            touch gmm_demux_${task.index}_$report_gmm
+            mkdir gmm_demux_${sampleId}
+            touch gmm_demux_${sampleId}_$report_gmm
             
-            GMM-demux -c $path_hto $hto_name_gmm -u $summary --report gmm_demux_${task.index}_$report_gmm --full gmm_demux_${task.index} $extract_droplets -t $threshold_gmm
-            gmm_demux_params.py --path_hto $path_hto --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${task.index}_$report_gmm   --mode $mode_GMM  $extract_droplets --threshold_gmm $threshold_gmm $ambiguous_droplets  --outputdir gmm_demux_${task.index}
+            GMM-demux -c $filtered_hto_matrix_dir $hto_name_gmm -u $summary --report gmm_demux_${sampleId}_$report_gmm --full gmm_demux_${sampleId} $extract_droplets -t $threshold_gmm
+            gmm_demux_params.py --path_hto $filtered_hto_matrix_dir --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${sampleId}_$report_gmm   --mode $mode_GMM  $extract_droplets --threshold_gmm $threshold_gmm $ambiguous_droplets  --outputdir gmm_demux_${sampleId}
             
             """
         }else {
             """
-            mkdir gmm_demux_${task.index}
-            touch gmm_demux_${task.index}_$report_gmm
+            mkdir gmm_demux_${sampleId}
+            touch gmm_demux_${sampleId}_$report_gmm
             
-            GMM-demux $path_hto $hto_name_gmm -u $summary -r gmm_demux_${task.index}_$report_gmm --full gmm_demux_${task.index} -o gmm_demux_${task.index} $extract_droplets -t $threshold_gmm
-            gmm_demux_params.py --path_hto $path_hto --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${task.index}_$report_gmm --mode $mode_GMM $extract_droplets  --threshold_gmm $threshold_gmm $ambiguous_droplets --outputdir gmm_demux_${task.index}
+            GMM-demux $filtered_hto_matrix_dir $hto_name_gmm -u $summary -r gmm_demux_${sampleId}_$report_gmm --full gmm_demux_${sampleId} -o gmm_demux_${sampleId} $extract_droplets -t $threshold_gmm
+            gmm_demux_params.py --path_hto $filtered_hto_matrix_dir --hto_name_gmm $hto_name_gmm --summary $summary --report gmm_demux_${sampleId}_$report_gmm --mode $mode_GMM $extract_droplets  --threshold_gmm $threshold_gmm $ambiguous_droplets --outputdir gmm_demux_${sampleId}
             
             """
         }
@@ -56,28 +55,19 @@ process gmm_demux{
 
 }
 
-def split_input(input){
-    if (input =~ /;/ ){
-        Channel.from(input).map{ return it.tokenize(';')}.flatten()
-    }
-    else{
-        Channel.from(input)
-    }
-}
 
 workflow gmm_demux_hashing{
-    take:   
+take: 
         hto_matrix
-    main:
-            hto_name_gmm = split_input(params.hto_name_gmm)
-            summary = split_input(params.summary)
-            report_gmm = split_input(params.report_gmm)
-            mode = split_input(params.mode_GMM)
-            extract = split_input(params.extract)
-            threshold_gmm = split_input(params.threshold_gmm)
-            ambiguous = split_input(params.ambiguous)
+  main:
+        summary = params.summary
+        report_gmm = params.report_gmm
+        mode = params.mode_GMM
+        extract = params.extract
+        threshold_gmm = params.threshold_gmm
+        ambiguous = params.ambiguous
 
-            gmm_demux(hto_matrix,hto_name_gmm,summary,report_gmm,mode,extract,threshold_gmm,ambiguous)
+        gmm_demux(hto_matrix,summary,report_gmm,mode,extract,threshold_gmm,ambiguous)
   
   emit:
         gmm_demux.out.collect()

From c93582eb9a6f66f489ac2642d0ee12de28437f79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Mon, 6 Nov 2023 23:07:55 +0100
Subject: [PATCH 04/32] correction merged files

---
 modules/hash_demultiplexing.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/hash_demultiplexing.nf b/modules/hash_demultiplexing.nf
index 4fb87a6..c2a61ae 100644
--- a/modules/hash_demultiplexing.nf
+++ b/modules/hash_demultiplexing.nf
@@ -91,7 +91,7 @@ process summary{
             generate_mdata = "--generate_mudata --read_rna_mtx rna_data --read_hto_mtx hto_data"
         }
         
-        """
+          """
         summary_hash.py $demuxem_files $htodemux_files $multiseq_files $hashedDrops_files $hashsolo_files $demuxmix_files $gmmDemux_files $bff_files $generate_adata $generate_mdata --sampleId $sampleId
         """
 }

From 2dfd17f57fcfb7b5862d93820d29e3f7aa8bc7d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Mon, 6 Nov 2023 23:12:00 +0100
Subject: [PATCH 05/32] correction merged

---
 modules/hash_demultiplexing.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/hash_demultiplexing.nf b/modules/hash_demultiplexing.nf
index c2a61ae..4fb87a6 100644
--- a/modules/hash_demultiplexing.nf
+++ b/modules/hash_demultiplexing.nf
@@ -91,7 +91,7 @@ process summary{
             generate_mdata = "--generate_mudata --read_rna_mtx rna_data --read_hto_mtx hto_data"
         }
         
-          """
+        """
         summary_hash.py $demuxem_files $htodemux_files $multiseq_files $hashedDrops_files $hashsolo_files $demuxmix_files $gmmDemux_files $bff_files $generate_adata $generate_mdata --sampleId $sampleId
         """
 }

From 142eca88029eda5828899924e8d9631dcb086c66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Mon, 6 Nov 2023 23:31:49 +0100
Subject: [PATCH 06/32] correction merge

---
 modules/hash_demulti/gmm_demux.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/hash_demulti/gmm_demux.nf b/modules/hash_demulti/gmm_demux.nf
index 337ae31..990b340 100644
--- a/modules/hash_demulti/gmm_demux.nf
+++ b/modules/hash_demulti/gmm_demux.nf
@@ -59,7 +59,6 @@ process gmm_demux{
 workflow gmm_demux_hashing{
 take: 
         hto_matrix
-        hto_name_gmm
   main:
         summary = params.summary
         report_gmm = params.report_gmm

From a1e6078c3d6fc1e3095856f0ced10d13781d472b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Tue, 7 Nov 2023 21:34:07 +0100
Subject: [PATCH 07/32] general code for mudata added

---
 bin/generate_data.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/bin/generate_data.py b/bin/generate_data.py
index 96fc494..b1d1043 100755
--- a/bin/generate_data.py
+++ b/bin/generate_data.py
@@ -27,5 +27,17 @@
         adata.write("adata_with_donor_matching.h5ad")
     
     if args.generate_mudata:
-        # write mudata_with_donor_matching.h5mu data
-        pass
+        rna_data = sc.read_10x_mtx(args.read_rna_mtx)
+        hto_data = sc.read_10x_mtx(args.read_hto_mtx, gex_only=False)
+        assignment_dir = os.path.join(args.assignment, 
+                                    [filename for filename in os.listdir(args.assignment) if filename == "all_assignment_after_match.csv"][0])
+
+        assignment = pd.read_csv(assignment_dir, index_col = 0)
+        mudata = MuData({"rna": rna_data, "hto": hto_data })
+
+        mudata['rna'].obs = mudata['rna'].obs.merge(args.assignment, left_index=True, right_index=True, how='left')
+        mudata['rna'].obs.rename(columns={mudata['rna'].obs.columns[0]: 'donor'}, inplace=True)
+        mudata['rna'].obs.donor = mudata['rna'].obs.donor.fillna("negative")
+        mudata['rna'].obs.donor = mudata['rna'].obs.donor.astype(str)
+        mudata.update()
+        mudata.write("mudata_with_donor_matching.h5mu") 

From 1588feb36af4ee44d25ff7781dce590337a53637 Mon Sep 17 00:00:00 2001
From: zethson <lukas.heumos@posteo.net>
Date: Fri, 17 Nov 2023 17:10:54 +0100
Subject: [PATCH 08/32] Add notebook to sphinx

Signed-off-by: zethson <lukas.heumos@posteo.net>
---
 docs/requirements.txt                            | 1 +
 docs/source/conf.py                              | 6 ++++--
 notebook.ipynb => docs/source/hadge_output.ipynb | 9 ++++++++-
 3 files changed, 13 insertions(+), 3 deletions(-)
 rename notebook.ipynb => docs/source/hadge_output.ipynb (99%)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index db72b7b..92e893c 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,3 +1,4 @@
 sphinx
+nb-sphinx
 myst-parser
 furo
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 779af44..e314080 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -3,7 +3,8 @@
 author = 'Fabiola Curion, Xichen Wu, Lukas Heumos'
 release = '1.0.0'
 
-extensions = ['myst_parser']
+extensions = ['myst_parser',
+              'nbsphinx']
 
 source_suffix = {
     '.rst': 'restructuredtext',
@@ -16,5 +17,6 @@
 exclude_patterns = []
 
 html_theme = "furo"
+html_static_path = ['_static']
 
-html_static_path = ['_static']
\ No newline at end of file
+nbsphinx_execute = 'never'
diff --git a/notebook.ipynb b/docs/source/hadge_output.ipynb
similarity index 99%
rename from notebook.ipynb
rename to docs/source/hadge_output.ipynb
index ecd6e92..ac881e7 100644
--- a/notebook.ipynb
+++ b/docs/source/hadge_output.ipynb
@@ -1,5 +1,12 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Working with hadge output"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -16,7 +23,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "This notebook serves to get familiar with the ouput of hadge and reproduce some figures from the manuscript."
+    "This notebook serves to get familiar with the output of hadge and reproduce some figures from the manuscript."
    ]
   },
   {

From 0f99fd89d789eafdcc334f856f2e3e6ee7dcbc34 Mon Sep 17 00:00:00 2001
From: zethson <lukas.heumos@posteo.net>
Date: Fri, 17 Nov 2023 17:11:52 +0100
Subject: [PATCH 09/32] Fix nbsphinx requirements

Signed-off-by: zethson <lukas.heumos@posteo.net>
---
 docs/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 92e893c..2b919fd 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,4 +1,4 @@
 sphinx
-nb-sphinx
+nbsphinx
 myst-parser
 furo

From 45c5db7a93148cf4e433657d96a58e1f509b7377 Mon Sep 17 00:00:00 2001
From: zethson <lukas.heumos@posteo.net>
Date: Fri, 17 Nov 2023 17:19:41 +0100
Subject: [PATCH 10/32] Pandoc pls

Signed-off-by: zethson <lukas.heumos@posteo.net>
---
 docs/source/conf.py  | 2 +-
 docs/source/index.md | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index e314080..5678883 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -14,7 +14,7 @@
 
 templates_path = ['_templates']
 
-exclude_patterns = []
+exclude_patterns = ['_build', '**.ipynb_checkpoints']
 
 html_theme = "furo"
 html_static_path = ['_static']
diff --git a/docs/source/index.md b/docs/source/index.md
index fb0656e..6d74528 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -56,7 +56,7 @@ nextflow run main.nf -profile test
 
 ## Notebook
 
-Check[](../../notebook.ipynb) to get familiar with the output of hadge.
+Check[](hadge_output.ipynb) to get familiar with the output of hadge.
 
 ## **Pipeline output**
 
@@ -98,6 +98,7 @@ general
 genetic
 hashing
 rescue
+hadge_output
 ```
 
 # Indices and tables

From 9bcebac0931c1a76e529e23a56a19f3e0b49e1c9 Mon Sep 17 00:00:00 2001
From: zethson <lukas.heumos@posteo.net>
Date: Fri, 17 Nov 2023 17:21:31 +0100
Subject: [PATCH 11/32] Remove docs wf

Signed-off-by: zethson <lukas.heumos@posteo.net>
---
 .github/workflows/build_docs.yml | 22 ----------------------
 1 file changed, 22 deletions(-)
 delete mode 100644 .github/workflows/build_docs.yml

diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml
deleted file mode 100644
index e0c9ef6..0000000
--- a/.github/workflows/build_docs.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: sphinx-docs
-
-on:
-  push:
-  pull_request:
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.11"
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r docs/requirements.txt
-
-      - name: Build documentation
-        run: cd docs && make html

From ce91ab36582a5216006c8745ead4b91f41e21fb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Mon, 20 Nov 2023 20:02:34 +0100
Subject: [PATCH 12/32] Docs for hashing

---
 docs/source/hashing.md | 74 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 73 insertions(+), 1 deletion(-)

diff --git a/docs/source/hashing.md b/docs/source/hashing.md
index af91a13..d77253f 100644
--- a/docs/source/hashing.md
+++ b/docs/source/hashing.md
@@ -16,7 +16,11 @@ The input data depends heavily on the deconvolution tools. In the following tabl
 | Multiseq             | - Seurat object with both UMI and hashing count matrix (RDS)                                                       | `params.rna_matrix_multiseq` <br> `params.hto_matrix_multiseq` |
 | HashSolo             | - 10x mtx directory with hashing count matrix (H5)                                                                 | `params.hto_matrix_hashsolo` <br> `params.rna_matrix_hashsolo` |
 | HashedDrops          | - 10x mtx directory with hashing count matrix (Directory)                                                          | `params.hto_matrix_hashedDrops`                                |
-| Demuxem              | - 10x mtx directory with UMI count matrix (Directory)<br>- 10x mtx directory with hashing count matrix (Directory) | `params.hto_matrix_demuxem`<br>`params.rna_matrix_demuxem`     |
+| Demuxem              | - 10x mtx directory with UMI count matrix (Directory) <br>- 10x mtx directory with hashing count matrix (Directory) | `params.hto_matrix_demuxem`<br>`params.rna_matrix_demuxem`     |
+| GMM - Demux              | - 10x mtx directory with UMI count matrix (Directory)<br> - 10x mtx directory with hashing count matrix (Directory) | `params.hto_matrix_gmm_demux`    |
+| Demuxmix            | - 10x mtx directory with UMI count matrix (Directory)<br>- 10x mtx directory with hashing count matrix (Directory) | `params.hto_matrix_demuxmix`<br>`params.rna_matrix_demuxmix`     |
+| BFF             | - 10x mtx directory with UMI count matrix (Directory)<br>- 10x mtx directory with hashing count matrix (Directory) | `params.hto_matrix_bff`     |
+
 
 Similary as genotype-based deconvlution methods, hashing methods also have some input in common. So we also try to utilize common input parameters `params.[rna/hto]_matrix_[raw/filtered]` to store count matrices for better control and `params.[rna/hto]_matrix_[method]` is used to specify whether to use raw or filtered counts for each method.
 
@@ -279,3 +283,71 @@ output directory: `$pipeline_output_folder/bff/bff_[task_ID/sampleId]`
 | combinations             | An integer matrix specifying valid combinations of HTOs. Each row corresponds to a single sample and specifies the indices of rows in x corresponding to the HTOs used to label that sample. Default: NULL |
 | objectOutHashedDrops     | Prefix of the hashedDrops output RDS object. Default: hashedDrops                                                                                                                                          |
 | assignmentOutHashedDrops | Prefix of the hashedDrops output CSV file. Default: hashedDrops                                                                                                                                            |
+### GMM-Demux
+|                          |                                                                                              |
+| ------------------------ | -------------------------------------------------------------------------------------------- |
+| gmmDemux                 | Whether to perform GMMDemux. Default: True                                                   |
+| hto_matrix_gmm_demux      | Whether to use raw or filtered HTO count matrix. Default: filtered                                                            |
+| assignmentOutGmmDemux      | Name for the folder output. Default: gmm_demux|
+| hto_name_gmm          |   list of sample tags (HTOs) separated by ',' without whitespace. Default: None   |
+| summary           |  the estimated total count of cells in the single cell assay. Default: 2000     |
+| report_gmm           | Name for the file generated by the summary. Default:report.txt            |
+| mode_GMM    | Format of the input, either tsv or csv. Default: tsv                 |
+| extract             | extract names of the sample barcoding tag(s) to extract, separated by ','. Joint tags are linked with '+'.  Default: None       |
+| threshold_gmm | Provide the confidence threshold value. Requires a float in (0,1). Default: 0.8    |
+| ambiguous    | The estimated chance of having a phony GEM getting included in a pure type GEM cluster by the clustering algorithm. Default: 0.5.            |
+| plotOutHashSolo          | Prefix of the output figures. Default: hashsolo                                              |
+
+### BFF
+|                          |                                                                                              |
+| ------------------------ | -------------------------------------------------------------------------------------------- |
+| BFF                 | Whether to perform BFF. Default: False                                                   |
+| hto_matrix_bff      | Whether to use raw or filtered HTO count matrix. Default: raw                                                            |
+| rna_matrix_bff   | Whether to use raw or filtered scRNA-seq count matrix. Default: raw                                                           |
+| assignmentOutBff      | Name for the folder output. Default: bff|
+| methods          |   method or list of methods to be used. Default: combined_bff   |
+| methodsForConsensus           | a consensus call will be generated using all methods especified. Default: NULL   |
+| cellbarcodeWhitelist           | 	A vector of expected cell barcodes. Default:NULL            |
+| metricsFile    | summary metrics will be written to this file. Default: metrics_bff.cvs                 |
+| doTSNE             |  tSNE will be run on the resulting hashing calls after each caller.  Default: True       |
+| doHeatmap | if true, Seurat::HTOHeatmap will be run on the results of each calle Default: True   |
+| perCellSaturation    | An optional dataframe with the columns cellbarcode and saturation. Default: NULL            |
+| majorityConsensusThreshold | This applies to calculating a consensus call when multiple algorithms are used. Default: NULL    |
+| chemistry     | This string is passed to EstimateMultipletRate. Should be either 10xV2 or 10xV3. Default: 10xV3      |
+| callerDisagreementThreshold     | If provided, the agreement rate will be calculated between each caller and the simple majority call, ignoring discordant and no-call cells. Default: NULL      |
+| preprocess_bff     | When True, the data is preprocess using the method ProcessCountMatrix from CellHashR. Default: False    |
+| barcodeWhitelist     | A vector of barcode names to retain. This parameter is used only when the pre-processing step is executed. Default: NULL    |
+
+### Demuxmix
+|                          |                                                                                              |
+| ------------------------ | -------------------------------------------------------------------------------------------- |
+| demuxmix                 | Whether to perform Demuxmix. Default: False                                                   |
+| hto_matrix_demuxmix      | Whether to use raw or filtered HTO count matrix. Default: raw                                                            |
+| rna_matrix_demuxmix   | Whether to use raw or filtered scRNA-seq count matrix. Default: raw                                                           |
+| assignmentOutDemuxmix      | Name for the folder output. Default: demuxmix|
+| correctTails | If TRUE, droplets meeting the threshold defined by alpha (beta) are classified as "negative" ("positive") even if the mixture model suggests a different classification.  Default: True   |
+| rna_available           | If TRUE, it indicates that RNA data is available and some modes are execuatble with Demuxmix. Default: TRUE     |
+| model   | A character specifying the type of mixture model to be used. Either "naive", "regpos", "reg" or "auto". The last three options require parameter rna to be specified. Default:naive  |
+| alpha_demuxmix    | Threshold defining the left tail of the mixture distribution where droplets should not be classified as "positive". Threshold must be between 0 and 1 Default: 0.9  |
+| beta_demuxmix  | Threshold for defining the right tail of the mixture distribution where droplets should not be classified as "negative"  Default: 0.9       |
+| tol_demuxmix | Convergence criterion for the EM algorithm used to fit the mixture models.  Default: 0.00001   |
+| maxIter_demuxmix    | Maximum number of iterations for the EM algorithm and for the alternating iteration process fitting the NB regression models Default:1000           |
+| k_hto   |Factor to define outliers in the HTO count Default: 1.5       |
+| k_rna   |Factor to define outliers in the numbers of detected genes. Default: 1.5       |
+
+### General Use
+
+#### Single sample use
+The use of the pipeline for a single samples require the definition of certain parameters in order to run the tools under default configuration.
+The parameter `--mode hashing` must be included with the purpose of running the hashing tools only.
+##### GMM-Demux
+The names of the hashtags must be given as a list of string, separated by ','. This list is given under the parameter `--hto_name_gmm`
+##### BFF
+The demultiplexing method for the experiment must be given under the parameter `--methods`. Multiple methods can be given as a list, separated by ','. 
+Besides, the method or methods for consensus must be given under the parameter `--methodsForConsensus`. 
+
+
+```bash
+nextflow run main.nf --mode hashing --match_donor False  --hto_matrix_raw /data_folder/raw_hto_data 
+--hto_matrix_filtered /data_folder/filtered_hto_data --barcodes /data_folder/filtered_hto_data/barcodes.tsv.gz --rna_matrix_raw /data_folder/raw_rna_data --rna_matrix_filtered /data_folder/filtered_rna_data --hto_name_gmm "hto_name_1,hto_name_2,hto_name_3" --methods bff_cluster  --methodsForConsensus bff_cluster  --demuxmix False
+```
\ No newline at end of file

From 1d4f0bcdf041268fa1d6f9a6c55006c6b100b501 Mon Sep 17 00:00:00 2001
From: Lukas Heumos <lukas.heumos@posteo.net>
Date: Thu, 23 Nov 2023 18:59:48 +0100
Subject: [PATCH 13/32] Add quickstart to README

---
 README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/README.md b/README.md
index dbb0c56..6b3c23d 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,14 @@
 
 Read the [documentation](https://hadge.readthedocs.io/en/latest/) and the associated [manuscript](https://www.biorxiv.org/content/10.1101/2023.07.23.550061v1)
 
+## Quickstart
+
+```bash
+git clone https://github.com/theislab/hadge.git
+sh hadge/test_data/download_data.sh
+nextflow run main.nf -profile test
+```
+
 ## Citation
 
 hadge: a comprehensive pipeline for donor deconvolution in single cell

From f4e5b27367c291f249b502e93afd37b358141f95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Mon, 27 Nov 2023 15:02:52 +0100
Subject: [PATCH 14/32] demuxmix out

---
 bin/demuxmix.R                          | 104 ------------------------
 bin/summary_hash.py                     |  65 ---------------
 docs/source/hashing.md                  |  25 +-----
 docs/source/index.md                    |   3 +-
 modules/hash_demulti/demuxmix.nf        |  68 ----------------
 modules/hash_demultiplexing.nf          |  25 +-----
 modules/single/hash_demulti/demuxmix.nf |  79 ------------------
 modules/single/hash_demultiplexing.nf   |  19 +----
 nextflow.config                         |  18 ----
 9 files changed, 6 insertions(+), 400 deletions(-)
 delete mode 100755 bin/demuxmix.R
 delete mode 100644 modules/hash_demulti/demuxmix.nf
 delete mode 100644 modules/single/hash_demulti/demuxmix.nf

diff --git a/bin/demuxmix.R b/bin/demuxmix.R
deleted file mode 100755
index 6552507..0000000
--- a/bin/demuxmix.R
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/usr/bin/env Rscript
-library(Seurat)
-library(demuxmix)
-library(argparse)
-library(data.table)
-
-
-# Create a parser
-parser <- ArgumentParser("Parameters for Demuxmix")
-parser$add_argument("--fileUmi", help = "Path to file UMI count matrix.")
-parser$add_argument("--fileHto", help = "Path to file HTO count matrix.")
-parser$add_argument("--ndelim", help = "For the initial identity calss for each cell, delimiter for the cell's column name", default = "_")
-parser$add_argument("--pAcpt", help='Acceptance probability that must be reached in order to assign a droplet to a hashtag. ')
-parser$add_argument("--assay", help='Assay name')
-parser$add_argument("--gene_col", help = "Specify which column of genes.tsv or features.tsv to use for gene names; default is 2", type="integer", default = 2)
-parser$add_argument("--rna_available", help='TRUE if RNA assay is available',default = FALSE)
-parser$add_argument("--correctTails", help='If TRUE, droplets meeting the threshold defined by alpha (beta) are classified as "negative" ("positive") even if the mixture model suggests a different classification',default = TRUE)
-parser$add_argument("--model", help='A character specifying the type of mixture model to be used. Either "naive", "regpos", "reg" or "auto".', default = 'naive')
-parser$add_argument("--alpha_demuxmix", help='Threshold defining the left tail of the mixture distribution where droplets should not be classified as "positive".',type = "double",default=0.9)
-parser$add_argument("--beta_demuxmix", help='Threshold for defining the right tail of the mixture distribution where droplets should not be classified as "negative".',type = "double",default=0.9)
-parser$add_argument("--tol_demuxmix", help='Convergence criterion for the EM algorithm used to fit the mixture models.', type="double")
-parser$add_argument("--maxIter_demuxmix", help='Maximum number of iterations for the EM algorithm',type = "integer",default=100)
-parser$add_argument("--k_hto", help='Factor to define outliers in the HTO counts.',type = "double",default=1.5)
-parser$add_argument("--k_rna", help='Factor to define outliers in the numbers of detected genes.',type = "double",default=1.5)
-parser$add_argument("--assignmentOutDemuxmix", help="Prefix name for the file containing the output of Demuxmix assignment", type = "character", default = "demuxmix")
-parser$add_argument("--outputdir", help='Output directory')
-args <- parser$parse_args()
-
-
-if(as.logical(args$rna_available)){ 
-    
-    umi <- Read10X(data.dir = args$fileUmi, gene.column=args$gene_col, strip.suffix = TRUE)
-    
-}
-counts <- Read10X(data.dir = args$fileHto, gene.column=args$gene_col, strip.suffix = TRUE)
-
-
-if(as.logical(args$rna_available)){
-    Argument <- c("fileUmi", "fileHto")
-    Value <- c(args$fileUmi, args$fileHto)
-    params <- data.frame(Argument, Value)
-    #demuxmix requires a Seurat object created from raw data, 
-    #without any type of pre-processing (Normalisation, ScaleData, etc)
-    # Setup Seurat object
-    hashtag <- CreateSeuratObject(counts = umi, names.delim = args$ndelim)
-    hashtag[[args$assay]] <- CreateAssayObject(counts = counts)
-  }else{
-    #If RNA data is not available, a Seurat object containing the HTO data is created
-    hashtag <- CreateSeuratObject(counts = counts, names.delim = args$ndelim, assay = args$assay)
-    Argument <- c( "fileHto")
-    Value <- c(args$fileHto)
-    params <- data.frame(Argument, Value)
-  }
-
-
-Argument <- c("fileUmi", "fileHto","assay", "model", "alpha_demuxmix", "beta_demuxmix", "tol_demuxmix", "maxIter_demuxmix", "k_hto","k_rna","correct_tails")
-Value <- c(args$fileUmi, args$fileHto, args$assay, args$model, args$alpha_demuxmix, args$beta_demuxmix, args$tol_demuxmix, args$maxIter_demuxmix, args$k_hto, args$k_rna,args$correctTails)
-
-params <- data.frame(Argument, Value)
-
-
-### Demuxmix
-hto_counts <- as.matrix(GetAssayData(hashtag[[args$assay]], slot = "counts"))
-hashtags_object <- GetAssayData(hashtag[[args$assay]], slot = "counts")
-hashtag_list<-hashtags_object@Dimnames
-
-tryCatch({
-#Demultiplexing process
-    if(args$model != 'naive' && as.logical(args$rna_available)){
-        rna_counts <- hashtag$nCount_RNA
-        demuxmix_demul <- demuxmix(hto_counts,rna= rna_counts, model = args$model, alpha= args$alpha_demuxmix,beta= args$beta_demuxmix,maxIter=args$maxIter_demuxmix,k.hto=args$k_hto, correctTails = as.logical(args$correctTails))
-
-    }else{
-        print("Executing naive mode for Demuxmix")
-        demuxmix_demul <- demuxmix(hto_counts, model = args$model, alpha= args$alpha_demuxmix,beta= args$beta_demuxmix,maxIter=args$maxIter_demuxmix,k.hto=args$k_hto)
-    }
-    demuxmix_classify <- dmmClassify(demuxmix_demul)
-    sumary_demuxmix <- summary(demuxmix_demul)
-    demuxmix_classify$Barcode <- hashtag_list[[2]]
-
-    res_dt <- as.data.table(demuxmix_classify)
-    res_dt[, Classification := Type]
-    res_dt[Classification == "multiplet", Classification := "doublet"]
-    res_dt[Classification == "uncertain", Classification := "negative"]
-    
-    res_dt$HTO <- gsub(',', '_', res_dt$HTO)
-    write.csv(res_dt, paste0(args$outputdir, "/", args$assignmentOutDemuxmix, "_assignment_demuxmix.csv"), row.names=FALSE)
-    write.csv(sumary_demuxmix, paste0(args$outputdir, "/", args$assignmentOutDemuxmix, "_summary_results.csv"), row.names=FALSE)
-
-}, error = function(e) {
-    print("Error for Demuxmix")
-    error_message <- conditionMessage(e)
-    writeLines(e, "error_log.txt")
-    df <- data.frame()
-    write.csv(df, paste0(args$outputdir, "/", args$assignmentOutDemuxmix, "_assignment_demuxmix.csv"), row.names=FALSE)
-},finally = {
-    write.csv(params, paste0(args$outputdir, "/params.csv"))
-})    
-
-
-
-
-
-
diff --git a/bin/summary_hash.py b/bin/summary_hash.py
index bdf8582..f68f05e 100755
--- a/bin/summary_hash.py
+++ b/bin/summary_hash.py
@@ -12,7 +12,6 @@
 parser.add_argument("--multiseq", help="Folder containing output files of multiseq", default=None)
 parser.add_argument("--hashsolo", help="Folder containing output files of hashsolo", default=None)
 parser.add_argument("--hashedDrops", help="Folder containing output files of hashedDrops", default=None)
-parser.add_argument("--demuxmix", help="Folder containing output files of Demuxmix", default=None)
 parser.add_argument("--bff", help="Folder containing output files of BFF", default=None)
 parser.add_argument("--gmm_demux", help="Folder containing output files of GMM-Demux", default=None)
 parser.add_argument("--generate_anndata", help="Generate anndata", action='store_true')
@@ -291,66 +290,6 @@ def htodemux_summary(htodemux_res, raw_adata, raw_mudata):
     
     params = pd.concat(params, axis=1)
     params.to_csv("hash_summary/htodemux_params.csv")
-        
-def demuxmix_summary(demuxmix_res,raw_adata, raw_mudata):
-    classi = []
-    assign = []
-    params = []
-    files_in_folder = [item for item in demuxmix_res if os.path.isfile(os.path.join(demuxmix_res, item))]
-
-    if len(files_in_folder) > 0:
-        for x in demuxmix_res:
-            obs_res_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("_assignment_demuxmix.csv")][0])
-            demuxmix_asign = pd.read_csv(obs_res_dir)
-            if demuxmix_asign.empty:
-                #no results create empty dataframe for empty col
-                column_names = ['Barcode', os.path.basename(x)]
-                # Create an empty dataframe with only column names
-                df = pd.DataFrame(columns=column_names)
-                classi.append(df)
-                assign.append(df)
-            else:
-                dt_assign = demuxmix_asign[["Barcode", "HTO"]]
-                dt_assign.columns = ["Barcode", os.path.basename(x)]
-                assign.append(dt_assign)
-
-                if raw_adata is not None:
-                    adata = raw_adata.copy()
-                    adata.obs = adata.obs.merge(demuxmix_asign, left_index=True, right_on='Barcode', how='left').set_index('Barcode')
-                    adata.obs.rename(columns={adata.obs.columns[0]: 'donor'}, inplace=True)
-                    adata.obs.donor = adata.obs.donor.fillna("negative")
-                    adata.obs.donor = adata.obs.donor.astype(str)
-                    adata.write("hash_summary/adata/adata_with_"+os.path.basename(x)+".h5ad")
-
-                if raw_mudata is not None:
-                    mudata = raw_mudata.copy()
-                    mudata['rna'].obs = mudata['rna'].obs.merge(demuxmix_asign, left_index=True, right_on='Barcode', how='left').set_index('Barcode')
-                    mudata['rna'].obs.rename(columns={mudata['rna'].obs.columns[0]: 'donor'}, inplace=True)
-                    mudata['rna'].obs.donor = mudata['rna'].obs.donor.fillna("negative")
-                    mudata['rna'].obs.donor = mudata['rna'].obs.donor.astype(str)
-                    mudata.update()
-                    mudata.write("hash_summary/mudata/mudata_with_mudata_"+ os.path.basename(x)+".h5mu") 
-
-                demuxmix_classi = pd.read_csv(obs_res_dir)
-                dt_classi = demuxmix_classi[["Barcode", "Classification"]]
-                dt_classi.columns = ["Barcode", os.path.basename(x)]
-                classi.append(dt_classi) 
-
-                params_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename == "params.csv"][0])
-                params_res = pd.read_csv(params_dir, usecols=[1, 2], keep_default_na=False, index_col=0)     
-                params_res.columns = [os.path.basename(x)]
-                params.append(params_res)
-
-        classi_df = pd.concat(classi, axis=1, join="outer")
-        classi_df.to_csv("hash_summary" + "/demuxmix_classification.csv",index=False)
-        
-        assign_df = pd.concat(assign, axis=1, join="outer")
-        assign_df.to_csv("hash_summary"  +"/demuxmix_assignment.csv",index=False)
-
-        params = pd.concat(params, axis=1)
-        params.to_csv("hash_summary"  +"/demuxmix_params.csv",index=False)
-    else:
-        print("No results found for Demuxmix")
 
 def gmm_summary(gmmDemux_res,raw_adata, raw_mudata):
     classi = []
@@ -535,10 +474,6 @@ def bff_summary(bff_res,raw_adata, raw_mudata):
     if args.htodemux is not None:
         htodemux_res = args.htodemux.split(':')
         htodemux_summary(htodemux_res, adata, mudata)
-    
-    if args.demuxmix is not None:
-        demuxmix_res = args.demuxmix.split(':')
-        demuxmix_summary(demuxmix_res, adata, mudata)
 
     if args.gmm_demux is not None:
         gmmDemux_res = args.gmm_demux.split(':')
diff --git a/docs/source/hashing.md b/docs/source/hashing.md
index d77253f..59c1dd5 100644
--- a/docs/source/hashing.md
+++ b/docs/source/hashing.md
@@ -18,7 +18,6 @@ The input data depends heavily on the deconvolution tools. In the following tabl
 | HashedDrops          | - 10x mtx directory with hashing count matrix (Directory)                                                          | `params.hto_matrix_hashedDrops`                                |
 | Demuxem              | - 10x mtx directory with UMI count matrix (Directory) <br>- 10x mtx directory with hashing count matrix (Directory) | `params.hto_matrix_demuxem`<br>`params.rna_matrix_demuxem`     |
 | GMM - Demux              | - 10x mtx directory with UMI count matrix (Directory)<br> - 10x mtx directory with hashing count matrix (Directory) | `params.hto_matrix_gmm_demux`    |
-| Demuxmix            | - 10x mtx directory with UMI count matrix (Directory)<br>- 10x mtx directory with hashing count matrix (Directory) | `params.hto_matrix_demuxmix`<br>`params.rna_matrix_demuxmix`     |
 | BFF             | - 10x mtx directory with UMI count matrix (Directory)<br>- 10x mtx directory with hashing count matrix (Directory) | `params.hto_matrix_bff`     |
 
 
@@ -111,12 +110,6 @@ output directory: `$pipeline_output_folder/hashedDrops/hashedDrops_[task_ID/samp
 - `${params.objectOutHashedDrops}_LogFC.png`: a diagnostic plot comparing the log-fold change between the second HTO's abundance and the ambient contamination
 - `params.csv`: specified parameters in the HashedDrops task
 
-### Demuxmix
-
-output directory: `$pipeline_output_folder/demuxmix/demuxmix_[task_ID/sampleId]`
-
-- `${params.assignmentOutDemuxmix}_assignment_demuxmix.csv`: the assignment and classification results produced by Demuxmix
-- `params.csv`: specified parameters in the Demuxmix task
 
 ### GMM-Demux
 
@@ -318,22 +311,6 @@ output directory: `$pipeline_output_folder/bff/bff_[task_ID/sampleId]`
 | preprocess_bff     | When True, the data is preprocess using the method ProcessCountMatrix from CellHashR. Default: False    |
 | barcodeWhitelist     | A vector of barcode names to retain. This parameter is used only when the pre-processing step is executed. Default: NULL    |
 
-### Demuxmix
-|                          |                                                                                              |
-| ------------------------ | -------------------------------------------------------------------------------------------- |
-| demuxmix                 | Whether to perform Demuxmix. Default: False                                                   |
-| hto_matrix_demuxmix      | Whether to use raw or filtered HTO count matrix. Default: raw                                                            |
-| rna_matrix_demuxmix   | Whether to use raw or filtered scRNA-seq count matrix. Default: raw                                                           |
-| assignmentOutDemuxmix      | Name for the folder output. Default: demuxmix|
-| correctTails | If TRUE, droplets meeting the threshold defined by alpha (beta) are classified as "negative" ("positive") even if the mixture model suggests a different classification.  Default: True   |
-| rna_available           | If TRUE, it indicates that RNA data is available and some modes are execuatble with Demuxmix. Default: TRUE     |
-| model   | A character specifying the type of mixture model to be used. Either "naive", "regpos", "reg" or "auto". The last three options require parameter rna to be specified. Default:naive  |
-| alpha_demuxmix    | Threshold defining the left tail of the mixture distribution where droplets should not be classified as "positive". Threshold must be between 0 and 1 Default: 0.9  |
-| beta_demuxmix  | Threshold for defining the right tail of the mixture distribution where droplets should not be classified as "negative"  Default: 0.9       |
-| tol_demuxmix | Convergence criterion for the EM algorithm used to fit the mixture models.  Default: 0.00001   |
-| maxIter_demuxmix    | Maximum number of iterations for the EM algorithm and for the alternating iteration process fitting the NB regression models Default:1000           |
-| k_hto   |Factor to define outliers in the HTO count Default: 1.5       |
-| k_rna   |Factor to define outliers in the numbers of detected genes. Default: 1.5       |
 
 ### General Use
 
@@ -349,5 +326,5 @@ Besides, the method or methods for consensus must be given under the parameter `
 
 ```bash
 nextflow run main.nf --mode hashing --match_donor False  --hto_matrix_raw /data_folder/raw_hto_data 
---hto_matrix_filtered /data_folder/filtered_hto_data --barcodes /data_folder/filtered_hto_data/barcodes.tsv.gz --rna_matrix_raw /data_folder/raw_rna_data --rna_matrix_filtered /data_folder/filtered_rna_data --hto_name_gmm "hto_name_1,hto_name_2,hto_name_3" --methods bff_cluster  --methodsForConsensus bff_cluster  --demuxmix False
+--hto_matrix_filtered /data_folder/filtered_hto_data --barcodes /data_folder/filtered_hto_data/barcodes.tsv.gz --rna_matrix_raw /data_folder/raw_rna_data --rna_matrix_filtered /data_folder/filtered_rna_data --hto_name_gmm "hto_name_1,hto_name_2,hto_name_3" --methods bff_cluster  --methodsForConsensus bff_cluster
 ```
\ No newline at end of file
diff --git a/docs/source/index.md b/docs/source/index.md
index cb4f111..dc70159 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -16,14 +16,13 @@ The genetics-based deconvolution workflow includes 5 methods:
 - Souporcell
 - scSplit
 
-The hashing-based deconvolution includes 8 methods:
+The hashing-based deconvolution includes 7 methods:
 
 - hashedDrops
 - Multiseq
 - HTODemux
 - Demuxem
 - HashSolo
-- Demuxmix
 - BFF
 - GMM_Demux
 
diff --git a/modules/hash_demulti/demuxmix.nf b/modules/hash_demulti/demuxmix.nf
deleted file mode 100644
index 6f93482..0000000
--- a/modules/hash_demulti/demuxmix.nf
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/env nextflow
-nextflow.enable.dsl=2
-
-process demuxmix{
-    publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti/demuxmix", mode:'copy'
-    label 'small_mem'
-    
-    input:
-        tuple val(sampleId), path(raw_hto_matrix_dir, stageAs: "hto_data_${params.hto_matrix_demuxmix}"), 
-                             path(raw_rna_matrix_dir, stageAs: "rna_data_${params.rna_matrix_demuxmix}")
-        val hto_raw_or_filtered
-        val rna_raw_or_filtered
-        val rna_available
-        val assay
-        val ndelim
-        val model
-        val alpha_demuxmix
-        val beta_demuxmix
-        val tol_demuxmix
-        val maxIter_demuxmix
-        val k_hto
-        val k_rna
-        val correctTails
-        val assignmentOutDemuxmix
-        val gene_col
-        
-    output:
-        path "demuxmix_${sampleId}"
-        
-    script:
-        """
-        mkdir demuxmix_${sampleId}
-
-        demuxmix.R --fileUmi rna_data_${params.rna_matrix_demuxmix} --fileHto hto_data_${params.hto_matrix_demuxmix} --rna_available $rna_available --assay $assay \
-                    --ndelim $ndelim --model $model --alpha_demuxmix $alpha_demuxmix --beta_demuxmix $beta_demuxmix --tol_demuxmix $tol_demuxmix  \
-                    --maxIter_demuxmix $maxIter_demuxmix --correctTails $correctTails --k_hto $k_hto  --k_rna $k_rna --outputdir demuxmix_${sampleId} \
-                    --assignmentOutDemuxmix $assignmentOutDemuxmix --gene_col $gene_col
-        """
-
-}
-
-workflow demuxmix_hashing{
-  take:
-      input_list
-  main:
-        assay = params.assay
-        ndelim = params.ndelim
-        model = params.model
-        alpha_demuxmix =  params.alpha_demuxmix
-        beta_demuxmix = params.beta_demuxmix
-        tol_demuxmix = params.tol_demuxmix
-        maxIter_demuxmix = params.maxIter_demuxmix
-        k_hto = params.k_hto
-        k_rna = params.k_rna
-        correctTails = params.correctTails
-        assignmentOutDemuxmix = params.assignmentOutDemuxmix 
-        gene_col = params.gene_col
-        
-
-        demuxmix(input_list, assay,ndelim,model,alpha_demuxmix, beta_demuxmix, tol_demuxmix, maxIter_demuxmix, k_hto, k_rna,correctTails,assignmentOutDemuxmix, gene_col)
-  
-  emit:
-        demuxmix.out.collect()
-}
-
-workflow{
-    demuxmix_hashing()
-}
diff --git a/modules/hash_demultiplexing.nf b/modules/hash_demultiplexing.nf
index 4fb87a6..eb95f5e 100644
--- a/modules/hash_demultiplexing.nf
+++ b/modules/hash_demultiplexing.nf
@@ -7,7 +7,6 @@ include { htodemux_hashing } from './hash_demulti/htodemux'
 include { hash_solo_hashing } from './hash_demulti/hashsolo'
 include { hashedDrops_hashing } from './hash_demulti/hashedDrops'
 include { demuxem_hashing } from './hash_demulti/demuxem'
-include { demuxmix_hashing } from './hash_demulti/demuxmix'
 include { gmm_demux_hashing } from './hash_demulti/gmm_demux'
 include { bff_hashing } from './hash_demulti/bff'
 
@@ -22,7 +21,6 @@ process summary{
         val htodemux_result
         val multiseq_result
         val hashedDrops_result
-        val demuxmix_result
         val bff_result
         val gmmDemux_result
         val generate_anndata
@@ -32,7 +30,6 @@ process summary{
         tuple val(sampleId), path("hash_summary")
 
     script:
-        def demuxem_files = ""
         def htodemux_files = ""
         def hashsolo_files = ""
         def multiseq_files = ""
@@ -43,10 +40,6 @@ process summary{
         def generate_adata = ""
         def generate_mdata = ""
         
-        if (demuxem_result != "no_result"){
-            demuxem_res = demuxem_result.find{it.name.contains(sampleId)}
-            demuxem_files = "--demuxem ${demuxem_res}"
-        }
         if (hashsolo_result != "no_result"){
             hashsolo_res = hashsolo_result.find{it.name.contains(sampleId)}
             hashsolo_files = "--hashsolo ${hashsolo_res}"
@@ -92,7 +85,7 @@ process summary{
         }
         
         """
-        summary_hash.py $demuxem_files $htodemux_files $multiseq_files $hashedDrops_files $hashsolo_files $demuxmix_files $gmmDemux_files $bff_files $generate_adata $generate_mdata --sampleId $sampleId
+        summary_hash.py $demuxem_files $htodemux_files $multiseq_files $hashedDrops_files $hashsolo_files $gmmDemux_files $bff_files $generate_adata $generate_mdata --sampleId $sampleId
         """
 }
 
@@ -172,20 +165,6 @@ workflow hash_demultiplexing{
     }
     
 
-    if (params.demuxmix == "True"){
-        Channel.fromPath(params.multi_input) \
-                | splitCsv(header:true) \
-                | map { row-> tuple(row.sampleId, params.hto_matrix_demuxmix == "raw" ? row.hto_matrix_raw : row.hto_matrix_filtered,
-                                    params.rna_matrix_demuxmix == "raw" ? row.rna_matrix_raw : row.rna_matrix_filtered, 
-                                    params.hto_matrix_demuxmix,params.rna_matrix_demuxmix)}
-                |set {input_list_demuxmix}   
-                demuxmix_hashing (input_list_demuxmix,params.rna_available)
-        demuxmix_out = demuxmix_hashing.out
-    }
-    else{
-        demuxmix_out = channel.value("no_result")
-    }
-
     if (params.bff == "True"){
         Channel.fromPath(params.multi_input) \
                 | splitCsv(header:true) \
@@ -215,7 +194,7 @@ workflow hash_demultiplexing{
                 | splitCsv(header:true) \
                 | map { row-> tuple(row.sampleId, row.hto_matrix_filtered, row.rna_matrix_filtered)}
                 | set {input_list_summary}
-    summary(input_list_summary, demuxem_out, hashsolo_out, htodemux_out, multiseq_out, hashedDrops_out,demuxmix_out,bff_out,gmmDemux_out, params.generate_anndata, params.generate_mudata)
+    summary(input_list_summary, demuxem_out, hashsolo_out, htodemux_out, multiseq_out, hashedDrops_out,bff_out,gmmDemux_out, params.generate_anndata, params.generate_mudata)
             
     emit:
         summary.out
diff --git a/modules/single/hash_demulti/demuxmix.nf b/modules/single/hash_demulti/demuxmix.nf
deleted file mode 100644
index 021b887..0000000
--- a/modules/single/hash_demulti/demuxmix.nf
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/usr/bin/env nextflow
-nextflow.enable.dsl=2
-
-process demuxmix{
-    publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/demuxmix", mode:'copy'
-    label 'small_mem'
-    input:
-        path hto_matrix, stageAs: 'hto_data'
-        path umi_matrix, stageAs: 'rna_data'
-        val hto_raw_or_filtered
-        val rna_raw_or_filtered
-        each rna_available
-        each assay
-        val ndelim
-        each model
-        each alpha_demuxmix
-        each beta_demuxmix
-        each tol_demuxmix
-        each maxIter_demuxmix
-        each k_hto
-        each k_rna
-        each correctTails
-        each assignmentOutDemuxmix
-        each gene_col
-        
-    output:
-        path "demuxmix_${task.index}"
-        
-    script:
-        
-        """
-        mkdir demuxmix_${task.index}
-        demuxmix.R --fileUmi rna_data --fileHto hto_data --rna_available $rna_available --assay $assay --ndelim $ndelim --model $model --alpha_demuxmix $alpha_demuxmix \
-            --beta_demuxmix $beta_demuxmix --tol_demuxmix $tol_demuxmix --maxIter_demuxmix $maxIter_demuxmix --correctTails $correctTails \
-            --k_hto $k_hto  --k_rna $k_rna --outputdir demuxmix_${task.index} --assignmentOutDemuxmix $assignmentOutDemuxmix --gene_col $gene_col
-        """
-
-}
-
-def split_input(input){
-    if (input =~ /;/ ){
-        Channel.from(input).map{ return it.tokenize(';')}.flatten()
-    }
-    else{
-        Channel.from(input)
-    }
-}
-
-workflow demuxmix_hashing{
-  take:
-        hto_matrix
-        rna_matrix
-        hto_raw_or_filtered
-        rna_raw_or_filtered
-        rna_available
-  main:
-        assay = split_input(params.assay)
-        ndelim = params.ndelim
-        model = split_input(params.model)
-        alpha_demuxmix =  split_input(params.alpha_demuxmix)
-        beta_demuxmix = split_input(params.beta_demuxmix)
-        tol_demuxmix = split_input(params.tol_demuxmix)
-        maxIter_demuxmix = split_input(params.maxIter_demuxmix)
-        k_hto = split_input(params.k_hto)
-        k_rna = split_input(params.k_rna)
-        correctTails = split_input(params.correctTails)
-        assignmentOutDemuxmix = split_input(params.assignmentOutDemuxmix) 
-        gene_col = split_input(params.gene_col)
-        
-
-        demuxmix(hto_matrix,rna_matrix,hto_raw_or_filtered,rna_raw_or_filtered,rna_available, assay,ndelim,model, alpha_demuxmix, beta_demuxmix, tol_demuxmix, maxIter_demuxmix, k_hto, k_rna,correctTails,assignmentOutDemuxmix,gene_col )
-  
-  emit:
-        demuxmix.out.collect()
-}
-
-workflow{
-    demuxmix_hashing()
-}
diff --git a/modules/single/hash_demultiplexing.nf b/modules/single/hash_demultiplexing.nf
index 0a011a9..3bea0fa 100644
--- a/modules/single/hash_demultiplexing.nf
+++ b/modules/single/hash_demultiplexing.nf
@@ -7,7 +7,6 @@ include { htodemux_hashing } from './hash_demulti/htodemux'
 include { hash_solo_hashing } from './hash_demulti/hashsolo'
 include { hashedDrops_hashing } from './hash_demulti/hashedDrops'
 include { demuxem_hashing } from './hash_demulti/demuxem'
-include { demuxmix_hashing } from './hash_demulti/demuxmix'
 include { gmm_demux_hashing } from './hash_demulti/gmm_demux'
 include { bff_hashing } from './hash_demulti/bff'
 
@@ -22,7 +21,6 @@ process summary{
         val multiseq_result
         val hashedDrops_result
         val gmmDemux_result
-        val demuxmix_result
         val bff_result
         val generate_anndata
         val generate_mudata
@@ -62,9 +60,6 @@ process summary{
         if (gmmDemux_result != "no_result"){
             gmmDemux_files = "--gmm_demux ${gmmDemux_result.join(":")}"
         }
-        if (demuxmix_result != "no_result"){
-            demuxmix_files = "--demuxmix ${demuxmix_result.join(":")}"
-        }
         if (bff_result != "no_result"){
             bff_files = "--bff ${bff_result.join(":")}"
         }
@@ -85,7 +80,7 @@ process summary{
         }
         
         """
-        summary_hash.py $demuxem_files $htodemux_files $multiseq_files $hashedDrops_files $hashsolo_files $demuxmix_files $gmmDemux_files $bff_files $generate_adata $generate_mdata
+        summary_hash.py $demuxem_files $htodemux_files $multiseq_files $hashedDrops_files $hashsolo_files $gmmDemux_files $bff_files $generate_adata $generate_mdata
         """
 }
 
@@ -157,16 +152,6 @@ workflow hash_demultiplexing{
     else{
         hashedDrops_out = channel.value("no_result")
     }
-    if (params.demuxmix == "True"){
-        demuxmix_rna_input = params.rna_available == "False" ? channel.value("None") :
-                            (params.rna_matrix_demuxmix == "raw" ? hto_matrix_raw : hto_matrix_filtered)
-        demuxmix_hto_input = params.hto_matrix_demuxmix == "raw" ? rna_matrix_raw : rna_matrix_filtered
-        demuxmix_hashing(demuxmix_hto_input,demuxmix_rna_input,params.hto_matrix_demuxmix, params.rna_matrix_demuxmix,params.rna_available)
-        demuxmix_out = demuxmix_hashing.out
-    }
-    else{
-        demuxmix_out = channel.value("no_result")
-    }
     if (params.bff == "True"){
         bff_hto_input = params.hto_matrix_bff == "raw" ? hto_matrix_raw : hto_matrix_filtered
         bff_hashing(bff_hto_input)
@@ -185,7 +170,7 @@ workflow hash_demultiplexing{
     }
 
 
-    summary(demuxem_out, hashsolo_out, htodemux_out, multiseq_out, hashedDrops_out,gmmDemux_out, demuxmix_out,bff_out,
+    summary(demuxem_out, hashsolo_out, htodemux_out, multiseq_out, hashedDrops_out,gmmDemux_out,bff_out,
             params.generate_anndata, params.generate_mudata, rna_matrix_filtered, hto_matrix_filtered)
     emit:
         summary.out
diff --git a/nextflow.config b/nextflow.config
index bd056af..9ac0bbf 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -137,21 +137,6 @@ params {
   threshold_gmm = 0.8
   ambiguous = 0.05
 
-  //demuxmix
-  demuxmix = "False"
-  rna_matrix_demuxmix = "raw"
-  hto_matrix_demuxmix = "raw"
-  assignmentOutDemuxmix = "demuxmix"
-  correctTails = "TRUE"
-  rna_available = 'TRUE'
-  model = 'naive'
-  alpha_demuxmix = 0.9
-  beta_demuxmix = 0.9
-  tol_demuxmix = 0.00001
-  maxIter_demuxmix = 1000
-  k_hto = 1.5
-  k_rna = 1.5
-
   //bff
   bff = "False"
   rna_matrix_bff = "raw"
@@ -444,9 +429,6 @@ process {
     withName:gmm_demux {
         conda = "$projectDir/conda/gmm_demux.yml"
     }
-    withName:demuxmix {
-        conda = "$projectDir/conda/demuxmix.yml"
-    }
     withName:bff {
         conda = "$projectDir/conda/dropletutils.yml"
     }

From e67a52f60e82ffb0800bfd493c1680eb7bf81538 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Mon, 27 Nov 2023 15:08:53 +0100
Subject: [PATCH 15/32] demuxmix env deleted

---
 conda/demuxmix.yml | 10 ----------
 1 file changed, 10 deletions(-)
 delete mode 100644 conda/demuxmix.yml

diff --git a/conda/demuxmix.yml b/conda/demuxmix.yml
deleted file mode 100644
index 8556ed1..0000000
--- a/conda/demuxmix.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: demuxmix
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - conda-forge::r-base>=4.2
-  - bioconductor-demuxmix
-  - r-seurat
-  - r-argparse
-  - r-data.table

From c33b1c74fe6247d50fdb329d9a2101a82a9503d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Mon, 27 Nov 2023 22:27:59 +0100
Subject: [PATCH 16/32] deleting demuxmix parts

---
 modules/hash_demulti/demuxmix.nf      | 70 ---------------------------
 modules/hash_demultiplexing.nf        | 11 ++---
 modules/single/hash_demultiplexing.nf |  1 -
 3 files changed, 5 insertions(+), 77 deletions(-)
 delete mode 100644 modules/hash_demulti/demuxmix.nf

diff --git a/modules/hash_demulti/demuxmix.nf b/modules/hash_demulti/demuxmix.nf
deleted file mode 100644
index 5103a02..0000000
--- a/modules/hash_demulti/demuxmix.nf
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env nextflow
-nextflow.enable.dsl=2
-
-process demuxmix{
-    publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti/demuxmix", mode:'copy'
-    label 'small_mem'
-    
-    conda "$projectDir/conda/demuxmix.yml"
-    
-    input:
-        tuple val(sampleId), path(raw_hto_matrix_dir, stageAs: "hto_data_${params.hto_matrix_demuxmix}"), 
-                             path(raw_rna_matrix_dir, stageAs: "rna_data_${params.rna_matrix_demuxmix}")
-        val hto_raw_or_filtered
-        val rna_raw_or_filtered
-        val rna_available
-        val assay
-        val ndelim
-        val model
-        val alpha_demuxmix
-        val beta_demuxmix
-        val tol_demuxmix
-        val maxIter_demuxmix
-        val k_hto
-        val k_rna
-        val correctTails
-        val assignmentOutDemuxmix
-        val gene_col
-        
-    output:
-        path "demuxmix_${sampleId}"
-        
-    script:
-        """
-        mkdir demuxmix_${sampleId}
-
-        demuxmix.R --fileUmi rna_data_${params.rna_matrix_demuxmix} --fileHto hto_data_${params.hto_matrix_demuxmix} --rna_available $rna_available --assay $assay \
-                    --ndelim $ndelim --model $model --alpha_demuxmix $alpha_demuxmix --beta_demuxmix $beta_demuxmix --tol_demuxmix $tol_demuxmix \
-                    --maxIter_demuxmix $maxIter_demuxmix --correctTails $correctTails --k_hto $k_hto --k_rna $k_rna --outputdir demuxmix_${sampleId} \
-                    --assignmentOutDemuxmix $assignmentOutDemuxmix --gene_col $gene_col
-        """
-
-}
-
-workflow demuxmix_hashing{
-  take:
-      input_list
-  main:
-        assay = params.assay
-        ndelim = params.ndelim
-        model = params.model
-        alpha_demuxmix =  params.alpha_demuxmix
-        beta_demuxmix = params.beta_demuxmix
-        tol_demuxmix = params.tol_demuxmix
-        maxIter_demuxmix = params.maxIter_demuxmix
-        k_hto = params.k_hto
-        k_rna = params.k_rna
-        correctTails = params.correctTails
-        assignmentOutDemuxmix = params.assignmentOutDemuxmix 
-        gene_col = params.gene_col
-        
-
-        demuxmix(input_list, assay,ndelim,model,alpha_demuxmix, beta_demuxmix, tol_demuxmix, maxIter_demuxmix, k_hto, k_rna,correctTails,assignmentOutDemuxmix, gene_col)
-  
-  emit:
-        demuxmix.out.collect()
-}
-
-workflow{
-    demuxmix_hashing()
-}
diff --git a/modules/hash_demultiplexing.nf b/modules/hash_demultiplexing.nf
index 7c29cd7..ee51c75 100644
--- a/modules/hash_demultiplexing.nf
+++ b/modules/hash_demultiplexing.nf
@@ -37,11 +37,15 @@ process summary{
         def multiseq_files = ""
         def hashedDrops_files = ""
         def gmmDemux_files = ""
-        def demuxmix_files = ""
+        def demuxem_files = ""
         def bff_files = ""
         def generate_adata = ""
         def generate_mdata = ""
         
+         if (demuxem_result != "no_result"){
+            demuxem_res = demuxem_result.find{it.name.contains(sampleId)}
+            demuxem_files = "--demuxem ${demuxem_res}"
+        }
         if (hashsolo_result != "no_result"){
             hashsolo_res = hashsolo_result.find{it.name.contains(sampleId)}
             hashsolo_files = "--hashsolo ${hashsolo_res}"
@@ -62,10 +66,6 @@ process summary{
             gmmDemux_res = gmmDemux_result.find{it.name.contains(sampleId)}
             gmmDemux_files = "--gmm_demux ${gmmDemux_res}"
         }
-        if (demuxmix_result != "no_result"){
-            demuxmix_res = demuxmix_result.find{it.name.contains(sampleId)}
-            demuxmix_files = "--demuxmix ${demuxmix_res}"
-        }
         if (bff_result != "no_result"){
             bff_res = bff_result.find{it.name.contains(sampleId)}
             bff_files = "--bff ${bff_res}"
@@ -87,7 +87,6 @@ process summary{
         }
         
         """
-        summary_hash.py $demuxem_files $htodemux_files $multiseq_files $hashedDrops_files $hashsolo_files $demuxmix_files $gmmDemux_files $bff_files $generate_adata $generate_mdata
         summary_hash.py $demuxem_files $htodemux_files $multiseq_files $hashedDrops_files $hashsolo_files $gmmDemux_files $bff_files $generate_adata $generate_mdata --sampleId $sampleId
         """
 }
diff --git a/modules/single/hash_demultiplexing.nf b/modules/single/hash_demultiplexing.nf
index 57dca67..fc3592a 100644
--- a/modules/single/hash_demultiplexing.nf
+++ b/modules/single/hash_demultiplexing.nf
@@ -39,7 +39,6 @@ process summary{
         def multiseq_files = ""
         def hashedDrops_files = ""
         def gmmDemux_files = ""
-        def demuxmix_files = ""
         def bff_files = ""
         def generate_adata = ""
         def generate_mdata = ""

From 20fd60f724f3bf982b848ed6f141ae90171657d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Tue, 28 Nov 2023 13:11:48 +0100
Subject: [PATCH 17/32] demuxmix module out - single

---
 modules/single/hash_demulti/demuxmix.nf | 82 -------------------------
 1 file changed, 82 deletions(-)
 delete mode 100644 modules/single/hash_demulti/demuxmix.nf

diff --git a/modules/single/hash_demulti/demuxmix.nf b/modules/single/hash_demulti/demuxmix.nf
deleted file mode 100644
index b687e97..0000000
--- a/modules/single/hash_demulti/demuxmix.nf
+++ /dev/null
@@ -1,82 +0,0 @@
-#!/usr/bin/env nextflow
-nextflow.enable.dsl=2
-
-process demuxmix{
-    publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/demuxmix", mode:'copy'
-    label 'small_mem'
-
-    conda "$projectDir/conda/demuxmix.yml"
-    
-    input:
-        path hto_matrix, stageAs: 'hto_data'
-        path umi_matrix, stageAs: 'rna_data'
-        val hto_raw_or_filtered
-        val rna_raw_or_filtered
-        each rna_available
-        each assay
-        val ndelim
-        each model
-        each alpha_demuxmix
-        each beta_demuxmix
-        each tol_demuxmix
-        each maxIter_demuxmix
-        each k_hto
-        each k_rna
-        each correctTails
-        each assignmentOutDemuxmix
-        each gene_col
-        
-    output:
-        path "demuxmix_${task.index}"
-        
-    script:
-        
-        """
-        mkdir demuxmix_${task.index}
-        demuxmix.R --fileUmi rna_data --fileHto hto_data --rna_available $rna_available --assay $assay --ndelim $ndelim --model $model --alpha_demuxmix $alpha_demuxmix \
-            --beta_demuxmix $beta_demuxmix --tol_demuxmix $tol_demuxmix --maxIter_demuxmix $maxIter_demuxmix --correctTails $correctTails \
-            --k_hto $k_hto  --k_rna $k_rna --outputdir demuxmix_${task.index} --assignmentOutDemuxmix $assignmentOutDemuxmix --gene_col $gene_col
-        """
-
-}
-
-def split_input(input){
-    if (input =~ /;/ ){
-        Channel.from(input).map{ return it.tokenize(';')}.flatten()
-    }
-    else{
-        Channel.from(input)
-    }
-}
-
-workflow demuxmix_hashing{
-  take:
-        hto_matrix
-        rna_matrix
-        hto_raw_or_filtered
-        rna_raw_or_filtered
-        rna_available
-  main:
-        assay = split_input(params.assay)
-        ndelim = params.ndelim
-        model = split_input(params.model)
-        alpha_demuxmix =  split_input(params.alpha_demuxmix)
-        beta_demuxmix = split_input(params.beta_demuxmix)
-        tol_demuxmix = split_input(params.tol_demuxmix)
-        maxIter_demuxmix = split_input(params.maxIter_demuxmix)
-        k_hto = split_input(params.k_hto)
-        k_rna = split_input(params.k_rna)
-        correctTails = split_input(params.correctTails)
-        assignmentOutDemuxmix = split_input(params.assignmentOutDemuxmix) 
-        gene_col = split_input(params.gene_col)
-        
-
-        demuxmix(hto_matrix,rna_matrix,hto_raw_or_filtered,rna_raw_or_filtered,rna_available, assay,ndelim,model, alpha_demuxmix, beta_demuxmix, tol_demuxmix, maxIter_demuxmix, k_hto, k_rna,correctTails,assignmentOutDemuxmix,gene_col )
-  
-  emit:
-        demuxmix.out.collect()
-}
-
-workflow{
-    demuxmix_hashing()
-}

From 5b722edc715a1848a018152b8c514fc7e6ba4ced Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Wed, 29 Nov 2023 11:21:08 +0100
Subject: [PATCH 18/32] fix for hashed drops, nextflow passing incompatible
 strings for  NULL values

---
 bin/dropletUtils.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bin/dropletUtils.R b/bin/dropletUtils.R
index d775e91..0b83d06 100755
--- a/bin/dropletUtils.R
+++ b/bin/dropletUtils.R
@@ -83,10 +83,11 @@ png(paste0(args$outputdir, "/", "plot_emptyDrops.png"))
 plot(emptyDrops_out$Total, -emptyDrops_out$LogProb, col=colors, xlab="Total UMI count", ylab="-Log Probability")
 dev.off()
 
+combinations_transformed <- ifelse(tolower(args$combinations) == "null", NULL, args$combinations)
 if (args$ambient == TRUE) {
-    hashedDrops_out <- hashedDrops(hto[,which(is.cell)], min.prop = args$minProp, ambient = metadata(emptyDrops_out)$ambient, pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient, doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin, doublet.mixture = args$doubletMixture, confident.nmads = args$confidentNmads, confident.min = args$confidenMin, combinations = args$combinations)
+    hashedDrops_out <- hashedDrops(hto[,which(is.cell)], min.prop = args$minProp, ambient = metadata(emptyDrops_out)$ambient, pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient, doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin, doublet.mixture = args$doubletMixture, confident.nmads = args$confidentNmads, confident.min = args$confidenMin, combinations = combinations_transformed)
 } else {
-    hashedDrops_out <- hashedDrops(hto[,which(is.cell)], min.prop = args$minProp, pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient, doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin, doublet.mixture = args$doubletMixture, confident.nmads = args$confidentNmads, confident.min = args$confidenMin, combinations = args$combinations)
+    hashedDrops_out <- hashedDrops(hto[,which(is.cell)], min.prop = args$minProp, pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient, doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin, doublet.mixture = args$doubletMixture, confident.nmads = args$confidentNmads, confident.min = args$confidenMin, combinations = combinations_transformed)
 }
 
 print("------------------- hashedDrops finished ---------------------------------")

From 64d1bb19117dcb277e7661a1e813b229acfc7c07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Wed, 29 Nov 2023 11:27:43 +0100
Subject: [PATCH 19/32] fix for hashed drops, nextflow passing incompatible
 strings for  NULL values

---
 bin/dropletUtils.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bin/dropletUtils.R b/bin/dropletUtils.R
index 0b83d06..8cb7cfa 100755
--- a/bin/dropletUtils.R
+++ b/bin/dropletUtils.R
@@ -60,10 +60,10 @@ parser$add_argument("--gene_col", help = "Specify which column of genes.tsv or f
 args <- parser$parse_args()
 
 hto <- Read10X(data.dir = args$raw_hto_matrix_dir, gene.column = args$gene_col)
-
+ignore_transformed <- ifelse(tolower(args$ignore) == "null", NULL, args$ignore)
 emptyDrops_out <- emptyDrops(hto, lower = args$lower, niters = args$niters,
                              test.ambient = args$testAmbient,
-                             ignore = args$ignore,
+                             ignore = ignore_transformed,
                              alpha = args$alpha, round = args$round,
                              by.rank = args$byRank)
 
@@ -84,6 +84,7 @@ plot(emptyDrops_out$Total, -emptyDrops_out$LogProb, col=colors, xlab="Total UMI
 dev.off()
 
 combinations_transformed <- ifelse(tolower(args$combinations) == "null", NULL, args$combinations)
+
 if (args$ambient == TRUE) {
     hashedDrops_out <- hashedDrops(hto[,which(is.cell)], min.prop = args$minProp, ambient = metadata(emptyDrops_out)$ambient, pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient, doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin, doublet.mixture = args$doubletMixture, confident.nmads = args$confidentNmads, confident.min = args$confidenMin, combinations = combinations_transformed)
 } else {

From f00e1e62ef99d8faa64125b410b6ace0ea0d8881 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Wed, 29 Nov 2023 12:41:47 +0100
Subject: [PATCH 20/32] test NULL for hashing drops

---
 bin/dropletUtils.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/dropletUtils.R b/bin/dropletUtils.R
index 8cb7cfa..159eedf 100755
--- a/bin/dropletUtils.R
+++ b/bin/dropletUtils.R
@@ -63,7 +63,7 @@ hto <- Read10X(data.dir = args$raw_hto_matrix_dir, gene.column = args$gene_col)
 ignore_transformed <- ifelse(tolower(args$ignore) == "null", NULL, args$ignore)
 emptyDrops_out <- emptyDrops(hto, lower = args$lower, niters = args$niters,
                              test.ambient = args$testAmbient,
-                             ignore = ignore_transformed,
+                             ignore = NULL,
                              alpha = args$alpha, round = args$round,
                              by.rank = args$byRank)
 

From 8575aa4af6cff7fa88233e52b57b71ffe632fd35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Wed, 6 Dec 2023 11:14:36 +0100
Subject: [PATCH 21/32] debugging

---
 .gitignore             | 3 ++-
 docs/source/hashing.md | 2 +-
 main.nf                | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index b2b6327..b390657 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@ result*/
 testing/
 testing*
 *.pyc
-docs/build/
\ No newline at end of file
+docs/build/
+nextflow_internal.config
\ No newline at end of file
diff --git a/docs/source/hashing.md b/docs/source/hashing.md
index 16c869c..9ced8bd 100644
--- a/docs/source/hashing.md
+++ b/docs/source/hashing.md
@@ -134,7 +134,7 @@ output directory: `$pipeline_output_folder/gmm_demux/gmm_demux_[task_ID/sampleId
 
 output directory: `$pipeline_output_folder/bff/bff_[task_ID/sampleId]`
 
-- `${params.assignmentOutBff}_assignment_demuxmix.csv`: the assignment and classification results produced by BFF
+- `${params.assignmentOutBff}_assignment_bff.csv`: the assignment and classification results produced by BFF
 - `params.csv`: specified parameters in the BFF task
 
 ## **Parameter**
diff --git a/main.nf b/main.nf
index 547e332..000d535 100644
--- a/main.nf
+++ b/main.nf
@@ -72,7 +72,7 @@ workflow run_single{
         }
     }
     else if (params.mode == "hashing"){
-
+        print("Running single sample")
         hash_demultiplexing(params.rna_matrix_raw, params.rna_matrix_filtered, params.hto_matrix_raw, params.hto_matrix_filtered)
         if (params.match_donor == "True"){
             donor_match(hash_demultiplexing.out)
@@ -102,6 +102,7 @@ workflow run_single{
 }
 
 workflow {
+    print(params.multi_input)
     if (params.multi_input == null){
         run_single()
     }

From c68f70aec369efc97e48b06978ecdc1622b5aef8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Wed, 6 Dec 2023 23:37:01 +0100
Subject: [PATCH 22/32] debugging

---
 bin/dropletUtils.R | 5 +++--
 main.nf            | 1 -
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/bin/dropletUtils.R b/bin/dropletUtils.R
index 159eedf..9504872 100755
--- a/bin/dropletUtils.R
+++ b/bin/dropletUtils.R
@@ -63,10 +63,11 @@ hto <- Read10X(data.dir = args$raw_hto_matrix_dir, gene.column = args$gene_col)
 ignore_transformed <- ifelse(tolower(args$ignore) == "null", NULL, args$ignore)
 emptyDrops_out <- emptyDrops(hto, lower = args$lower, niters = args$niters,
                              test.ambient = args$testAmbient,
-                             ignore = NULL,
+                             ignore = ignore_transformed,
                              alpha = args$alpha, round = args$round,
                              by.rank = args$byRank)
-
+print("-------------")
+print(emptyDrops_out)
 print("------------------- emptyDrops finished ---------------------------------")
 
 
diff --git a/main.nf b/main.nf
index 000d535..c455a47 100644
--- a/main.nf
+++ b/main.nf
@@ -102,7 +102,6 @@ workflow run_single{
 }
 
 workflow {
-    print(params.multi_input)
     if (params.multi_input == null){
         run_single()
     }

From d49018fda80d970af7b7c8dfeef70c17760404b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Wed, 13 Dec 2023 09:14:53 -0400
Subject: [PATCH 23/32] summary correction classification and assignment

---
 bin/bff.R           | 2 +-
 bin/summary_hash.py | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/bin/bff.R b/bin/bff.R
index 6154dcf..b0ea7af 100755
--- a/bin/bff.R
+++ b/bin/bff.R
@@ -74,8 +74,8 @@ if(as.logical(args$preprocess)){
   # Step 3: Create a vector from the barcodesl
   vector <- unlist(words)
   print("Preprocessing")
-  #counts <- Read10X(args$fileHto) 
   counts <- ProcessCountMatrix(rawCountData = args$fileHto, barcodeBlacklist = vector)
+  print("Preprocessing done")
 }else{
   print("No preprocessing")
   counts <- Read10X(args$fileHto) 
diff --git a/bin/summary_hash.py b/bin/summary_hash.py
index bad62f7..d47fd16 100755
--- a/bin/summary_hash.py
+++ b/bin/summary_hash.py
@@ -364,7 +364,6 @@ def bff_summary(bff_res,raw_adata, raw_mudata):
             for column in column_names:
                 if column in dt_assign.columns:
                     dt_assign = dt_assign.drop([column], axis=1)
-            dt_assign.loc[dt_assign["consensuscall"] == "Singlet", "consensuscall"] = "singlet"
             dt_assign.loc[dt_assign["consensuscall"] == "Doublet", "consensuscall"] = "doublet"
             dt_assign.loc[dt_assign["consensuscall"] == "Negative", "consensuscall"] = "negative"
             dt_assign['consensuscall'] = dt_assign['consensuscall'].astype('category')
@@ -392,14 +391,14 @@ def bff_summary(bff_res,raw_adata, raw_mudata):
             dt_classi = data_bff.copy()
             column_names_class = ["bff_raw","bff_cluster","consensuscall"]
             for column in column_names_class:
-                if column in dt_assign.columns:
+                if column in dt_classi.columns:
                     dt_classi = dt_classi.drop([column], axis=1)
             dt_classi.loc[dt_classi["consensuscall.global"] == "Singlet", "consensuscall.global"] = "singlet"
             dt_classi.loc[dt_classi["consensuscall.global"] == "Doublet", "consensuscall.global"] = "doublet"
             dt_classi.loc[dt_classi["consensuscall.global"] == "Negative", "consensuscall.global"] = "negative"
             dt_classi = dt_classi.rename(columns={"cellbarcode": "Barcode", "consensuscall.global": os.path.basename(x)})
             classi.append(dt_classi)
-
+            
         params_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename == "params.csv"][0])
         params_res = pd.read_csv(params_dir, usecols=[1, 2], keep_default_na=False, index_col=0)     
         params_res.columns = [os.path.basename(x)]

From 1940f26e2521e10f7e5f0a143a0ba9e91989e887 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Wed, 13 Dec 2023 21:14:21 -0400
Subject: [PATCH 24/32] restructured GMM-demux summary for assignment and
 classification

---
 bin/summary_hash.py | 54 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/bin/summary_hash.py b/bin/summary_hash.py
index d47fd16..8e3d6fb 100755
--- a/bin/summary_hash.py
+++ b/bin/summary_hash.py
@@ -268,15 +268,33 @@ def htodemux_summary(htodemux_res, raw_adata, raw_mudata):
     params = pd.concat(params, axis=1)
     params.to_csv("hash_summary/htodemux_params.csv")
 
+
+
 def gmm_summary(gmmDemux_res,raw_adata, raw_mudata):
     classi = []
     assign = []
     params = []
     for x in gmmDemux_res:
         obs_res_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("GMM_full.csv")][0])
+         
+        #we get the number of hashes used for the experiment from the parameters
+        #so that we now how many kinds of singlets we could find in the assignment
+        params_dir = os.path.join(x, "params.csv")
+        params_res = pd.read_csv(params_dir,index_col=False)
+        params_res.columns = ["Argument", os.path.basename(x)]
+        params.append(params_res)
+
         
+        result_row = params_res[params_res['Argument'].str.contains('hto_name_gmm', case=False, na=False)]
+        hashes_used = ""
+        if not result_row.empty:
+            hashes_used = result_row[os.path.basename(x)].iloc[0]
+        else:
+            print("No row contains the number of hashes")
+        hashes = hashes_used.split(',')
+        number_of_hashes = len(hashes)
+        #GMM assignement file
         gmm_classi = pd.read_csv(obs_res_dir)
-        
         #GMM full is the name given by GMM_demux per default to all results
         classification_config = os.path.join(x, "GMM_full.config")
         classif_file = pd.read_csv(classification_config,header=None)
@@ -284,23 +302,39 @@ def gmm_summary(gmmDemux_res,raw_adata, raw_mudata):
         #Classification and Assigment come from the same file
         gmm_dt = pd.DataFrame(gmm_classi)
         classification_dt = pd.DataFrame(classif_file)
+
         #change column names
         classification_dt = classification_dt.rename(columns={0: "Cluster_id", 1: "assignment"})
+
         gmm_dt = gmm_dt.rename(columns={"Unnamed: 0": "Barcode"})
         #Create classification following the assignment found for the barcodes
         #we keep the original assigment and add a classification column
-        classification_dt["assignment_binary"] = classification_dt["assignment"].str.contains("-")
-        classification_dt["classification"] = classification_dt["assignment"].apply(lambda x: "doublet" if x else "singlet")
-        classification_dt.at[0, "classification"] = "negative"
+
+        ##Inner function
+        def classify_hash(row,number_hashes):
+            if row == 0:
+                return 'negative'
+            elif 0 > row <= number_hashes:
+                return 'singlet'
+            else:
+                return 'doublet'
+
+        classification_dt['Classification'] = classification_dt['Cluster_id'].apply(lambda x: classify_hash(x, number_of_hashes))
+
         #Compare classification guide file with classification found
         merged = pd.merge(classification_dt, gmm_dt, on='Cluster_id', how='left')
-
-        gmm_dt['Classification'] = merged['classification']
+        gmm_dt['Classification'] = merged['Classification']
         gmm_dt['Assignment'] = merged['assignment']
-        
+        #instead of multiple hashes, add doublet
+
+        gmm_dt["Assignment"] = gmm_dt["Assignment"].apply(lambda x: "doublet" if "-" in x else x)
+        classification_dt['Classification'] = classification_dt['Classification'].str.replace(' ', '')
         #Assigment for GMM-Demux
+        #'Cluster_id',
         gmm_dt_assign = gmm_dt.drop(['Cluster_id','Confidence','Classification' ], axis=1)
+        gmm_dt_assign['Assignment'] = gmm_dt_assign['Assignment'].str.replace(' ', '')
         gmm_dt_assign.columns = ["Barcode", os.path.basename(x)]
+   
         assign.append(gmm_dt_assign)
 
         if raw_adata is not None:
@@ -325,11 +359,7 @@ def gmm_summary(gmmDemux_res,raw_adata, raw_mudata):
         gmm_dt_classi.columns =["Barcode", os.path.basename(x)]
         classi.append(gmm_dt_classi)
 
-        params_dir = os.path.join(x, "params.csv")
-        params_res = pd.read_csv(params_dir,index_col=False)
-        params_res.columns = ["Argument", os.path.basename(x)]
-        params.append(params_res)
-
+        
     classi_df = pd.concat(classi, axis=1, join="outer")
     classi_df.to_csv("hash_summary"  +"/GMM_classification.csv", index=False)
     

From d52e505ae5b3616fb07618c7fc3408d852ef24dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Wed, 13 Dec 2023 21:54:16 -0400
Subject: [PATCH 25/32] correction Bff params

---
 bin/summary_hash.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/bin/summary_hash.py b/bin/summary_hash.py
index 8e3d6fb..7b4f022 100755
--- a/bin/summary_hash.py
+++ b/bin/summary_hash.py
@@ -430,8 +430,10 @@ def bff_summary(bff_res,raw_adata, raw_mudata):
             classi.append(dt_classi)
             
         params_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename == "params.csv"][0])
-        params_res = pd.read_csv(params_dir, usecols=[1, 2], keep_default_na=False, index_col=0)     
-        params_res.columns = [os.path.basename(x)]
+        params_res = pd.read_csv(params_dir,sep=';',index_col=None)
+        params_res = params_res.drop(['Unnamed: 0' ], axis=1)  
+        params_res = params_res.dropna(subset=['Argument', 'Value'], how='any') 
+        params_res.columns = ["Argument",os.path.basename(x)]
         params.append(params_res)
 
     classi_df = pd.concat(classi, axis=1, join="outer")

From fa48b0c5084b8196494eacc24c398e4998a73c92 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Thu, 14 Dec 2023 09:14:17 -0400
Subject: [PATCH 26/32] fixed summary hash general

---
 bin/summary_hash.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/bin/summary_hash.py b/bin/summary_hash.py
index 7b4f022..d484a8a 100755
--- a/bin/summary_hash.py
+++ b/bin/summary_hash.py
@@ -398,6 +398,7 @@ def bff_summary(bff_res,raw_adata, raw_mudata):
             dt_assign.loc[dt_assign["consensuscall"] == "Negative", "consensuscall"] = "negative"
             dt_assign['consensuscall'] = dt_assign['consensuscall'].astype('category')
             dt_assign = dt_assign.rename(columns={"cellbarcode": "Barcode", "consensuscall": os.path.basename(x)})
+            dt_assign['Barcode'] = dt_assign['Barcode'].apply(lambda x: str(x) + "-1")
             assign.append(dt_assign)
             if raw_adata is not None:
                 adata = raw_adata.copy()
@@ -427,6 +428,7 @@ def bff_summary(bff_res,raw_adata, raw_mudata):
             dt_classi.loc[dt_classi["consensuscall.global"] == "Doublet", "consensuscall.global"] = "doublet"
             dt_classi.loc[dt_classi["consensuscall.global"] == "Negative", "consensuscall.global"] = "negative"
             dt_classi = dt_classi.rename(columns={"cellbarcode": "Barcode", "consensuscall.global": os.path.basename(x)})
+            dt_classi['Barcode'] = dt_classi['Barcode'].apply(lambda x: str(x) + "-1")
             classi.append(dt_classi)
             
         params_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename == "params.csv"][0])
@@ -494,19 +496,23 @@ def bff_summary(bff_res,raw_adata, raw_mudata):
 
     # Read and combine assignment files
     assignment = [file for file in os.listdir("hash_summary") if file.endswith("_assignment.csv")]
-    assignment_all = pd.read_csv(os.path.join("hash_summary", assignment[0]))
-   
-    if len(assignment) > 1:
-        for df in assignment[1:]:
-            df = pd.read_csv(os.path.join("hash_summary", df))
-            assignment_all = pd.merge(assignment_all, df, on='Barcode', how='outer')
+    assignment_all = pd.DataFrame()
+    
+    for file in assignment:
+        file_path = os.path.join("hash_summary", file)
+        df = pd.read_csv(file_path, usecols=['Barcode', pd.read_csv(file_path).columns[1]])
+        assignment_all = pd.concat([assignment_all, df.set_index('Barcode')], axis=1, join='outer')
+
+    # Reset the index to have 'Barcode' as a regular column
+    assignment_all.reset_index(inplace=True)
+
     assignment_all.to_csv("hash_summary/hashing_assignment_all.csv", index=False)
 
     # Read and combine classification files
     classification = [file for file in os.listdir("hash_summary") if file.endswith("_classification.csv")]
     classification_all = pd.read_csv(os.path.join("hash_summary", classification[0]))
-
-    if len(classification) > 1:
+    
+    if len(classification) > 1: 
         for df in classification[1:]:
             df = pd.read_csv(os.path.join("hash_summary", df))
             classification_all = pd.merge(classification_all, df, on='Barcode', how='outer')

From 33e90f2e2c3803cc0f11a9754e6781cd63e1b351 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Thu, 14 Dec 2023 09:41:16 -0400
Subject: [PATCH 27/32] fix warning hashsolo

---
 bin/summary_hash.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bin/summary_hash.py b/bin/summary_hash.py
index d484a8a..4c18a49 100755
--- a/bin/summary_hash.py
+++ b/bin/summary_hash.py
@@ -104,7 +104,8 @@ def hashsolo_summary(hashsolo_res, raw_adata, raw_mudata):
             mudata.write("hash_summary/mudata/mudata_with_mudata_"+ os.path.basename(x)+".h5mu") 
 
         hashsolo_classi = obs_res[["most_likely_hypothesis"]]
-        hashsolo_classi.loc[:, "most_likely_hypothesis"] = hashsolo_classi["most_likely_hypothesis"].replace({0.0: "negative", 1.0: "singlet", 2.0: "doublet"})
+        #hashsolo_classi.loc[:, "most_likely_hypothesis"] = hashsolo_classi["most_likely_hypothesis"].replace({0.0: "negative", 1.0: "singlet", 2.0: "doublet"})
+        hashsolo_classi['most_likely_hypothesis'] = hashsolo_classi['most_likely_hypothesis'].apply(lambda x: 'negative' if x == 0.0 else ('singlet' if x == 1.0 else 'doublet'))
         hashsolo_classi.columns = [os.path.basename(x)]
         classi.append(hashsolo_classi)
     

From a2f9037b72c777005bbdffed9ba5152ee9c7f35c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Mon, 18 Dec 2023 10:39:58 -0400
Subject: [PATCH 28/32] fixes hashed drops

---
 bin/dropletUtils.R                         | 48 +++++++++++-----------
 modules/single/hash_demulti/bff.nf         |  1 +
 modules/single/hash_demulti/demuxem.nf     |  2 +-
 modules/single/hash_demulti/hashedDrops.nf |  7 +++-
 modules/single/hash_demulti/hashsolo.nf    |  2 +-
 nextflow.config                            |  1 +
 6 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/bin/dropletUtils.R b/bin/dropletUtils.R
index 9504872..6cc3a62 100755
--- a/bin/dropletUtils.R
+++ b/bin/dropletUtils.R
@@ -27,6 +27,8 @@ parser$add_argument("--objectOutEmptyDrops", default = "emptyDroplets",
                     help = "Prefix name for the emptyDrops RDS file")
 parser$add_argument("--assignmentOutEmptyDrops", default = "emptyDroplets",
                     help = "prefex name for emptyDrops assignment CSV file")
+parser$add_argument("--runEmptyDrops", action="store_true",
+                    help = "Executes emptyDrops function only when desired, recomended only for raw data")
 
 #for hashedDrops
 parser$add_argument("--ambient", action = "store_true",
@@ -60,29 +62,29 @@ parser$add_argument("--gene_col", help = "Specify which column of genes.tsv or f
 args <- parser$parse_args()
 
 hto <- Read10X(data.dir = args$raw_hto_matrix_dir, gene.column = args$gene_col)
-ignore_transformed <- ifelse(tolower(args$ignore) == "null", NULL, args$ignore)
-emptyDrops_out <- emptyDrops(hto, lower = args$lower, niters = args$niters,
-                             test.ambient = args$testAmbient,
-                             ignore = ignore_transformed,
-                             alpha = args$alpha, round = args$round,
-                             by.rank = args$byRank)
-print("-------------")
-print(emptyDrops_out)
-print("------------------- emptyDrops finished ---------------------------------")
-
-
-print("-------- Following Files are saved in folder hashedDrops_out ------------")
-print(paste0(args$objectOutEmptyDrops, ".rds"))
-print(paste0(args$assignmentOutEmptyDrops, ".csv"))
-write.csv(emptyDrops_out, paste0(args$outputdir, "/", args$assignmentOutEmptyDrops, ".csv"))
-saveRDS(emptyDrops_out, file=paste0(args$outputdir, "/", args$objectOutEmptyDrops, ".rds"))
-
-print("------------------- filtering empty droplets ----------------------------")
-is.cell <- emptyDrops_out$FDR <= args$isCellFDR
-colors <- ifelse(is.cell, "red", "black")
-png(paste0(args$outputdir, "/", "plot_emptyDrops.png"))
-plot(emptyDrops_out$Total, -emptyDrops_out$LogProb, col=colors, xlab="Total UMI count", ylab="-Log Probability")
-dev.off()
+rna <- Read10X(data.dir = args$raw_rna_matrix_dir, gene.column = args$gene_col)
+print(args$runEmptyDrops)
+#if (args$runEmptyDrops == TRUE) {
+    print("------------------- executing emptyDrops ---------------------------------")
+    ignore_transformed <- ifelse(tolower(args$ignore) == "null", NULL, args$ignore)
+    emptyDrops_out <- emptyDrops(rna, lower = args$lower, niters = args$niters,
+                                test.ambient = args$testAmbient,
+                                ignore = NULL,
+                                alpha = args$alpha, round = args$round,
+                                by.rank = args$byRank)
+                        
+
+    print("------------------- emptyDrops finished ---------------------------------")
+    write.csv(emptyDrops_out, paste0(args$outputdir, "/", args$assignmentOutEmptyDrops, ".csv"))
+    saveRDS(emptyDrops_out, file=paste0(args$outputdir, "/", args$objectOutEmptyDrops, ".rds"))
+
+    print("------------------- filtering empty droplets ----------------------------")
+    is.cell <- emptyDrops_out$FDR <= args$isCellFDR
+    colors <- ifelse(is.cell, "red", "black")
+    png(paste0(args$outputdir, "/", "plot_emptyDrops.png"))
+    plot(emptyDrops_out$Total, -emptyDrops_out$LogProb, col=colors, xlab="Total UMI count", ylab="-Log Probability")
+    dev.off()
+#}
 
 combinations_transformed <- ifelse(tolower(args$combinations) == "null", NULL, args$combinations)
 
diff --git a/modules/single/hash_demulti/bff.nf b/modules/single/hash_demulti/bff.nf
index f60e098..1160786 100644
--- a/modules/single/hash_demulti/bff.nf
+++ b/modules/single/hash_demulti/bff.nf
@@ -7,6 +7,7 @@ process bff{
 
     conda "$projectDir/conda/bff.yml"
     
+
     input:
 
         path hto_matrix, stageAs: 'hto_data'
diff --git a/modules/single/hash_demulti/demuxem.nf b/modules/single/hash_demulti/demuxem.nf
index 4d94480..32d59b3 100644
--- a/modules/single/hash_demulti/demuxem.nf
+++ b/modules/single/hash_demulti/demuxem.nf
@@ -6,7 +6,7 @@ process demuxem{
     label 'small_mem'
     
     conda "bioconda::pegasuspy demuxEM scanpy"
-
+    
     input:
         path raw_rna_matrix_dir, stageAs: "rna_data_${params.rna_matrix_demuxem}"
         path raw_hto_matrix_dir, stageAs: "hto_data_${params.hto_matrix_demuxem}"
diff --git a/modules/single/hash_demulti/hashedDrops.nf b/modules/single/hash_demulti/hashedDrops.nf
index a89a7ce..f470750 100755
--- a/modules/single/hash_demulti/hashedDrops.nf
+++ b/modules/single/hash_demulti/hashedDrops.nf
@@ -19,6 +19,7 @@ process hashedDrops{
         each isCellFDR
         val objectOutEmptyDrops
         val assignmentOutEmptyDrops
+        each runEmptyDrops
 	   
         each ambient
         each minProp
@@ -45,11 +46,12 @@ process hashedDrops{
         def alp = alpha != "None" ? " --alpha ${alpha}" : ''
         def byR = byRank != "None" ? " --by.rank ${byRank}" : ''
         def amb = ambient != 'False' ? " --ambient" : ''
+        def run_empty = runEmptyDrops != 'False' ? " --runEmptyDrops" : ''
         def comb = combinations != "None" ? " --combinations ${combinations}" : ''
 
         """
         mkdir hashedDrops_${task.index}
-        dropletUtils.R --raw_hto_matrix_dir $raw_hto_matrix_dir --lower $lower --niters $niters --isCellFDR $isCellFDR --objectOutEmptyDrops $objectOutEmptyDrops --assignmentOutEmptyDrops $assignmentOutEmptyDrops --minProp $minProp --pseudoCount $pseudoCount --doubletNmads $doubletNmads --doubletMin $doubletMin --confidentNmads $confidentNmads --confidenMin $confidenMin --objectOutHashedDrops $objectOutHashedDrops --outputdir hashedDrops_${task.index} --assignmentOutHashedDrops ${assignmentOutHashedDrops}${testAmb}${ign}${alp}${rou}${byR}${constantAmb}${doubletMix}${amb}${comb} --gene_col $gene_col
+        dropletUtils.R --raw_hto_matrix_dir $raw_hto_matrix_dir --lower $lower --niters $niters --isCellFDR $isCellFDR --objectOutEmptyDrops $objectOutEmptyDrops --assignmentOutEmptyDrops $assignmentOutEmptyDrops --minProp $minProp --pseudoCount $pseudoCount --doubletNmads $doubletNmads --doubletMin $doubletMin --confidentNmads $confidentNmads --confidenMin $confidenMin --objectOutHashedDrops $objectOutHashedDrops --outputdir hashedDrops_${task.index} --assignmentOutHashedDrops ${assignmentOutHashedDrops}${testAmb}${ign}${alp}${rou}${byR}${constantAmb}${doubletMix}${amb}${comb}${run_empty} --gene_col $gene_col --runEmptyDrops $runEmptyDrops
         """
 
 }
@@ -79,6 +81,7 @@ workflow hashedDrops_hashing{
         isCellFDR = split_input(params.isCellFDR)
         objectOutEmptyDrops = params.objectOutEmptyDrops
         assignmentOutEmptyDrops = params.assignmentOutEmptyDrops
+        runEmptyDrops = split_input(params.runEmptyDrops)
        
         ambient = split_input(params.ambient)
         minProp = split_input(params.minProp)
@@ -94,7 +97,7 @@ workflow hashedDrops_hashing{
         assignmentOutHashedDrops = params.assignmentOutHashedDrops
         gene_col = split_input(params.gene_col)
 
-        hashedDrops(hto_matrix, lower, niters, testAmbient, ignore, alpha, round, byRank, isCellFDR, objectOutEmptyDrops, assignmentOutEmptyDrops, ambient, minProp, pseudoCount, constantAmbient, doubletNmads, doubletMin, doubletMixture, confidentNmads, confidenMin, combinations, objectOutHashedDrops, assignmentOutHashedDrops, gene_col)
+        hashedDrops(hto_matrix, lower, niters, testAmbient, ignore, alpha, round, byRank, isCellFDR, objectOutEmptyDrops, assignmentOutEmptyDrops,runEmptyDrops, ambient, minProp, pseudoCount, constantAmbient, doubletNmads, doubletMin, doubletMixture, confidentNmads, confidenMin, combinations, objectOutHashedDrops, assignmentOutHashedDrops, gene_col)
         
     emit:
         hashedDrops.out.collect()
diff --git a/modules/single/hash_demulti/hashsolo.nf b/modules/single/hash_demulti/hashsolo.nf
index d2a03a8..e6ce254 100755
--- a/modules/single/hash_demulti/hashsolo.nf
+++ b/modules/single/hash_demulti/hashsolo.nf
@@ -5,7 +5,7 @@ process hash_solo{
     label 'small_mem'
 
     conda "$projectDir/conda/hashsolo_py.yml"
-
+    
     input:
         path hto_data, stageAs: "hto_data_${params.hto_matrix_hashsolo}"
         each priors_negative
diff --git a/nextflow.config b/nextflow.config
index 3e9c700..f8834a8 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -107,6 +107,7 @@ params {
   combinations = "None"
   objectOutHashedDrops = "hashedDrops"
   assignmentOutHashedDrops = "hashedDrops"
+  runEmptyDrops = "True"
   
   // demuxem
   demuxem = "True"

From 73ca4770177b9bf94164171432acde3e9039c74d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Mon, 18 Dec 2023 14:16:00 -0400
Subject: [PATCH 29/32] fixes to hashed drops

---
 bin/dropletUtils.R                         | 26 +++++++++++++---------
 modules/single/hash_demulti/hashedDrops.nf |  2 +-
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/bin/dropletUtils.R b/bin/dropletUtils.R
index 6cc3a62..83c5caa 100755
--- a/bin/dropletUtils.R
+++ b/bin/dropletUtils.R
@@ -27,11 +27,11 @@ parser$add_argument("--objectOutEmptyDrops", default = "emptyDroplets",
                     help = "Prefix name for the emptyDrops RDS file")
 parser$add_argument("--assignmentOutEmptyDrops", default = "emptyDroplets",
                     help = "prefex name for emptyDrops assignment CSV file")
-parser$add_argument("--runEmptyDrops", action="store_true",
+parser$add_argument("--runEmptyDrops", action="store_false",
                     help = "Executes emptyDrops function only when desired, recomended only for raw data")
 
 #for hashedDrops
-parser$add_argument("--ambient", action = "store_true",
+parser$add_argument("--ambient", action = "store_false",
                     help = "Whether to use the relative abundance of each HTO in the ambient solution from emtpyDrops, set TRUE only when test_ambient is TRUE.")
 parser$add_argument("--minProp", default = 0.05, type = "double",
                     help = "Numeric scalar to be used to infer the ambient profile when ambient=NULL,")
@@ -62,9 +62,13 @@ parser$add_argument("--gene_col", help = "Specify which column of genes.tsv or f
 args <- parser$parse_args()
 
 hto <- Read10X(data.dir = args$raw_hto_matrix_dir, gene.column = args$gene_col)
-rna <- Read10X(data.dir = args$raw_rna_matrix_dir, gene.column = args$gene_col)
-print(args$runEmptyDrops)
-#if (args$runEmptyDrops == TRUE) {
+combinations_transformed <- ifelse(tolower(args$combinations) == "null", NULL, args$combinations)
+
+print(args$constantAmbient)
+print("----------------")
+print(args$ambient)
+if (args$runEmptyDrops == TRUE) {
+    rna <- Read10X(data.dir = args$raw_rna_matrix_dir,gene.column = args$gene_col)
     print("------------------- executing emptyDrops ---------------------------------")
     ignore_transformed <- ifelse(tolower(args$ignore) == "null", NULL, args$ignore)
     emptyDrops_out <- emptyDrops(rna, lower = args$lower, niters = args$niters,
@@ -84,14 +88,16 @@ print(args$runEmptyDrops)
     png(paste0(args$outputdir, "/", "plot_emptyDrops.png"))
     plot(emptyDrops_out$Total, -emptyDrops_out$LogProb, col=colors, xlab="Total UMI count", ylab="-Log Probability")
     dev.off()
-#}
 
-combinations_transformed <- ifelse(tolower(args$combinations) == "null", NULL, args$combinations)
 
-if (args$ambient == TRUE) {
-    hashedDrops_out <- hashedDrops(hto[,which(is.cell)], min.prop = args$minProp, ambient = metadata(emptyDrops_out)$ambient, pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient, doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin, doublet.mixture = args$doubletMixture, confident.nmads = args$confidentNmads, confident.min = args$confidenMin, combinations = combinations_transformed)
+    if (args$ambient == TRUE) {
+        hashedDrops_out <- hashedDrops(hto[,which(is.cell)], min.prop = args$minProp, ambient = metadata(emptyDrops_out)$ambient, pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient, doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin, doublet.mixture = args$doubletMixture, confident.nmads = args$confidentNmads, confident.min = args$confidenMin, combinations = combinations_transformed)
+    } else {
+        hashedDrops_out <- hashedDrops(hto[,which(is.cell)], min.prop = args$minProp, pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient, doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin, doublet.mixture = args$doubletMixture, confident.nmads = args$confidentNmads, confident.min = args$confidenMin, combinations = combinations_transformed)
+    }
 } else {
-    hashedDrops_out <- hashedDrops(hto[,which(is.cell)], min.prop = args$minProp, pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient, doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin, doublet.mixture = args$doubletMixture, confident.nmads = args$confidentNmads, confident.min = args$confidenMin, combinations = combinations_transformed)
+    hashedDrops_out <- hashedDrops(hto,min.prop = args$minProp,pseudo.count = args$pseudoCount, constant.ambient = args$constantAmbient,doublet.nmads = args$doubletNmads, doublet.min = args$doubletMin,confident.nmads = args$confidentNmads,confident.min = args$confidenMin)
+   
 }
 
 print("------------------- hashedDrops finished ---------------------------------")
diff --git a/modules/single/hash_demulti/hashedDrops.nf b/modules/single/hash_demulti/hashedDrops.nf
index f470750..ec4bd43 100755
--- a/modules/single/hash_demulti/hashedDrops.nf
+++ b/modules/single/hash_demulti/hashedDrops.nf
@@ -51,7 +51,7 @@ process hashedDrops{
 
         """
         mkdir hashedDrops_${task.index}
-        dropletUtils.R --raw_hto_matrix_dir $raw_hto_matrix_dir --lower $lower --niters $niters --isCellFDR $isCellFDR --objectOutEmptyDrops $objectOutEmptyDrops --assignmentOutEmptyDrops $assignmentOutEmptyDrops --minProp $minProp --pseudoCount $pseudoCount --doubletNmads $doubletNmads --doubletMin $doubletMin --confidentNmads $confidentNmads --confidenMin $confidenMin --objectOutHashedDrops $objectOutHashedDrops --outputdir hashedDrops_${task.index} --assignmentOutHashedDrops ${assignmentOutHashedDrops}${testAmb}${ign}${alp}${rou}${byR}${constantAmb}${doubletMix}${amb}${comb}${run_empty} --gene_col $gene_col --runEmptyDrops $runEmptyDrops
+        dropletUtils.R --raw_hto_matrix_dir $raw_hto_matrix_dir --lower $lower --niters $niters --isCellFDR $isCellFDR --objectOutEmptyDrops $objectOutEmptyDrops --assignmentOutEmptyDrops $assignmentOutEmptyDrops --minProp $minProp --pseudoCount $pseudoCount --doubletNmads $doubletNmads --doubletMin $doubletMin --confidentNmads $confidentNmads --confidenMin $confidenMin --objectOutHashedDrops $objectOutHashedDrops --outputdir hashedDrops_${task.index} --assignmentOutHashedDrops ${assignmentOutHashedDrops}${testAmb}${ign}${alp}${rou}${byR}${constantAmb}${doubletMix}${amb}${comb}${run_empty} --gene_col $gene_col
         """
 
 }

From c119904fe7aaba74f07cc1278cac5e60bd00c83a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Tue, 19 Dec 2023 12:58:15 -0400
Subject: [PATCH 30/32] taking out comments debugging

---
 bin/dropletUtils.R | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/bin/dropletUtils.R b/bin/dropletUtils.R
index 83c5caa..8efdcde 100755
--- a/bin/dropletUtils.R
+++ b/bin/dropletUtils.R
@@ -64,9 +64,6 @@ args <- parser$parse_args()
 hto <- Read10X(data.dir = args$raw_hto_matrix_dir, gene.column = args$gene_col)
 combinations_transformed <- ifelse(tolower(args$combinations) == "null", NULL, args$combinations)
 
-print(args$constantAmbient)
-print("----------------")
-print(args$ambient)
 if (args$runEmptyDrops == TRUE) {
     rna <- Read10X(data.dir = args$raw_rna_matrix_dir,gene.column = args$gene_col)
     print("------------------- executing emptyDrops ---------------------------------")
@@ -78,7 +75,6 @@ if (args$runEmptyDrops == TRUE) {
                                 by.rank = args$byRank)
                         
 
-    print("------------------- emptyDrops finished ---------------------------------")
     write.csv(emptyDrops_out, paste0(args$outputdir, "/", args$assignmentOutEmptyDrops, ".csv"))
     saveRDS(emptyDrops_out, file=paste0(args$outputdir, "/", args$objectOutEmptyDrops, ".rds"))
 
@@ -100,7 +96,6 @@ if (args$runEmptyDrops == TRUE) {
    
 }
 
-print("------------------- hashedDrops finished ---------------------------------")
 
 ignore <- args$ignore
 if (is.null(ignore)) {

From aaba40265a5cfa205c16972f86b6898a4a4960b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Tue, 19 Dec 2023 15:30:57 -0400
Subject: [PATCH 31/32] docs hashing

---
 docs/source/hashing.md | 94 ++++++++++++++++++++++--------------------
 1 file changed, 49 insertions(+), 45 deletions(-)

diff --git a/docs/source/hashing.md b/docs/source/hashing.md
index 9ced8bd..9414658 100644
--- a/docs/source/hashing.md
+++ b/docs/source/hashing.md
@@ -17,16 +17,15 @@ nextflow run main.nf -profile test --mode hashing
 
 The input data depends heavily on the deconvolution tools. In the following table, you will find the minimal input data required by different tools.
 
-| Deconvolution method | Input data                                                                                                         | Parameter                                                      |
-| -------------------- | ------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------- |
-| HTODemux             | - Seurat object with both UMI and hashing count matrix (RDS)                                                       | `params.rna_matrix_htodemux` <br> `params.hto_matrix_htodemux` |
-| Multiseq             | - Seurat object with both UMI and hashing count matrix (RDS)                                                       | `params.rna_matrix_multiseq` <br> `params.hto_matrix_multiseq` |
-| HashSolo             | - 10x mtx directory with hashing count matrix (H5)                                                                 | `params.hto_matrix_hashsolo` <br> `params.rna_matrix_hashsolo` |
-| HashedDrops          | - 10x mtx directory with hashing count matrix (Directory)                                                          | `params.hto_matrix_hashedDrops`                                |
+| Deconvolution method | Input data                                                                                                          | Parameter                                                      |
+| -------------------- | ------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------- |
+| HTODemux             | - Seurat object with both UMI and hashing count matrix (RDS)                                                        | `params.rna_matrix_htodemux` <br> `params.hto_matrix_htodemux` |
+| Multiseq             | - Seurat object with both UMI and hashing count matrix (RDS)                                                        | `params.rna_matrix_multiseq` <br> `params.hto_matrix_multiseq` |
+| HashSolo             | - 10x mtx directory with hashing count matrix (H5)                                                                  | `params.hto_matrix_hashsolo` <br> `params.rna_matrix_hashsolo` |
+| HashedDrops          | - 10x mtx directory with hashing count matrix (Directory)                                                           | `params.hto_matrix_hashedDrops`                                |
 | Demuxem              | - 10x mtx directory with UMI count matrix (Directory) <br>- 10x mtx directory with hashing count matrix (Directory) | `params.hto_matrix_demuxem`<br>`params.rna_matrix_demuxem`     |
-| GMM - Demux              | - 10x mtx directory with UMI count matrix (Directory)<br> - 10x mtx directory with hashing count matrix (Directory) | `params.hto_matrix_gmm_demux`    |
-| BFF             | - 10x mtx directory with UMI count matrix (Directory)<br>- 10x mtx directory with hashing count matrix (Directory) | `params.hto_matrix_bff`     |
-
+| GMM - Demux          | - 10x mtx directory with UMI count matrix (Directory)<br> - 10x mtx directory with hashing count matrix (Directory) | `params.hto_matrix_gmm_demux`                                  |
+| BFF                  | - 10x mtx directory with UMI count matrix (Directory)<br>- 10x mtx directory with hashing count matrix (Directory)  | `params.hto_matrix_bff`                                        |
 
 Similary as genotype-based deconvlution methods, hashing methods also have some input in common. So we also try to utilize common input parameters `params.[rna/hto]_matrix_[raw/filtered]` to store count matrices for better control and `params.[rna/hto]_matrix_[method]` is used to specify whether to use raw or filtered counts for each method.
 
@@ -117,7 +116,6 @@ output directory: `$pipeline_output_folder/hashedDrops/hashedDrops_[task_ID/samp
 - `${params.objectOutHashedDrops}_LogFC.png`: a diagnostic plot comparing the log-fold change between the second HTO's abundance and the ambient contamination
 - `params.csv`: specified parameters in the HashedDrops task
 
-
 ### GMM-Demux
 
 output directory: `$pipeline_output_folder/gmm_demux/gmm_demux_[task_ID/sampleId]`
@@ -283,55 +281,61 @@ output directory: `$pipeline_output_folder/bff/bff_[task_ID/sampleId]`
 | combinations             | An integer matrix specifying valid combinations of HTOs. Each row corresponds to a single sample and specifies the indices of rows in x corresponding to the HTOs used to label that sample. Default: None |
 | objectOutHashedDrops     | Prefix of the hashedDrops output RDS object. Default: hashedDrops                                                                                                                                          |
 | assignmentOutHashedDrops | Prefix of the hashedDrops output CSV file. Default: hashedDrops                                                                                                                                            |
+
 ### GMM-Demux
-|                          |                                                                                              |
-| ------------------------ | -------------------------------------------------------------------------------------------- |
-| gmmDemux                 | Whether to perform GMMDemux. Default: True                                                   |
-| hto_matrix_gmm_demux      | Whether to use raw or filtered HTO count matrix. Default: filtered                                                            |
-| assignmentOutGmmDemux      | Name for the folder output. Default: gmm_demux|
-| hto_name_gmm          |   list of sample tags (HTOs) separated by ',' without whitespace. Default: None   |
-| summary           |  the estimated total count of cells in the single cell assay. Default: 2000     |
-| report_gmm           | Name for the file generated by the summary. Default:report.txt            |
-| mode_GMM    | Format of the input, either tsv or csv. Default: tsv                 |
-| extract             | extract names of the sample barcoding tag(s) to extract, separated by ','. Joint tags are linked with '+'.  Default: None       |
-| threshold_gmm | Provide the confidence threshold value. Requires a float in (0,1). Default: 0.8    |
-| ambiguous    | The estimated chance of having a phony GEM getting included in a pure type GEM cluster by the clustering algorithm. Default: 0.5.            |
-| plotOutHashSolo          | Prefix of the output figures. Default: hashsolo                                              |
+
+|                       |                                                                                                                                   |
+| --------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
+| gmmDemux              | Whether to perform GMMDemux. Default: True                                                                                        |
+| hto_matrix_gmm_demux  | Whether to use raw or filtered HTO count matrix. Default: filtered                                                                |
+| assignmentOutGmmDemux | Name for the folder output. Default: gmm_demux                                                                                    |
+| hto_name_gmm          | list of sample tags (HTOs) separated by ',' without whitespace. Default: None                                                     |
+| summary               | the estimated total count of cells in the single cell assay. Default: 2000                                                        |
+| report_gmm            | Name for the file generated by the summary. Default:report.txt                                                                    |
+| mode_GMM              | Format of the input, either tsv or csv. Default: tsv                                                                              |
+| extract               | extract names of the sample barcoding tag(s) to extract, separated by ','. Joint tags are linked with '+'. Default: None          |
+| threshold_gmm         | Provide the confidence threshold value. Requires a float in (0,1). Default: 0.8                                                   |
+| ambiguous             | The estimated chance of having a phony GEM getting included in a pure type GEM cluster by the clustering algorithm. Default: 0.5. |
+| plotOutHashSolo       | Prefix of the output figures. Default: hashsolo                                                                                   |
 
 ### BFF
-|                          |                                                                                              |
-| ------------------------ | -------------------------------------------------------------------------------------------- |
-| BFF                 | Whether to perform BFF. Default: False                                                   |
-| hto_matrix_bff      | Whether to use raw or filtered HTO count matrix. Default: raw                                                            |
-| rna_matrix_bff   | Whether to use raw or filtered scRNA-seq count matrix. Default: raw                                                           |
-| assignmentOutBff      | Name for the folder output. Default: bff|
-| methods          |   method or list of methods to be used. Default: combined_bff   |
-| methodsForConsensus           | a consensus call will be generated using all methods especified. Default: NULL   |
-| cellbarcodeWhitelist           | 	A vector of expected cell barcodes. Default:NULL            |
-| metricsFile    | summary metrics will be written to this file. Default: metrics_bff.cvs                 |
-| doTSNE             |  tSNE will be run on the resulting hashing calls after each caller.  Default: True       |
-| doHeatmap | if true, Seurat::HTOHeatmap will be run on the results of each calle Default: True   |
-| perCellSaturation    | An optional dataframe with the columns cellbarcode and saturation. Default: NULL            |
-| majorityConsensusThreshold | This applies to calculating a consensus call when multiple algorithms are used. Default: NULL    |
-| chemistry     | This string is passed to EstimateMultipletRate. Should be either 10xV2 or 10xV3. Default: 10xV3      |
-| callerDisagreementThreshold     | If provided, the agreement rate will be calculated between each caller and the simple majority call, ignoring discordant and no-call cells. Default: NULL      |
-| preprocess_bff     | When True, the data is preprocess using the method ProcessCountMatrix from CellHashR. Default: False    |
-| barcodeWhitelist     | A vector of barcode names to retain. This parameter is used only when the pre-processing step is executed. Default: NULL    |
 
+|                             |                                                                                                                                                           |
+| --------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| BFF                         | Whether to perform BFF. Default: False                                                                                                                    |
+| hto_matrix_bff              | Whether to use raw or filtered HTO count matrix. Default: raw                                                                                             |
+| rna_matrix_bff              | Whether to use raw or filtered scRNA-seq count matrix. Default: raw                                                                                       |
+| assignmentOutBff            | Name for the folder output. Default: bff                                                                                                                  |
+| methods                     | method or list of methods to be used. Default: combined_bff                                                                                               |
+| methodsForConsensus         | a consensus call will be generated using all methods especified. Default: NULL                                                                            |
+| cellbarcodeWhitelist        | A vector of expected cell barcodes. Default:NULL                                                                                                          |
+| metricsFile                 | summary metrics will be written to this file. Default: metrics_bff.cvs                                                                                    |
+| doTSNE                      | tSNE will be run on the resulting hashing calls after each caller. Default: True                                                                          |
+| doHeatmap                   | if true, Seurat::HTOHeatmap will be run on the results of each calle Default: True                                                                        |
+| perCellSaturation           | An optional dataframe with the columns cellbarcode and saturation. Default: NULL                                                                          |
+| majorityConsensusThreshold  | This applies to calculating a consensus call when multiple algorithms are used. Default: NULL                                                             |
+| chemistry                   | This string is passed to EstimateMultipletRate. Should be either 10xV2 or 10xV3. Default: 10xV3                                                           |
+| callerDisagreementThreshold | If provided, the agreement rate will be calculated between each caller and the simple majority call, ignoring discordant and no-call cells. Default: NULL |
+| preprocess_bff              | When True, the data is preprocess using the method ProcessCountMatrix from CellHashR. Default: False                                                      |
+| barcodeWhitelist            | A vector of barcode names to retain. This parameter is used only when the pre-processing step is executed. Default: NULL                                  |
 
 ### General Use
 
 #### Single sample use
+
 The use of the pipeline for a single samples require the definition of certain parameters in order to run the tools under default configuration.
 The parameter `--mode hashing` must be included with the purpose of running the hashing tools only.
+
 ##### GMM-Demux
+
 The names of the hashtags must be given as a list of string, separated by ','. This list is given under the parameter `--hto_name_gmm`
+
 ##### BFF
-The demultiplexing method for the experiment must be given under the parameter `--methods`. Multiple methods can be given as a list, separated by ','. 
-Besides, the method or methods for consensus must be given under the parameter `--methodsForConsensus`. 
 
+The demultiplexing method for the experiment must be given under the parameter `--methods`. Multiple methods can be given as a list, separated by ','.
+Besides, the method or methods for consensus must be given under the parameter `--methodsForConsensus`.
 
 ```bash
-nextflow run main.nf --mode hashing --match_donor False  --hto_matrix_raw /data_folder/raw_hto_data 
+nextflow run main.nf --mode hashing --match_donor False  --hto_matrix_raw /data_folder/raw_hto_data
 --hto_matrix_filtered /data_folder/filtered_hto_data --barcodes /data_folder/filtered_hto_data/barcodes.tsv.gz --rna_matrix_raw /data_folder/raw_rna_data --rna_matrix_filtered /data_folder/filtered_rna_data --hto_name_gmm "hto_name_1,hto_name_2,hto_name_3" --methods bff_cluster  --methodsForConsensus bff_cluster
-```
\ No newline at end of file
+```

From 731635e558df8b15129941fd73db0ed09d49ecb3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Myl=C3=A8ne=20Mariana=20Gonzales=20Andr=C3=A9?=
 <mariana.gonzales.andre@gmail.com>
Date: Mon, 8 Jan 2024 09:08:30 -0400
Subject: [PATCH 32/32] resolved comments PR, multi_sample_input file in
 test_data

---
 bin/demuxem.py                                         | 10 ----------
 bin/summary_hash.py                                    |  7 ++-----
 nextflow.config                                        |  4 ++--
 .../multi_sample_input.csv                             |  0
 4 files changed, 4 insertions(+), 17 deletions(-)
 rename multi_sample_input.csv => test_data/multi_sample_input.csv (100%)

diff --git a/bin/demuxem.py b/bin/demuxem.py
index 814baef..4fac528 100755
--- a/bin/demuxem.py
+++ b/bin/demuxem.py
@@ -33,17 +33,7 @@
     # load input rna data
     rna_data = sc.read_10x_mtx(args.rna_matrix_dir)
     hashing_data = sc.read_10x_mtx(args.hto_matrix_dir,gex_only=False)
-    #data.subset_data(modality_subset=['rna'])
-    #data.concat_data() # in case of multi-organism mixing data
-    # load input hashing data
-    #data.update(io.read_input(args.hto_matrix_dir, modality="hashing"))
-    # Extract rna and hashing data
-    #rna_data = data.get_data(modality="rna")
-    #hashing_data = data.get_data(modality="hashing")
     rna = args.rna_matrix_dir
-    print("-------------------")
-    print(rna)
-    print("-------------------")
     filter = ""
     if args.filter_demuxem.lower() in ['true', 't', 'yes', 'y', '1']:
         filter = True
diff --git a/bin/summary_hash.py b/bin/summary_hash.py
index 4c18a49..fac8ea8 100755
--- a/bin/summary_hash.py
+++ b/bin/summary_hash.py
@@ -104,7 +104,6 @@ def hashsolo_summary(hashsolo_res, raw_adata, raw_mudata):
             mudata.write("hash_summary/mudata/mudata_with_mudata_"+ os.path.basename(x)+".h5mu") 
 
         hashsolo_classi = obs_res[["most_likely_hypothesis"]]
-        #hashsolo_classi.loc[:, "most_likely_hypothesis"] = hashsolo_classi["most_likely_hypothesis"].replace({0.0: "negative", 1.0: "singlet", 2.0: "doublet"})
         hashsolo_classi['most_likely_hypothesis'] = hashsolo_classi['most_likely_hypothesis'].apply(lambda x: 'negative' if x == 0.0 else ('singlet' if x == 1.0 else 'doublet'))
         hashsolo_classi.columns = [os.path.basename(x)]
         classi.append(hashsolo_classi)
@@ -310,9 +309,7 @@ def gmm_summary(gmmDemux_res,raw_adata, raw_mudata):
         gmm_dt = gmm_dt.rename(columns={"Unnamed: 0": "Barcode"})
         #Create classification following the assignment found for the barcodes
         #we keep the original assigment and add a classification column
-
-        ##Inner function
-        def classify_hash(row,number_hashes):
+        def _classify_hash(row,number_hashes):
             if row == 0:
                 return 'negative'
             elif 0 > row <= number_hashes:
@@ -320,7 +317,7 @@ def classify_hash(row,number_hashes):
             else:
                 return 'doublet'
 
-        classification_dt['Classification'] = classification_dt['Cluster_id'].apply(lambda x: classify_hash(x, number_of_hashes))
+        classification_dt['Classification'] = classification_dt['Cluster_id'].apply(lambda x: _classify_hash(x, number_of_hashes))
 
         #Compare classification guide file with classification found
         merged = pd.merge(classification_dt, gmm_dt, on='Cluster_id', how='left')
diff --git a/nextflow.config b/nextflow.config
index f8834a8..ed91d0a 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -126,7 +126,7 @@ params {
   filter_demuxem = "True"
 
   // gmm-demux
-  gmmDemux = "False"
+  gmmDemux = "True"
   hto_matrix_gmm_demux = "filtered"
   assignmentOutGmmDemux = "gmm_demux"
   hto_name_gmm = "None"
@@ -138,7 +138,7 @@ params {
   ambiguous = 0.05
 
   // bff
-  bff = "False"
+  bff = "True"
   rna_matrix_bff = "raw"
   hto_matrix_bff = "raw"
   assignmentOutBff = "bff" 
diff --git a/multi_sample_input.csv b/test_data/multi_sample_input.csv
similarity index 100%
rename from multi_sample_input.csv
rename to test_data/multi_sample_input.csv