From f6cba52037205bba5846708a00431d266ad7afa2 Mon Sep 17 00:00:00 2001
From: Florian Wuennemann <flowuenne@gmail.com>
Date: Mon, 23 Oct 2023 23:43:12 +0000
Subject: [PATCH 1/9] Replace PROJECT_SPOTS and MCQUANT with spot2cell.

---
 bin/spot2cell.py               | 108 +++++++++++++++++++++++++++++++++
 modules/local/project_spots.nf |   1 +
 modules/local/spot2cell.nf     |  24 ++++++++
 workflows/molkart.nf           |  61 +++++++++----------
 4 files changed, 162 insertions(+), 32 deletions(-)
 create mode 100755 bin/spot2cell.py
 create mode 100644 modules/local/spot2cell.nf

diff --git a/bin/spot2cell.py b/bin/spot2cell.py
new file mode 100755
index 0000000..c294da7
--- /dev/null
+++ b/bin/spot2cell.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+
+## Import packages
+import pandas as pd
+import numpy as np
+from skimage.measure import regionprops_table
+import tifffile as tiff
+import argparse
+import os
+
+def assign_spots2cell(spot_table, cell_mask):
+
+    # Initialize a dictionary to hold the counts
+    gene_counts = {}
+
+    # Calculate cell properties for cell_mask using regionprops_table
+    cell_props = regionprops_table(cell_mask,
+                                   properties=["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"])
+                                 #properties=["label","centroid","area"])
+    # Turn cell props into a pandas DataFrame and add a Cell_ID column
+    name_map = {
+        "CellID": "label",
+        "X_centroid": "centroid-1",
+        "Y_centroid": "centroid-0",
+        "Area": "area",
+        "MajorAxisLength": "major_axis_length",
+        "MinorAxisLength": "minor_axis_length",
+        "Eccentricity": "eccentricity",
+        "Solidity": "solidity",
+        "Extent": "extent",
+        "Orientation": "orientation",
+    }
+    for new_name, old_name in name_map.items():
+        cell_props[new_name] = cell_props[old_name]
+
+    cell_props = pd.DataFrame(cell_props)
+
+    # Exclude any rows that contain Duplicated in the gene column from spot_table
+    spot_table = spot_table[~spot_table['gene'].str.contains("Duplicated")]
+
+    # Iterate over each row in the grouped DataFrame
+    for index,row in spot_table.iterrows():
+        # Get the x and y positions and gene
+        x = int(row['x'])
+        y = int(row['y'])
+        gene = row['gene']
+
+        # Get the cell ID from the labeled mask
+        cell_id = cell_mask[y, x]
+
+        # If the cell ID is not in the dictionary, add it
+        if cell_id not in gene_counts:
+            gene_counts[cell_id] = {}
+            if gene not in gene_counts[cell_id]:
+                gene_counts[cell_id][gene] = 1
+            else:
+                gene_counts[cell_id][gene] += 1
+        else:
+            if gene not in gene_counts[cell_id]:
+                gene_counts[cell_id][gene] = 1
+            else:
+                # Add the count for this gene in this cell ID
+                gene_counts[cell_id][gene] += 1
+
+    # Convert the dictionary of counts into a DataFrame
+    gene_counts_df = pd.DataFrame(gene_counts).T
+
+    # Add a column to gene_counts_df for the Cell_ID, make it the first column of the table
+    gene_counts_df['CellID'] = gene_counts_df.index
+
+    # Add the regionprops data from cell_props for each cell ID to gene_counts_df add NA when cell_ID exists in cell_props but not in gene_counts_df
+    gene_counts_df = gene_counts_df.merge(cell_props, on='CellID', how='outer')
+
+    # Convert NaN values to 0
+    gene_counts_df = gene_counts_df.fillna(0)
+
+    # Sort by Cell_ID in ascending order
+    gene_counts_df = gene_counts_df.sort_values(by=['CellID'])
+
+    # Make Cell_ID the first column in gene_counts_df
+    gene_counts_df = gene_counts_df.set_index('CellID').reset_index()
+
+    # Filter out cell_ID = 0 into it's own dataframe called background
+    background = gene_counts_df[gene_counts_df['CellID'] == 0]
+    gene_counts_df = gene_counts_df[gene_counts_df['CellID'] != 0]
+
+    # Return both gene_counts_df and background
+    return gene_counts_df, background
+
+if __name__ == "__main__":
+    # Add a python argument parser with options for input, output and image size in x and y
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-s", "--spot_table", help="Spot table to project.")
+    parser.add_argument("-c", "--cell_mask", help="Sample ID.")
+
+    args = parser.parse_args()
+
+    ## Read in spot table
+    spot_data = pd.read_csv(args.spot_table,
+                names=['x', 'y', 'z', 'gene',"empty"], sep='\t', header=None,index_col=None)
+
+    cell_mask  = tiff.imread(args.cell_mask)
+
+    gene_counts_df, background = assign_spots2cell(spot_data, cell_mask)
+
+    basename = os.path.basename(args.spot_table)
+    basename = os.path.splitext(basename)[0]
+    gene_counts_df.to_csv(f"{basename}.cellxgene.tsv", sep='\t',  header=True, index=False)
diff --git a/modules/local/project_spots.nf b/modules/local/project_spots.nf
index 669d759..c184510 100644
--- a/modules/local/project_spots.nf
+++ b/modules/local/project_spots.nf
@@ -1,6 +1,7 @@
 process PROJECT_SPOTS{
     debug false
     tag "Projecting spots $meta.id"
+    label 'process_medium'
 
     container 'docker.io/wuennemannflorian/project_spots:latest'
 
diff --git a/modules/local/spot2cell.nf b/modules/local/spot2cell.nf
new file mode 100644
index 0000000..983e7e0
--- /dev/null
+++ b/modules/local/spot2cell.nf
@@ -0,0 +1,24 @@
+process SPOT2CELL{
+    debug true
+    tag "Assigning spots to cells for $meta.id"
+    label 'process_single'
+
+    container 'ghcr.io/schapirolabor/background_subtraction:v0.3.3'
+
+    input:
+    tuple val(meta) , path(spot_table)
+    tuple val(meta2), path(cell_mask)
+
+    output:
+    tuple val(meta), path("*cellxgene.tsv"), emit: cellxgene_table
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    spot2cell.py \
+    --spot_table ${spot_table} \
+    --cell_mask ${cell_mask}
+    """
+}
diff --git a/workflows/molkart.nf b/workflows/molkart.nf
index 4948573..302cc8c 100644
--- a/workflows/molkart.nf
+++ b/workflows/molkart.nf
@@ -37,7 +37,7 @@ include { CREATEILASTIKTRAININGSUBSET } from '../modules/local/createilastiktrai
 include { CREATE_STACK                } from '../modules/local/create_stack'
 include { CLAHE_DASK                  } from '../modules/local/clahe_dask'
 include { MINDAGAP_DUPLICATEFINDER    } from '../modules/local/mindagap_duplicatefinder'
-include { PROJECT_SPOTS               } from '../modules/local/project_spots'
+include { SPOT2CELL                   } from '../modules/local/spot2cell'
 include { TIFFH5CONVERT               } from '../modules/local/tiffh5convert'
 include { MOLCART_QC                  } from '../modules/local/molcart_qc'
 
@@ -61,7 +61,6 @@ include { CELLPOSE                    } from '../modules/nf-core/cellpose/main'
 include { DEEPCELL_MESMER             } from '../modules/nf-core/deepcell/mesmer/main'
 include { ILASTIK_PIXELCLASSIFICATION } from '../modules/nf-core/ilastik/pixelclassification/main'
 include { ILASTIK_MULTICUT            } from '../modules/nf-core/ilastik/multicut/main'
-include { MCQUANT                     } from '../modules/nf-core/mcquant/main'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -173,18 +172,6 @@ workflow MOLKART {
 
     qc_spots = MINDAGAP_DUPLICATEFINDER.out.marked_dups_spots
 
-    qc_spots.join(
-        image_tuple.map {
-            meta, tiff ->
-            [meta.subMap("id"), tiff]
-        }
-    ).set { dedup_spots }
-
-    PROJECT_SPOTS(
-        dedup_spots.map(it -> tuple(it[0],it[1])),
-        dedup_spots.map(it -> it[2])
-    )
-
     //
     // MODULE: DeepCell Mesmer segmentation
     //
@@ -248,33 +235,43 @@ workflow MOLKART {
                 .combine(Channel.of('ilastik')))
     }
 
-    PROJECT_SPOTS.out.img_spots
-        .join(PROJECT_SPOTS.out.channel_names)
-        .map{
-            meta,tiff,channels -> [meta,tiff,channels]
-            }
+    // Assigning of spots to mask
+    qc_spots
         .combine(segmentation_masks, by: 0)
-        .map {
-            meta, tiff, channels, mask, seg ->
-            new_meta = meta.clone()
-            new_meta.segmentation = seg
-            [new_meta, tiff, channels, mask]
-        }.set{ mcquant_in }
+        .set { dedup_spots }
+
+    SPOT2CELL(
+        dedup_spots.map(it -> tuple(it[0],it[1])),
+        dedup_spots.map(it -> tuple(it[0],it[2]))
+    )
+
+    // PROJECT_SPOTS.out.img_spots
+    //     .join(PROJECT_SPOTS.out.channel_names)
+    //     .map{
+    //         meta,tiff,channels -> [meta,tiff,channels]
+    //         }
+    //     .combine(segmentation_masks, by: 0)
+    //     .map {
+    //         meta, tiff, channels, mask, seg ->
+    //         new_meta = meta.clone()
+    //         new_meta.segmentation = seg
+    //         [new_meta, tiff, channels, mask]
+    //     }.set{ mcquant_in }
 
     //
     // MODULE: MCQuant
     //
-    MCQUANT(
-        mcquant_in.map{it -> tuple(it[0],it[1])},
-        mcquant_in.map{it -> tuple(it[0],it[3])},
-        mcquant_in.map{it -> tuple(it[0],it[2])}
-        )
-    ch_versions = ch_versions.mix(MCQUANT.out.versions)
+    // MCQUANT(
+    //     mcquant_in.map{it -> tuple(it[0],it[1])},
+    //     mcquant_in.map{it -> tuple(it[0],it[3])},
+    //     mcquant_in.map{it -> tuple(it[0],it[2])}
+    //     )
+    // ch_versions = ch_versions.mix(MCQUANT.out.versions)
 
     //
     // MODULE: MOLCART_QC
     //
-    MCQUANT.out.csv
+    SPOT2CELL.out.cellxgene_table
         .map {
             meta, quant ->
             [meta.subMap("id"), quant, meta.segmentation]

From b7b64443c06fa33968bf41112de5538c294dc777 Mon Sep 17 00:00:00 2001
From: Florian Wuennemann <flowuenne@gmail.com>
Date: Mon, 23 Oct 2023 23:57:53 +0000
Subject: [PATCH 2/9] Added process labels to modules missing them.

---
 CHANGELOG.md                  | 5 +++++
 modules/local/clahe_dask.nf   | 1 +
 modules/local/create_stack.nf | 1 +
 modules/local/molcart_qc.nf   | 1 +
 4 files changed, 8 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 62c7de8..5ebe370 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,11 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## v1.0.1dev - [2023.23.10]
+
+- Replace `PROJECT_SPOTS` and `MCQUANT` modules with spot2cells. This new (for now local) module reduces the RAM requirements drastically, because it doesn't create a multi-channel stack for the spots. Spots are assigned by looking up cell IDs at x,y, positions and iterating over the deduplicated spots table.
+- Added process labels to many modules to fix linting warnings
+
 ## v1.0.1dev - [2023.22.10]
 
 Replaced the `clahe` param with `skip_clahe` so that the default value for running CLAHE is `False`.
diff --git a/modules/local/clahe_dask.nf b/modules/local/clahe_dask.nf
index e6a16ca..65e83c6 100644
--- a/modules/local/clahe_dask.nf
+++ b/modules/local/clahe_dask.nf
@@ -1,6 +1,7 @@
 process CLAHE_DASK{
     debug false
     tag "Applying CLAHE to $meta.id"
+    label 'process_low'
 
     container 'ghcr.io/schapirolabor/background_subtraction:v0.3.3'
 
diff --git a/modules/local/create_stack.nf b/modules/local/create_stack.nf
index 68cf74a..45dc867 100644
--- a/modules/local/create_stack.nf
+++ b/modules/local/create_stack.nf
@@ -1,5 +1,6 @@
 process CREATE_STACK {
     tag "Stacking channels for $meta.id"
+    label 'process_medium'
 
     container 'ghcr.io/schapirolabor/background_subtraction:v0.3.3'
 
diff --git a/modules/local/molcart_qc.nf b/modules/local/molcart_qc.nf
index 3011a30..6e1f97d 100644
--- a/modules/local/molcart_qc.nf
+++ b/modules/local/molcart_qc.nf
@@ -1,6 +1,7 @@
 process MOLCART_QC{
     tag "${meta.id}"
     container 'docker.io/wuennemannflorian/project_spots:latest'
+    label 'process_single'
 
     input:
     tuple val(meta), path(mcquant)

From 8e540c0d1bc4822c370e677bcb0a8b805483ed67 Mon Sep 17 00:00:00 2001
From: Florian Wuennemann <flowuenne@gmail.com>
Date: Tue, 24 Oct 2023 01:19:01 +0000
Subject: [PATCH 3/9] MOLCART_QC working with spot2cell output.

---
 CHANGELOG.md                |  1 +
 bin/collect_QC.py           | 29 ++++++++++++++++-------------
 bin/spot2cell.py            | 13 ++++++++-----
 modules/local/molcart_qc.nf |  4 ++--
 workflows/molkart.nf        | 13 ++++++++++---
 5 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5ebe370..034ed53 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Replace `PROJECT_SPOTS` and `MCQUANT` modules with spot2cells. This new (for now local) module reduces the RAM requirements drastically, because it doesn't create a multi-channel stack for the spots. Spots are assigned by looking up cell IDs at x,y, positions and iterating over the deduplicated spots table.
 - Added process labels to many modules to fix linting warnings
+- Added meta map to molcart_qc output to remove linting warning
 
 ## v1.0.1dev - [2023.22.10]
 
diff --git a/bin/collect_QC.py b/bin/collect_QC.py
index fb6ad13..f50d0e7 100755
--- a/bin/collect_QC.py
+++ b/bin/collect_QC.py
@@ -14,19 +14,22 @@ def summarize_spots(spot_table):
     ## Calculate the total number of spots in spot_table
     total_spots = spot_table.shape[0]
 
-    return (tx_per_gene, total_spots)
+    ## Get list of genes
+    genes = spot_table['gene'].unique()
 
+    return (tx_per_gene, total_spots, genes)
 
-def summarize_segmasks(mcquant, spots_summary):
-    ## Calculate the total number of cells (rows) in mcquant
-    total_cells = mcquant.shape[0]
 
-    ## Calculate the average segmentation area from column Area in mcquant
-    avg_area = mcquant["Area"].mean()
+def summarize_segmasks(cellxgene_table, spots_summary):
+    ## Calculate the total number of cells (rows) in cellxgene_table
+    total_cells = cellxgene_table.shape[0]
+
+    ## Calculate the average segmentation area from column Area in cellxgene_table
+    avg_area = cellxgene_table["Area"].mean()
 
     ## Calculate the % of spots assigned
-    ## Subset mcquant for all columns with _intensity_sum in the column name and sum the column values
-    spot_assign = mcquant.filter(regex="_intensity_sum").sum(axis=1)
+    ## Subset cellxgene_table for all columns with _intensity_sum in the column name and sum the column values
+    spot_assign = cellxgene_table[spots_summary[2]].sum(axis=1)
     spot_assign_total = int(sum(spot_assign))
     spot_assign_per_cell = total_cells and spot_assign_total / total_cells or 0
     # spot_assign_per_cell = spot_assign_total / total_cells
@@ -36,9 +39,9 @@ def summarize_segmasks(mcquant, spots_summary):
 
 
 if __name__ == "__main__":
-    # Write an argparse with input options mcquant_in, spots and output options outdir, sample_id
+    # Write an argparse with input options cellxgene_table, spots and output options outdir, sample_id
     parser = argparse.ArgumentParser()
-    parser.add_argument("-i", "--mcquant", help="mcquant regionprops_table.")
+    parser.add_argument("-i", "--cellxgene", help="cellxgene regionprops_table.")
     parser.add_argument("-s", "--spots", help="Resolve biosciences spot table.")
     parser.add_argument("-o", "--outdir", help="Output directory.")
 
@@ -48,8 +51,8 @@ def summarize_segmasks(mcquant, spots_summary):
 
     args = parser.parse_args()
 
-    ## Read in mcquant table
-    mcquant = pd.read_csv(args.mcquant)
+    ## Read in cellxgene_table table
+    cellxgene_table = pd.read_csv(args.cellxgene, sep =",")
 
     ## Read in spot table
     spots = pd.read_table(args.spots, sep="\t", names=["x", "y", "z", "gene"])
@@ -57,7 +60,7 @@ def summarize_segmasks(mcquant, spots_summary):
 
     ## Summarize spots table
     summary_spots = summarize_spots(spots)
-    summary_segmentation = summarize_segmasks(mcquant, summary_spots)
+    summary_segmentation = summarize_segmasks(cellxgene_table, summary_spots)
 
     ## Create pandas data frame with one row per parameter and write each value in summary_segmentation to a new row in the data frame
     summary_df = pd.DataFrame(
diff --git a/bin/spot2cell.py b/bin/spot2cell.py
index c294da7..693d44b 100755
--- a/bin/spot2cell.py
+++ b/bin/spot2cell.py
@@ -14,9 +14,8 @@ def assign_spots2cell(spot_table, cell_mask):
     gene_counts = {}
 
     # Calculate cell properties for cell_mask using regionprops_table
-    cell_props = regionprops_table(cell_mask,
-                                   properties=["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"])
-                                 #properties=["label","centroid","area"])
+    cell_props = regionprops_table(cell_mask, properties=["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"])
+
     # Turn cell props into a pandas DataFrame and add a Cell_ID column
     name_map = {
         "CellID": "label",
@@ -28,11 +27,15 @@ def assign_spots2cell(spot_table, cell_mask):
         "Eccentricity": "eccentricity",
         "Solidity": "solidity",
         "Extent": "extent",
-        "Orientation": "orientation",
+        "Orientation": "orientation"
     }
+
     for new_name, old_name in name_map.items():
         cell_props[new_name] = cell_props[old_name]
 
+    for old_name in set(name_map.values()):
+        del cell_props[old_name]
+
     cell_props = pd.DataFrame(cell_props)
 
     # Exclude any rows that contain Duplicated in the gene column from spot_table
@@ -105,4 +108,4 @@ def assign_spots2cell(spot_table, cell_mask):
 
     basename = os.path.basename(args.spot_table)
     basename = os.path.splitext(basename)[0]
-    gene_counts_df.to_csv(f"{basename}.cellxgene.tsv", sep='\t',  header=True, index=False)
+    gene_counts_df.to_csv(f"{basename}.cellxgene.tsv", sep=',',  header=True, index=False)
diff --git a/modules/local/molcart_qc.nf b/modules/local/molcart_qc.nf
index 6e1f97d..0dfd835 100644
--- a/modules/local/molcart_qc.nf
+++ b/modules/local/molcart_qc.nf
@@ -4,7 +4,7 @@ process MOLCART_QC{
     label 'process_single'
 
     input:
-    tuple val(meta), path(mcquant)
+    tuple val(meta), path(cellxgene_table)
     tuple val(meta2), path(spot_table)
     val(segmethod)
 
@@ -19,7 +19,7 @@ process MOLCART_QC{
     def sample_id = "${meta.id}"
     """
     collect_QC.py \
-        --mcquant $mcquant \
+        --cellxgene $cellxgene_table \
         --spots $spot_table \
         --sample_id $sample_id \
         --segmentation_method $segmethod \
diff --git a/workflows/molkart.nf b/workflows/molkart.nf
index 302cc8c..f0784f0 100644
--- a/workflows/molkart.nf
+++ b/workflows/molkart.nf
@@ -238,6 +238,12 @@ workflow MOLKART {
     // Assigning of spots to mask
     qc_spots
         .combine(segmentation_masks, by: 0)
+        .map {
+            meta, spots_table, mask, segmethod ->
+            new_meta = meta.clone()
+            new_meta.segmentation = segmethod
+            [new_meta, spots_table, mask]
+            }
         .set { dedup_spots }
 
     SPOT2CELL(
@@ -275,11 +281,12 @@ workflow MOLKART {
         .map {
             meta, quant ->
             [meta.subMap("id"), quant, meta.segmentation]
-        }.set { mcquant_out }
+        }.set { spot2cell_out }
 
-    qc_spots.combine(
-        mcquant_out, by: 0)
+    qc_spots
+        .combine(spot2cell_out, by: 0)
         .set{ molcart_qc }
+    molcart_qc.view()
 
     MOLCART_QC(
             molcart_qc.map{it -> tuple(it[0],it[2])},

From de1389afccc98a24e30f655f77ca773022d130bd Mon Sep 17 00:00:00 2001
From: Florian Wuennemann <flowuenne@gmail.com>
Date: Tue, 24 Oct 2023 01:21:01 +0000
Subject: [PATCH 4/9] Added meta map to MOLCART_QC.

---
 modules/local/molcart_qc.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/molcart_qc.nf b/modules/local/molcart_qc.nf
index 0dfd835..c4a0696 100644
--- a/modules/local/molcart_qc.nf
+++ b/modules/local/molcart_qc.nf
@@ -9,7 +9,7 @@ process MOLCART_QC{
     val(segmethod)
 
     output:
-    path("*.csv"), emit: qc
+    tuple val(meta), path("*.csv"), emit: qc
 
     when:
     task.ext.when == null || task.ext.when

From a4d746110db49795f3bae6b5ef213a1d46ce7ff3 Mon Sep 17 00:00:00 2001
From: Florian Wuennemann <flowuenne@gmail.com>
Date: Tue, 24 Oct 2023 01:26:46 +0000
Subject: [PATCH 5/9] Python black fixes.

---
 bin/collect_QC.py |  4 ++--
 bin/spot2cell.py  | 51 ++++++++++++++++++++++++++++++-----------------
 2 files changed, 35 insertions(+), 20 deletions(-)

diff --git a/bin/collect_QC.py b/bin/collect_QC.py
index f50d0e7..e2337bc 100755
--- a/bin/collect_QC.py
+++ b/bin/collect_QC.py
@@ -15,7 +15,7 @@ def summarize_spots(spot_table):
     total_spots = spot_table.shape[0]
 
     ## Get list of genes
-    genes = spot_table['gene'].unique()
+    genes = spot_table["gene"].unique()
 
     return (tx_per_gene, total_spots, genes)
 
@@ -52,7 +52,7 @@ def summarize_segmasks(cellxgene_table, spots_summary):
     args = parser.parse_args()
 
     ## Read in cellxgene_table table
-    cellxgene_table = pd.read_csv(args.cellxgene, sep =",")
+    cellxgene_table = pd.read_csv(args.cellxgene, sep=",")
 
     ## Read in spot table
     spots = pd.read_table(args.spots, sep="\t", names=["x", "y", "z", "gene"])
diff --git a/bin/spot2cell.py b/bin/spot2cell.py
index 693d44b..c0fe42f 100755
--- a/bin/spot2cell.py
+++ b/bin/spot2cell.py
@@ -8,13 +8,26 @@
 import argparse
 import os
 
-def assign_spots2cell(spot_table, cell_mask):
 
+def assign_spots2cell(spot_table, cell_mask):
     # Initialize a dictionary to hold the counts
     gene_counts = {}
 
     # Calculate cell properties for cell_mask using regionprops_table
-    cell_props = regionprops_table(cell_mask, properties=["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"])
+    cell_props = regionprops_table(
+        cell_mask,
+        properties=[
+            "label",
+            "centroid",
+            "area",
+            "major_axis_length",
+            "minor_axis_length",
+            "eccentricity",
+            "solidity",
+            "extent",
+            "orientation",
+        ],
+    )
 
     # Turn cell props into a pandas DataFrame and add a Cell_ID column
     name_map = {
@@ -27,7 +40,7 @@ def assign_spots2cell(spot_table, cell_mask):
         "Eccentricity": "eccentricity",
         "Solidity": "solidity",
         "Extent": "extent",
-        "Orientation": "orientation"
+        "Orientation": "orientation",
     }
 
     for new_name, old_name in name_map.items():
@@ -39,14 +52,14 @@ def assign_spots2cell(spot_table, cell_mask):
     cell_props = pd.DataFrame(cell_props)
 
     # Exclude any rows that contain Duplicated in the gene column from spot_table
-    spot_table = spot_table[~spot_table['gene'].str.contains("Duplicated")]
+    spot_table = spot_table[~spot_table["gene"].str.contains("Duplicated")]
 
     # Iterate over each row in the grouped DataFrame
-    for index,row in spot_table.iterrows():
+    for index, row in spot_table.iterrows():
         # Get the x and y positions and gene
-        x = int(row['x'])
-        y = int(row['y'])
-        gene = row['gene']
+        x = int(row["x"])
+        y = int(row["y"])
+        gene = row["gene"]
 
         # Get the cell ID from the labeled mask
         cell_id = cell_mask[y, x]
@@ -69,27 +82,28 @@ def assign_spots2cell(spot_table, cell_mask):
     gene_counts_df = pd.DataFrame(gene_counts).T
 
     # Add a column to gene_counts_df for the Cell_ID, make it the first column of the table
-    gene_counts_df['CellID'] = gene_counts_df.index
+    gene_counts_df["CellID"] = gene_counts_df.index
 
     # Add the regionprops data from cell_props for each cell ID to gene_counts_df add NA when cell_ID exists in cell_props but not in gene_counts_df
-    gene_counts_df = gene_counts_df.merge(cell_props, on='CellID', how='outer')
+    gene_counts_df = gene_counts_df.merge(cell_props, on="CellID", how="outer")
 
     # Convert NaN values to 0
     gene_counts_df = gene_counts_df.fillna(0)
 
     # Sort by Cell_ID in ascending order
-    gene_counts_df = gene_counts_df.sort_values(by=['CellID'])
+    gene_counts_df = gene_counts_df.sort_values(by=["CellID"])
 
     # Make Cell_ID the first column in gene_counts_df
-    gene_counts_df = gene_counts_df.set_index('CellID').reset_index()
+    gene_counts_df = gene_counts_df.set_index("CellID").reset_index()
 
     # Filter out cell_ID = 0 into it's own dataframe called background
-    background = gene_counts_df[gene_counts_df['CellID'] == 0]
-    gene_counts_df = gene_counts_df[gene_counts_df['CellID'] != 0]
+    background = gene_counts_df[gene_counts_df["CellID"] == 0]
+    gene_counts_df = gene_counts_df[gene_counts_df["CellID"] != 0]
 
     # Return both gene_counts_df and background
     return gene_counts_df, background
 
+
 if __name__ == "__main__":
     # Add a python argument parser with options for input, output and image size in x and y
     parser = argparse.ArgumentParser()
@@ -99,13 +113,14 @@ def assign_spots2cell(spot_table, cell_mask):
     args = parser.parse_args()
 
     ## Read in spot table
-    spot_data = pd.read_csv(args.spot_table,
-                names=['x', 'y', 'z', 'gene',"empty"], sep='\t', header=None,index_col=None)
+    spot_data = pd.read_csv(
+        args.spot_table, names=["x", "y", "z", "gene", "empty"], sep="\t", header=None, index_col=None
+    )
 
-    cell_mask  = tiff.imread(args.cell_mask)
+    cell_mask = tiff.imread(args.cell_mask)
 
     gene_counts_df, background = assign_spots2cell(spot_data, cell_mask)
 
     basename = os.path.basename(args.spot_table)
     basename = os.path.splitext(basename)[0]
-    gene_counts_df.to_csv(f"{basename}.cellxgene.tsv", sep=',',  header=True, index=False)
+    gene_counts_df.to_csv(f"{basename}.cellxgene.tsv", sep=",", header=True, index=False)

From 58ec42f6b531d9664ec6536609422740ee23a7a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kre=C5=A1imir=20Be=C5=A1tak?= <kbestak@gmail.com>
Date: Tue, 24 Oct 2023 15:04:42 +0000
Subject: [PATCH 6/9] Fixed MultiQC break, removed PROJECT_SPOTS and MCQUANT,
 added duplicated spot count to QC

---
 CHANGELOG.md                     |  7 +++-
 assets/multiqc_config.yml        |  3 ++
 bin/collect_QC.py                |  9 +++--
 bin/project_spots.dask.py        | 64 -----------------------------
 bin/spot2cell.py                 |  6 ++-
 conf/modules.config              | 15 +------
 modules/local/molcart_qc.nf      |  2 +-
 modules/local/project_spots.nf   | 26 ------------
 modules/local/spot2cell.nf       |  8 ++--
 modules/nf-core/mcquant/main.nf  | 49 -----------------------
 modules/nf-core/mcquant/meta.yml | 69 --------------------------------
 workflows/molkart.nf             | 33 +++------------
 12 files changed, 32 insertions(+), 259 deletions(-)
 delete mode 100755 bin/project_spots.dask.py
 delete mode 100644 modules/local/project_spots.nf
 delete mode 100644 modules/nf-core/mcquant/main.nf
 delete mode 100644 modules/nf-core/mcquant/meta.yml

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 034ed53..bb20c4c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Replace `PROJECT_SPOTS` and `MCQUANT` modules with spot2cells. This new (for now local) module reduces the RAM requirements drastically, because it doesn't create a multi-channel stack for the spots. Spots are assigned by looking up cell IDs at x,y, positions and iterating over the deduplicated spots table.
 - Added process labels to many modules to fix linting warnings
-- Added meta map to molcart_qc output to remove linting warning
+- Added meta map to molcart_qc output to remove linting warning -- adjusted script for multiqc input accordingly
+- Added duplicated spots counts to collect_qc.py and multiqc_config.yml so that they also get counted.
+- Added tag option to spot2cell so that the output names with same sample id and different segmentation methods can be differentiated (they were overwriting each other previously)
+- removed project spots and mcquant from modules.config
+- changed pattern for molcart_qc as it was not matching the files (removed {})
+- added meta value to segmethod input in molcart_qc
 
 ## v1.0.1dev - [2023.22.10]
 
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 88a7947..3edf5af 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -47,6 +47,9 @@ custom_data:
       spot_assign_percent:
         title: Percentage of spots assigned to cell
         description: "% of spots assigned to cells"
+      duplicated_total:
+        title: Total number of duplicated spots in the area
+        description: "Total number of duplicated spots"
 sp:
   segmentation_stats:
     fn: "final_QC.all_samples.csv"
diff --git a/bin/collect_QC.py b/bin/collect_QC.py
index e2337bc..c96d397 100755
--- a/bin/collect_QC.py
+++ b/bin/collect_QC.py
@@ -32,8 +32,10 @@ def summarize_segmasks(cellxgene_table, spots_summary):
     spot_assign = cellxgene_table[spots_summary[2]].sum(axis=1)
     spot_assign_total = int(sum(spot_assign))
     spot_assign_per_cell = total_cells and spot_assign_total / total_cells or 0
+    spot_assign_per_cell = round(spot_assign_per_cell, 2)
     # spot_assign_per_cell = spot_assign_total / total_cells
     spot_assign_percent = spot_assign_total / spots_summary[1] * 100
+    spot_assign_percent = round(spot_assign_percent, 2)
 
     return (total_cells, avg_area, spot_assign_per_cell, spot_assign_total, spot_assign_percent)
 
@@ -44,9 +46,7 @@ def summarize_segmasks(cellxgene_table, spots_summary):
     parser.add_argument("-i", "--cellxgene", help="cellxgene regionprops_table.")
     parser.add_argument("-s", "--spots", help="Resolve biosciences spot table.")
     parser.add_argument("-o", "--outdir", help="Output directory.")
-
     parser.add_argument("-d", "--sample_id", help="Sample ID.")
-
     parser.add_argument("-g", "--segmentation_method", help="Segmentation method used.")
 
     args = parser.parse_args()
@@ -56,6 +56,7 @@ def summarize_segmasks(cellxgene_table, spots_summary):
 
     ## Read in spot table
     spots = pd.read_table(args.spots, sep="\t", names=["x", "y", "z", "gene"])
+    duplicated = sum(spots.gene.str.contains("Duplicated"))
     spots = spots[~spots.gene.str.contains("Duplicated")]
 
     ## Summarize spots table
@@ -73,6 +74,7 @@ def summarize_segmasks(cellxgene_table, spots_summary):
             "spot_assign_per_cell",
             "spot_assign_total",
             "spot_assign_percent",
+            "duplicated_total"
         ]
     )
     summary_df.loc[0] = [
@@ -85,8 +87,9 @@ def summarize_segmasks(cellxgene_table, spots_summary):
         summary_segmentation[2],
         summary_segmentation[3],
         summary_segmentation[4],
+        duplicated
     ]
-
+    print(args.sample_id)
     # Write summary_df to a csv file
     summary_df.to_csv(
         f"{args.outdir}/{args.sample_id}.{args.segmentation_method}.spot_QC.csv", header=True, index=False
diff --git a/bin/project_spots.dask.py b/bin/project_spots.dask.py
deleted file mode 100755
index ac32400..0000000
--- a/bin/project_spots.dask.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env python
-
-#### This script takes a table of Molecular Cartography spots as input and projects them
-#### onto a reference image. The output is a stack of images, one for each spot, with
-#### the spot projected onto the reference image shape.
-
-## Import packages
-import argparse
-import pandas as pd
-import numpy as np
-import dask.array as da
-import dask.dataframe as dd
-import tifffile
-from rich.progress import track
-from aicsimageio.writers import OmeTiffWriter
-
-
-# Make a function to project a table of spots with x,y coordinates onto a 2d plane based on reference image shape and add any duplicate spots to increase their pixel value in the output image
-def project_spots(spot_table, img):
-    # Initialize an empty image with the same shape as the reference image
-    img = np.zeros_like(img, dtype="int8")
-    # Iterate through each spot in the table
-    for spot in spot_table.itertuples():
-        # Add the corresponding spot count to the pixel value at the spot's x,y coordinates
-        img[spot.y, spot.x] += spot.counts
-    return img
-
-
-if __name__ == "__main__":
-    # Add a python argument parser with options for input, output and image size in x and y
-    parser = argparse.ArgumentParser()
-    parser.add_argument("-i", "--input", help="Spot table to project.")
-    parser.add_argument("-s", "--sample_id", help="Sample ID.")
-    parser.add_argument("-d", "--img_dims", dest="img_dims", help="Corresponding image to get dimensions from.")
-
-    args = parser.parse_args()
-
-    # spots = pd.read_csv(args.input)
-    spots = dd.read_table(args.input, sep="\t", names=["x", "y", "z", "gene"]).compute()
-    img = tifffile.imread(args.img_dims)
-
-    spots = spots[["y", "x", "gene"]]
-
-    ## Filter any genes marked with Duplicated
-    spots = spots[~spots.gene.str.contains("Duplicated")]
-
-    # Sum spots by z-axis
-    spots_zsum = spots.value_counts().to_frame("counts").reset_index()
-
-    # Project each gene into a 2d plane and add to list
-    # Add a printed message that says "Projecting spots for gene X" for each gene in the list
-    spots_2d_list = [
-        project_spots(spots_zsum[spots_zsum.gene == gene], img)
-        for gene in track(spots_zsum.gene.unique(), description="[green]Projecting spots...")
-    ]
-
-    # Stack images on the c-axis
-    spot_2d_stack = da.stack(spots_2d_list, axis=0)
-    ## Write a csv file containing the channel names
-    channel_names = spots_zsum.gene.unique().tolist()
-    pd.DataFrame(channel_names).to_csv(args.sample_id + ".channel_names.csv", index=False, header=False)
-
-    # tifffile.imwrite(args.output, spot_2d_stack, metadata={'axes': 'CYX'})
-    OmeTiffWriter.save(spot_2d_stack, args.sample_id + ".tiff", dim_order="CYX")
diff --git a/bin/spot2cell.py b/bin/spot2cell.py
index c0fe42f..72a5970 100755
--- a/bin/spot2cell.py
+++ b/bin/spot2cell.py
@@ -84,6 +84,7 @@ def assign_spots2cell(spot_table, cell_mask):
     # Add a column to gene_counts_df for the Cell_ID, make it the first column of the table
     gene_counts_df["CellID"] = gene_counts_df.index
 
+
     # Add the regionprops data from cell_props for each cell ID to gene_counts_df add NA when cell_ID exists in cell_props but not in gene_counts_df
     gene_counts_df = gene_counts_df.merge(cell_props, on="CellID", how="outer")
 
@@ -96,6 +97,8 @@ def assign_spots2cell(spot_table, cell_mask):
     # Make Cell_ID the first column in gene_counts_df
     gene_counts_df = gene_counts_df.set_index("CellID").reset_index()
 
+    gene_counts_df[spot_table.gene.unique()] = gene_counts_df[spot_table.gene.unique()].astype(int)
+
     # Filter out cell_ID = 0 into it's own dataframe called background
     background = gene_counts_df[gene_counts_df["CellID"] == 0]
     gene_counts_df = gene_counts_df[gene_counts_df["CellID"] != 0]
@@ -109,6 +112,7 @@ def assign_spots2cell(spot_table, cell_mask):
     parser = argparse.ArgumentParser()
     parser.add_argument("-s", "--spot_table", help="Spot table to project.")
     parser.add_argument("-c", "--cell_mask", help="Sample ID.")
+    parser.add_argument("--tag", type=str, help="Additional tag to append to filename")
 
     args = parser.parse_args()
 
@@ -123,4 +127,4 @@ def assign_spots2cell(spot_table, cell_mask):
 
     basename = os.path.basename(args.spot_table)
     basename = os.path.splitext(basename)[0]
-    gene_counts_df.to_csv(f"{basename}.cellxgene.tsv", sep=",", header=True, index=False)
+    gene_counts_df.to_csv(f"{basename}{'.' + args.tag if args.tag else ''}.cellxgene.csv", index=False)  # tag optional
diff --git a/conf/modules.config b/conf/modules.config
index 4688be9..6834c35 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -38,7 +38,7 @@ process {
     withName: 'MOLCART_QC' {
         publishDir = [
             path: { "${params.outdir}/molcart_qc" },
-            pattern: { "*.csv" }
+            pattern: "*.csv"
         ]
     }
 
@@ -91,14 +91,6 @@ process {
         ].join(" ").trim()
     }
 
-    withName: "PROJECT_SPOTS" {
-        memory     = "16GB"
-        publishDir = [
-            path: "${params.outdir}/projectedspots",
-            pattern: "*.{tiff,csv}"
-        ]
-    }
-
     withName: "CLAHE_DASK" {
         memory    = "16GB"
         ext.when  = { !params.skip_clahe }
@@ -150,9 +142,4 @@ process {
             saveAs: { filename -> "${meta.id}_cellpose_mask.tif" }
         ]
     }
-
-    withName: "MCQUANT" {
-        ext.args = "--intensity_props intensity_sum"
-    }
-
 }
diff --git a/modules/local/molcart_qc.nf b/modules/local/molcart_qc.nf
index c4a0696..54b40c3 100644
--- a/modules/local/molcart_qc.nf
+++ b/modules/local/molcart_qc.nf
@@ -6,7 +6,7 @@ process MOLCART_QC{
     input:
     tuple val(meta), path(cellxgene_table)
     tuple val(meta2), path(spot_table)
-    val(segmethod)
+    tuple val(meta3), val(segmethod)
 
     output:
     tuple val(meta), path("*.csv"), emit: qc
diff --git a/modules/local/project_spots.nf b/modules/local/project_spots.nf
deleted file mode 100644
index c184510..0000000
--- a/modules/local/project_spots.nf
+++ /dev/null
@@ -1,26 +0,0 @@
-process PROJECT_SPOTS{
-    debug false
-    tag "Projecting spots $meta.id"
-    label 'process_medium'
-
-    container 'docker.io/wuennemannflorian/project_spots:latest'
-
-    input:
-    tuple val(meta), path(spots)
-    path(img)
-
-    output:
-    tuple val(meta), path("${spots.baseName}.tiff"), emit: img_spots
-    tuple val(meta), path("${spots.baseName}.channel_names.csv"), emit: channel_names
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    """
-    project_spots.dask.py \
-    --input ${spots} \
-    --sample_id ${spots.baseName} \
-    --img_dims $img
-    """
-}
diff --git a/modules/local/spot2cell.nf b/modules/local/spot2cell.nf
index 983e7e0..7b8e3d8 100644
--- a/modules/local/spot2cell.nf
+++ b/modules/local/spot2cell.nf
@@ -8,9 +8,10 @@ process SPOT2CELL{
     input:
     tuple val(meta) , path(spot_table)
     tuple val(meta2), path(cell_mask)
+    val(tag)
 
     output:
-    tuple val(meta), path("*cellxgene.tsv"), emit: cellxgene_table
+    tuple val(meta), path("*cellxgene.csv"), emit: cellxgene_table
 
     when:
     task.ext.when == null || task.ext.when
@@ -18,7 +19,8 @@ process SPOT2CELL{
     script:
     """
     spot2cell.py \
-    --spot_table ${spot_table} \
-    --cell_mask ${cell_mask}
+        --spot_table ${spot_table} \
+        --cell_mask ${cell_mask} \
+        --tag ${tag}
     """
 }
diff --git a/modules/nf-core/mcquant/main.nf b/modules/nf-core/mcquant/main.nf
deleted file mode 100644
index bc0eedf..0000000
--- a/modules/nf-core/mcquant/main.nf
+++ /dev/null
@@ -1,49 +0,0 @@
-process MCQUANT {
-    tag "$meta.id"
-    label 'process_single'
-
-    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
-    container "docker.io/labsyspharm/quantification:1.5.4"
-
-    input:
-    tuple val(meta), path(image)
-    tuple val(meta2), path(mask)
-    tuple val(meta3), path(markerfile)
-
-    output:
-    tuple val(meta), path("*.csv"), emit: csv
-    path "versions.yml"           , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def VERSION = '1.5.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
-    """
-    python /app/CommandSingleCellExtraction.py \
-        --masks $mask \
-        --image $image \
-        --channel_names $markerfile \
-        --output . \
-        $args
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        mcquant: $VERSION
-    END_VERSIONS
-    """
-
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def VERSION = '1.5.4'
-    """
-    touch ${prefix}.csv
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        mcquant: $VERSION
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/mcquant/meta.yml b/modules/nf-core/mcquant/meta.yml
deleted file mode 100644
index 5400314..0000000
--- a/modules/nf-core/mcquant/meta.yml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "mcquant"
-description: write your description here
-keywords:
-  - quantification
-  - image_analysis
-  - mcmicro
-  - highly_multiplexed_imaging
-tools:
-  - "mcquant":
-      description: "Module for single-cell data extraction given a segmentation mask and multi-channel image. The CSV structure is aligned with histoCAT output."
-      homepage: "https://github.com/labsyspharm/quantification"
-      documentation: "https://github.com/labsyspharm/quantification/blob/master/README.md"
-      tool_dev_url: "https://github.com/labsyspharm/quantification"
-      doi: 10.1038/s41592-021-01308-y
-      licence: ""
-
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-
-  - image:
-      type: file
-      description: Multi-channel image file
-      pattern: "*.{tiff,tif,h5,hdf5}"
-
-  - meta2:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-
-  - mask:
-      type: file
-      description: Labeled segmentation mask for image
-      pattern: "*.{tiff,tif,h5,hdf5}"
-
-  - meta3:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-
-  - markerfile:
-      type: file
-      description: Marker file with channel names for image to quantify
-      pattern: "*.{csv}"
-
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
-  - csv:
-      type: file
-      description: Quantified regionprops_table
-      pattern: "*.{csv}"
-
-authors:
-  - "@FloWuenne"
diff --git a/workflows/molkart.nf b/workflows/molkart.nf
index f0784f0..98951b4 100644
--- a/workflows/molkart.nf
+++ b/workflows/molkart.nf
@@ -248,32 +248,10 @@ workflow MOLKART {
 
     SPOT2CELL(
         dedup_spots.map(it -> tuple(it[0],it[1])),
-        dedup_spots.map(it -> tuple(it[0],it[2]))
+        dedup_spots.map(it -> tuple(it[0],it[2])),
+        dedup_spots.map(it -> it[0].segmentation)
     )
 
-    // PROJECT_SPOTS.out.img_spots
-    //     .join(PROJECT_SPOTS.out.channel_names)
-    //     .map{
-    //         meta,tiff,channels -> [meta,tiff,channels]
-    //         }
-    //     .combine(segmentation_masks, by: 0)
-    //     .map {
-    //         meta, tiff, channels, mask, seg ->
-    //         new_meta = meta.clone()
-    //         new_meta.segmentation = seg
-    //         [new_meta, tiff, channels, mask]
-    //     }.set{ mcquant_in }
-
-    //
-    // MODULE: MCQuant
-    //
-    // MCQUANT(
-    //     mcquant_in.map{it -> tuple(it[0],it[1])},
-    //     mcquant_in.map{it -> tuple(it[0],it[3])},
-    //     mcquant_in.map{it -> tuple(it[0],it[2])}
-    //     )
-    // ch_versions = ch_versions.mix(MCQUANT.out.versions)
-
     //
     // MODULE: MOLCART_QC
     //
@@ -286,18 +264,16 @@ workflow MOLKART {
     qc_spots
         .combine(spot2cell_out, by: 0)
         .set{ molcart_qc }
-    molcart_qc.view()
 
     MOLCART_QC(
             molcart_qc.map{it -> tuple(it[0],it[2])},
             molcart_qc.map{it -> tuple(it[0],it[1])},
-            molcart_qc.map{it -> it[3]}
+            molcart_qc.map{it -> tuple(it[0],it[3])}
         )
 
     CUSTOM_DUMPSOFTWAREVERSIONS (
         ch_versions.unique().collectFile(name: 'collated_versions.yml')
     )
-
     //
     // MODULE: MultiQC
     //
@@ -306,7 +282,8 @@ workflow MOLKART {
     methods_description    = WorkflowMolkart.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
     ch_methods_description = Channel.value(methods_description)
 
-    ch_multiqc_files = MOLCART_QC.out.qc.collectFile(name: 'final_QC.all_samples.csv',keepHeader: true, storeDir: "$params.outdir" )
+    ch_multiqc_files = Channel.empty()
+    ch_multiqc_files = ch_multiqc_files.mix(MOLCART_QC.out.qc.map{it[1]}.collectFile(name: 'final_QC.all_samples.csv', keepHeader: true, storeDir: "$params.outdir"))
     ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())

From 9123124941bf284c0ca56784be8a2b11ef141c87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kre=C5=A1imir=20Be=C5=A1tak?= <kbestak@gmail.com>
Date: Tue, 24 Oct 2023 15:06:53 +0000
Subject: [PATCH 7/9] Updated CHANGELOG.md

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bb20c4c..b8695d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - removed project spots and mcquant from modules.config
 - changed pattern for molcart_qc as it was not matching the files (removed {})
 - added meta value to segmethod input in molcart_qc
+- spot counts are now int values
+- QC metrics rounded to 2 decimals
 
 ## v1.0.1dev - [2023.22.10]
 

From 1cd329a71f902ef29b9c5687abac962bb8703496 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kre=C5=A1imir=20Be=C5=A1tak?= <kbestak@gmail.com>
Date: Tue, 24 Oct 2023 15:15:27 +0000
Subject: [PATCH 8/9] Updated scripts with Python Black

---
 bin/collect_QC.py | 4 ++--
 bin/spot2cell.py  | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/bin/collect_QC.py b/bin/collect_QC.py
index c96d397..09dffce 100755
--- a/bin/collect_QC.py
+++ b/bin/collect_QC.py
@@ -74,7 +74,7 @@ def summarize_segmasks(cellxgene_table, spots_summary):
             "spot_assign_per_cell",
             "spot_assign_total",
             "spot_assign_percent",
-            "duplicated_total"
+            "duplicated_total",
         ]
     )
     summary_df.loc[0] = [
@@ -87,7 +87,7 @@ def summarize_segmasks(cellxgene_table, spots_summary):
         summary_segmentation[2],
         summary_segmentation[3],
         summary_segmentation[4],
-        duplicated
+        duplicated,
     ]
     print(args.sample_id)
     # Write summary_df to a csv file
diff --git a/bin/spot2cell.py b/bin/spot2cell.py
index 72a5970..eb218b3 100755
--- a/bin/spot2cell.py
+++ b/bin/spot2cell.py
@@ -84,7 +84,6 @@ def assign_spots2cell(spot_table, cell_mask):
     # Add a column to gene_counts_df for the Cell_ID, make it the first column of the table
     gene_counts_df["CellID"] = gene_counts_df.index
 
-
     # Add the regionprops data from cell_props for each cell ID to gene_counts_df add NA when cell_ID exists in cell_props but not in gene_counts_df
     gene_counts_df = gene_counts_df.merge(cell_props, on="CellID", how="outer")
 

From 91d31bd4684caec00ef5d05274c1df6654ee92f6 Mon Sep 17 00:00:00 2001
From: Florian Wuennemann <flowuenne@gmail.com>
Date: Wed, 25 Oct 2023 13:55:43 +0000
Subject: [PATCH 9/9] Removed mcquant completely.

---
 modules.json         | 5 -----
 workflows/molkart.nf | 5 -----
 2 files changed, 10 deletions(-)

diff --git a/modules.json b/modules.json
index d6da2cc..4915732 100644
--- a/modules.json
+++ b/modules.json
@@ -30,11 +30,6 @@
                         "git_sha": "6f150e1503c0826c21fedf1fa566cdbecbe98ec7",
                         "installed_by": ["modules"]
                     },
-                    "mcquant": {
-                        "branch": "master",
-                        "git_sha": "b9829e1064382745d8dff7f1d74d2138d2864f71",
-                        "installed_by": ["modules"]
-                    },
                     "mindagap/mindagap": {
                         "branch": "master",
                         "git_sha": "240937a2a9c30298110753292be041188891f2cb",
diff --git a/workflows/molkart.nf b/workflows/molkart.nf
index 98951b4..3eaaaa4 100644
--- a/workflows/molkart.nf
+++ b/workflows/molkart.nf
@@ -165,11 +165,6 @@ workflow MOLKART {
     MINDAGAP_DUPLICATEFINDER(spot_tuple)
     ch_versions = ch_versions.mix(MINDAGAP_DUPLICATEFINDER.out.versions)
 
-    //
-    // MODULE: PROJECT SPOTS
-    //
-    // Transform spot table to 2 dimensional numpy array to use with MCQUANT
-
     qc_spots = MINDAGAP_DUPLICATEFINDER.out.marked_dups_spots
 
     //