From f6cba52037205bba5846708a00431d266ad7afa2 Mon Sep 17 00:00:00 2001 From: Florian Wuennemann Date: Mon, 23 Oct 2023 23:43:12 +0000 Subject: [PATCH 1/9] Replace project spots and mcquant with spots2cell. --- bin/spot2cell.py | 108 +++++++++++++++++++++++++++++++++ modules/local/project_spots.nf | 1 + modules/local/spot2cell.nf | 24 ++++++++ workflows/molkart.nf | 61 +++++++++---------- 4 files changed, 162 insertions(+), 32 deletions(-) create mode 100755 bin/spot2cell.py create mode 100644 modules/local/spot2cell.nf diff --git a/bin/spot2cell.py b/bin/spot2cell.py new file mode 100755 index 0000000..c294da7 --- /dev/null +++ b/bin/spot2cell.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python + +## Import packages +import pandas as pd +import numpy as np +from skimage.measure import regionprops_table +import tifffile as tiff +import argparse +import os + +def assign_spots2cell(spot_table, cell_mask): + + # Initialize a dictionary to hold the counts + gene_counts = {} + + # Calculate cell properties for cell_mask using regionprops_table + cell_props = regionprops_table(cell_mask, + properties=["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"]) + #properties=["label","centroid","area"]) + # Turn cell props into a pandas DataFrame and add a Cell_ID column + name_map = { + "CellID": "label", + "X_centroid": "centroid-1", + "Y_centroid": "centroid-0", + "Area": "area", + "MajorAxisLength": "major_axis_length", + "MinorAxisLength": "minor_axis_length", + "Eccentricity": "eccentricity", + "Solidity": "solidity", + "Extent": "extent", + "Orientation": "orientation", + } + for new_name, old_name in name_map.items(): + cell_props[new_name] = cell_props[old_name] + + cell_props = pd.DataFrame(cell_props) + + # Exclude any rows that contain Duplicated in the gene column from spot_table + spot_table = spot_table[~spot_table['gene'].str.contains("Duplicated")] + + # Iterate over each row in the grouped DataFrame + for index,row in spot_table.iterrows(): + # Get the x and y positions and gene + x = int(row['x']) + y = int(row['y']) + gene = row['gene'] + + # Get the cell ID from the labeled mask + cell_id = cell_mask[y, x] + + # If the cell ID is not in the dictionary, add it + if cell_id not in gene_counts: + gene_counts[cell_id] = {} + if gene not in gene_counts[cell_id]: + gene_counts[cell_id][gene] = 1 + else: + gene_counts[cell_id][gene] += 1 + else: + if gene not in gene_counts[cell_id]: + gene_counts[cell_id][gene] = 1 + else: + # Add the count for this gene in this cell ID + gene_counts[cell_id][gene] += 1 + + # Convert the dictionary of counts into a DataFrame + gene_counts_df = pd.DataFrame(gene_counts).T + + # Add a column to gene_counts_df for the Cell_ID, make it the first column of the table + gene_counts_df['CellID'] = gene_counts_df.index + + # Add the regionprops data from cell_props for each cell ID to gene_counts_df add NA when cell_ID exists in cell_props but not in gene_counts_df + gene_counts_df = gene_counts_df.merge(cell_props, on='CellID', how='outer') + + # Convert NaN values to 0 + gene_counts_df = gene_counts_df.fillna(0) + + # Sort by Cell_ID in ascending order + gene_counts_df = gene_counts_df.sort_values(by=['CellID']) + + # Make Cell_ID the first column in gene_counts_df + gene_counts_df = gene_counts_df.set_index('CellID').reset_index() + + # Filter out cell_ID = 0 into it's own dataframe called background + background = gene_counts_df[gene_counts_df['CellID'] == 0] + gene_counts_df = gene_counts_df[gene_counts_df['CellID'] != 0] + + # Return both gene_counts_df and background + return gene_counts_df, background + +if __name__ == "__main__": + # Add a python argument parser with options for input, output and image size in x and y + parser = argparse.ArgumentParser() + parser.add_argument("-s", "--spot_table", help="Spot table to project.") + parser.add_argument("-c", "--cell_mask", help="Sample ID.") + + args = parser.parse_args() + + ## Read in spot table + spot_data = pd.read_csv(args.spot_table, + names=['x', 'y', 'z', 'gene',"empty"], sep='\t', header=None,index_col=None) + + cell_mask = tiff.imread(args.cell_mask) + + gene_counts_df, background = assign_spots2cell(spot_data, cell_mask) + + basename = os.path.basename(args.spot_table) + basename = os.path.splitext(basename)[0] + gene_counts_df.to_csv(f"{basename}.cellxgene.tsv", sep='\t', header=True, index=False) diff --git a/modules/local/project_spots.nf b/modules/local/project_spots.nf index 669d759..c184510 100644 --- a/modules/local/project_spots.nf +++ b/modules/local/project_spots.nf @@ -1,6 +1,7 @@ process PROJECT_SPOTS{ debug false tag "Projecting spots $meta.id" + label 'process_medium' container 'docker.io/wuennemannflorian/project_spots:latest' diff --git a/modules/local/spot2cell.nf b/modules/local/spot2cell.nf new file mode 100644 index 0000000..983e7e0 --- /dev/null +++ b/modules/local/spot2cell.nf @@ -0,0 +1,24 @@ +process SPOT2CELL{ + debug true + tag "Assigning spots to cells for $meta.id" + label 'process_single' + + container 'ghcr.io/schapirolabor/background_subtraction:v0.3.3' + + input: + tuple val(meta) , path(spot_table) + tuple val(meta2), path(cell_mask) + + output: + tuple val(meta), path("*cellxgene.tsv"), emit: cellxgene_table + + when: + task.ext.when == null || task.ext.when + + script: + """ + spot2cell.py \ + --spot_table ${spot_table} \ + --cell_mask ${cell_mask} + """ +} diff --git a/workflows/molkart.nf b/workflows/molkart.nf index 4948573..302cc8c 100644 --- a/workflows/molkart.nf +++ b/workflows/molkart.nf @@ -37,7 +37,7 @@ include { CREATEILASTIKTRAININGSUBSET } from '../modules/local/createilastiktrai include { CREATE_STACK } from '../modules/local/create_stack' include { CLAHE_DASK } from '../modules/local/clahe_dask' include { MINDAGAP_DUPLICATEFINDER } from '../modules/local/mindagap_duplicatefinder' -include { PROJECT_SPOTS } from '../modules/local/project_spots' +include { SPOT2CELL } from '../modules/local/spot2cell' include { TIFFH5CONVERT } from '../modules/local/tiffh5convert' include { MOLCART_QC } from '../modules/local/molcart_qc' @@ -61,7 +61,6 @@ include { CELLPOSE } from '../modules/nf-core/cellpose/main' include { DEEPCELL_MESMER } from '../modules/nf-core/deepcell/mesmer/main' include { ILASTIK_PIXELCLASSIFICATION } from '../modules/nf-core/ilastik/pixelclassification/main' include { ILASTIK_MULTICUT } from '../modules/nf-core/ilastik/multicut/main' -include { MCQUANT } from '../modules/nf-core/mcquant/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -173,18 +172,6 @@ workflow MOLKART { qc_spots = MINDAGAP_DUPLICATEFINDER.out.marked_dups_spots - qc_spots.join( - image_tuple.map { - meta, tiff -> - [meta.subMap("id"), tiff] - } - ).set { dedup_spots } - - PROJECT_SPOTS( - dedup_spots.map(it -> tuple(it[0],it[1])), - dedup_spots.map(it -> it[2]) - ) - // // MODULE: DeepCell Mesmer segmentation // @@ -248,33 +235,43 @@ workflow MOLKART { .combine(Channel.of('ilastik'))) } - PROJECT_SPOTS.out.img_spots - .join(PROJECT_SPOTS.out.channel_names) - .map{ - meta,tiff,channels -> [meta,tiff,channels] - } + // Assigning of spots to mask + qc_spots .combine(segmentation_masks, by: 0) - .map { - meta, tiff, channels, mask, seg -> - new_meta = meta.clone() - new_meta.segmentation = seg - [new_meta, tiff, channels, mask] - }.set{ mcquant_in } + .set { dedup_spots } + + SPOT2CELL( + dedup_spots.map(it -> tuple(it[0],it[1])), + dedup_spots.map(it -> tuple(it[0],it[2])) + ) + + // PROJECT_SPOTS.out.img_spots + // .join(PROJECT_SPOTS.out.channel_names) + // .map{ + // meta,tiff,channels -> [meta,tiff,channels] + // } + // .combine(segmentation_masks, by: 0) + // .map { + // meta, tiff, channels, mask, seg -> + // new_meta = meta.clone() + // new_meta.segmentation = seg + // [new_meta, tiff, channels, mask] + // }.set{ mcquant_in } // // MODULE: MCQuant // - MCQUANT( - mcquant_in.map{it -> tuple(it[0],it[1])}, - mcquant_in.map{it -> tuple(it[0],it[3])}, - mcquant_in.map{it -> tuple(it[0],it[2])} - ) - ch_versions = ch_versions.mix(MCQUANT.out.versions) + // MCQUANT( + // mcquant_in.map{it -> tuple(it[0],it[1])}, + // mcquant_in.map{it -> tuple(it[0],it[3])}, + // mcquant_in.map{it -> tuple(it[0],it[2])} + // ) + // ch_versions = ch_versions.mix(MCQUANT.out.versions) // // MODULE: MOLCART_QC // - MCQUANT.out.csv + SPOT2CELL.out.cellxgene_table .map { meta, quant -> [meta.subMap("id"), quant, meta.segmentation] From b7b64443c06fa33968bf41112de5538c294dc777 Mon Sep 17 00:00:00 2001 From: Florian Wuennemann Date: Mon, 23 Oct 2023 23:57:53 +0000 Subject: [PATCH 2/9] Added process labels to modules missing it. --- CHANGELOG.md | 5 +++++ modules/local/clahe_dask.nf | 1 + modules/local/create_stack.nf | 1 + modules/local/molcart_qc.nf | 1 + 4 files changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62c7de8..5ebe370 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.0.1dev - [2023.23.10] + +- Replace `PROJECT_SPOTS` and `MCQUANT` modules with spot2cells. This new (for now local) module reduces the RAM requirements drastically, because it doesn't create a multi-channel stack for the spots. Spots are assigned by looking up cell IDs at x,y, positions and iterating over the deduplicated spots table. +- Added process labels to many modules to fix linting warnings + ## v1.0.1dev - [2023.22.10] Replaced the `clahe` param with `skip_clahe` so that the default value for running CLAHE is `False`. diff --git a/modules/local/clahe_dask.nf b/modules/local/clahe_dask.nf index e6a16ca..65e83c6 100644 --- a/modules/local/clahe_dask.nf +++ b/modules/local/clahe_dask.nf @@ -1,6 +1,7 @@ process CLAHE_DASK{ debug false tag "Applying CLAHE to $meta.id" + label 'process_low' container 'ghcr.io/schapirolabor/background_subtraction:v0.3.3' diff --git a/modules/local/create_stack.nf b/modules/local/create_stack.nf index 68cf74a..45dc867 100644 --- a/modules/local/create_stack.nf +++ b/modules/local/create_stack.nf @@ -1,5 +1,6 @@ process CREATE_STACK { tag "Stacking channels for $meta.id" + label 'process_medium' container 'ghcr.io/schapirolabor/background_subtraction:v0.3.3' diff --git a/modules/local/molcart_qc.nf b/modules/local/molcart_qc.nf index 3011a30..6e1f97d 100644 --- a/modules/local/molcart_qc.nf +++ b/modules/local/molcart_qc.nf @@ -1,6 +1,7 @@ process MOLCART_QC{ tag "${meta.id}" container 'docker.io/wuennemannflorian/project_spots:latest' + label 'process_single' input: tuple val(meta), path(mcquant) From 8e540c0d1bc4822c370e677bcb0a8b805483ed67 Mon Sep 17 00:00:00 2001 From: Florian Wuennemann Date: Tue, 24 Oct 2023 01:19:01 +0000 Subject: [PATCH 3/9] MolcartQC working with spots2cell. --- CHANGELOG.md | 1 + bin/collect_QC.py | 29 ++++++++++++++++------------- bin/spot2cell.py | 13 ++++++++----- modules/local/molcart_qc.nf | 4 ++-- workflows/molkart.nf | 13 ++++++++++--- 5 files changed, 37 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ebe370..034ed53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Replace `PROJECT_SPOTS` and `MCQUANT` modules with spot2cells. This new (for now local) module reduces the RAM requirements drastically, because it doesn't create a multi-channel stack for the spots. Spots are assigned by looking up cell IDs at x,y, positions and iterating over the deduplicated spots table. - Added process labels to many modules to fix linting warnings +- Added meta map to molcart_qc output to remove linting warning ## v1.0.1dev - [2023.22.10] diff --git a/bin/collect_QC.py b/bin/collect_QC.py index fb6ad13..f50d0e7 100755 --- a/bin/collect_QC.py +++ b/bin/collect_QC.py @@ -14,19 +14,22 @@ def summarize_spots(spot_table): ## Calculate the total number of spots in spot_table total_spots = spot_table.shape[0] - return (tx_per_gene, total_spots) + ## Get list of genes + genes = spot_table['gene'].unique() + return (tx_per_gene, total_spots, genes) -def summarize_segmasks(mcquant, spots_summary): - ## Calculate the total number of cells (rows) in mcquant - total_cells = mcquant.shape[0] - ## Calculate the average segmentation area from column Area in mcquant - avg_area = mcquant["Area"].mean() +def summarize_segmasks(cellxgene_table, spots_summary): + ## Calculate the total number of cells (rows) in cellxgene_table + total_cells = cellxgene_table.shape[0] + + ## Calculate the average segmentation area from column Area in cellxgene_table + avg_area = cellxgene_table["Area"].mean() ## Calculate the % of spots assigned - ## Subset mcquant for all columns with _intensity_sum in the column name and sum the column values - spot_assign = mcquant.filter(regex="_intensity_sum").sum(axis=1) + ## Subset cellxgene_table for all columns with _intensity_sum in the column name and sum the column values + spot_assign = cellxgene_table[spots_summary[2]].sum(axis=1) spot_assign_total = int(sum(spot_assign)) spot_assign_per_cell = total_cells and spot_assign_total / total_cells or 0 # spot_assign_per_cell = spot_assign_total / total_cells @@ -36,9 +39,9 @@ def summarize_segmasks(mcquant, spots_summary): if __name__ == "__main__": - # Write an argparse with input options mcquant_in, spots and output options outdir, sample_id + # Write an argparse with input options cellxgene_table, spots and output options outdir, sample_id parser = argparse.ArgumentParser() - parser.add_argument("-i", "--mcquant", help="mcquant regionprops_table.") + parser.add_argument("-i", "--cellxgene", help="cellxgene regionprops_table.") parser.add_argument("-s", "--spots", help="Resolve biosciences spot table.") parser.add_argument("-o", "--outdir", help="Output directory.") @@ -48,8 +51,8 @@ def summarize_segmasks(mcquant, spots_summary): args = parser.parse_args() - ## Read in mcquant table - mcquant = pd.read_csv(args.mcquant) + ## Read in cellxgene_table table + cellxgene_table = pd.read_csv(args.cellxgene, sep =",") ## Read in spot table spots = pd.read_table(args.spots, sep="\t", names=["x", "y", "z", "gene"]) @@ -57,7 +60,7 @@ def summarize_segmasks(mcquant, spots_summary): ## Summarize spots table summary_spots = summarize_spots(spots) - summary_segmentation = summarize_segmasks(mcquant, summary_spots) + summary_segmentation = summarize_segmasks(cellxgene_table, summary_spots) ## Create pandas data frame with one row per parameter and write each value in summary_segmentation to a new row in the data frame summary_df = pd.DataFrame( diff --git a/bin/spot2cell.py b/bin/spot2cell.py index c294da7..693d44b 100755 --- a/bin/spot2cell.py +++ b/bin/spot2cell.py @@ -14,9 +14,8 @@ def assign_spots2cell(spot_table, cell_mask): gene_counts = {} # Calculate cell properties for cell_mask using regionprops_table - cell_props = regionprops_table(cell_mask, - properties=["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"]) - #properties=["label","centroid","area"]) + cell_props = regionprops_table(cell_mask, properties=["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"]) + # Turn cell props into a pandas DataFrame and add a Cell_ID column name_map = { "CellID": "label", @@ -28,11 +27,15 @@ def assign_spots2cell(spot_table, cell_mask): "Eccentricity": "eccentricity", "Solidity": "solidity", "Extent": "extent", - "Orientation": "orientation", + "Orientation": "orientation" } + for new_name, old_name in name_map.items(): cell_props[new_name] = cell_props[old_name] + for old_name in set(name_map.values()): + del cell_props[old_name] + cell_props = pd.DataFrame(cell_props) # Exclude any rows that contain Duplicated in the gene column from spot_table @@ -105,4 +108,4 @@ def assign_spots2cell(spot_table, cell_mask): basename = os.path.basename(args.spot_table) basename = os.path.splitext(basename)[0] - gene_counts_df.to_csv(f"{basename}.cellxgene.tsv", sep='\t', header=True, index=False) + gene_counts_df.to_csv(f"{basename}.cellxgene.tsv", sep=',', header=True, index=False) diff --git a/modules/local/molcart_qc.nf b/modules/local/molcart_qc.nf index 6e1f97d..0dfd835 100644 --- a/modules/local/molcart_qc.nf +++ b/modules/local/molcart_qc.nf @@ -4,7 +4,7 @@ process MOLCART_QC{ label 'process_single' input: - tuple val(meta), path(mcquant) + tuple val(meta), path(cellxgene_table) tuple val(meta2), path(spot_table) val(segmethod) @@ -19,7 +19,7 @@ process MOLCART_QC{ def sample_id = "${meta.id}" """ collect_QC.py \ - --mcquant $mcquant \ + --cellxgene $cellxgene_table \ --spots $spot_table \ --sample_id $sample_id \ --segmentation_method $segmethod \ diff --git a/workflows/molkart.nf b/workflows/molkart.nf index 302cc8c..f0784f0 100644 --- a/workflows/molkart.nf +++ b/workflows/molkart.nf @@ -238,6 +238,12 @@ workflow MOLKART { // Assigning of spots to mask qc_spots .combine(segmentation_masks, by: 0) + .map { + meta, spots_table, mask, segmethod -> + new_meta = meta.clone() + new_meta.segmentation = segmethod + [new_meta, spots_table, mask] + } .set { dedup_spots } SPOT2CELL( @@ -275,11 +281,12 @@ workflow MOLKART { .map { meta, quant -> [meta.subMap("id"), quant, meta.segmentation] - }.set { mcquant_out } + }.set { spot2cell_out } - qc_spots.combine( - mcquant_out, by: 0) + qc_spots + .combine(spot2cell_out, by: 0) .set{ molcart_qc } + molcart_qc.view() MOLCART_QC( molcart_qc.map{it -> tuple(it[0],it[2])}, From de1389afccc98a24e30f655f77ca773022d130bd Mon Sep 17 00:00:00 2001 From: Florian Wuennemann Date: Tue, 24 Oct 2023 01:21:01 +0000 Subject: [PATCH 4/9] Added meta map to MOLCART_QC. --- modules/local/molcart_qc.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/molcart_qc.nf b/modules/local/molcart_qc.nf index 0dfd835..c4a0696 100644 --- a/modules/local/molcart_qc.nf +++ b/modules/local/molcart_qc.nf @@ -9,7 +9,7 @@ process MOLCART_QC{ val(segmethod) output: - path("*.csv"), emit: qc + tuple val(meta), path("*.csv"), emit: qc when: task.ext.when == null || task.ext.when From a4d746110db49795f3bae6b5ef213a1d46ce7ff3 Mon Sep 17 00:00:00 2001 From: Florian Wuennemann Date: Tue, 24 Oct 2023 01:26:46 +0000 Subject: [PATCH 5/9] Python black fixes. --- bin/collect_QC.py | 4 ++-- bin/spot2cell.py | 51 ++++++++++++++++++++++++++++++----------------- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/bin/collect_QC.py b/bin/collect_QC.py index f50d0e7..e2337bc 100755 --- a/bin/collect_QC.py +++ b/bin/collect_QC.py @@ -15,7 +15,7 @@ def summarize_spots(spot_table): total_spots = spot_table.shape[0] ## Get list of genes - genes = spot_table['gene'].unique() + genes = spot_table["gene"].unique() return (tx_per_gene, total_spots, genes) @@ -52,7 +52,7 @@ def summarize_segmasks(cellxgene_table, spots_summary): args = parser.parse_args() ## Read in cellxgene_table table - cellxgene_table = pd.read_csv(args.cellxgene, sep =",") + cellxgene_table = pd.read_csv(args.cellxgene, sep=",") ## Read in spot table spots = pd.read_table(args.spots, sep="\t", names=["x", "y", "z", "gene"]) diff --git a/bin/spot2cell.py b/bin/spot2cell.py index 693d44b..c0fe42f 100755 --- a/bin/spot2cell.py +++ b/bin/spot2cell.py @@ -8,13 +8,26 @@ import argparse import os -def assign_spots2cell(spot_table, cell_mask): +def assign_spots2cell(spot_table, cell_mask): # Initialize a dictionary to hold the counts gene_counts = {} # Calculate cell properties for cell_mask using regionprops_table - cell_props = regionprops_table(cell_mask, properties=["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"]) + cell_props = regionprops_table( + cell_mask, + properties=[ + "label", + "centroid", + "area", + "major_axis_length", + "minor_axis_length", + "eccentricity", + "solidity", + "extent", + "orientation", + ], + ) # Turn cell props into a pandas DataFrame and add a Cell_ID column name_map = { @@ -27,7 +40,7 @@ def assign_spots2cell(spot_table, cell_mask): "Eccentricity": "eccentricity", "Solidity": "solidity", "Extent": "extent", - "Orientation": "orientation" + "Orientation": "orientation", } for new_name, old_name in name_map.items(): @@ -39,14 +52,14 @@ def assign_spots2cell(spot_table, cell_mask): cell_props = pd.DataFrame(cell_props) # Exclude any rows that contain Duplicated in the gene column from spot_table - spot_table = spot_table[~spot_table['gene'].str.contains("Duplicated")] + spot_table = spot_table[~spot_table["gene"].str.contains("Duplicated")] # Iterate over each row in the grouped DataFrame - for index,row in spot_table.iterrows(): + for index, row in spot_table.iterrows(): # Get the x and y positions and gene - x = int(row['x']) - y = int(row['y']) - gene = row['gene'] + x = int(row["x"]) + y = int(row["y"]) + gene = row["gene"] # Get the cell ID from the labeled mask cell_id = cell_mask[y, x] @@ -69,27 +82,28 @@ def assign_spots2cell(spot_table, cell_mask): gene_counts_df = pd.DataFrame(gene_counts).T # Add a column to gene_counts_df for the Cell_ID, make it the first column of the table - gene_counts_df['CellID'] = gene_counts_df.index + gene_counts_df["CellID"] = gene_counts_df.index # Add the regionprops data from cell_props for each cell ID to gene_counts_df add NA when cell_ID exists in cell_props but not in gene_counts_df - gene_counts_df = gene_counts_df.merge(cell_props, on='CellID', how='outer') + gene_counts_df = gene_counts_df.merge(cell_props, on="CellID", how="outer") # Convert NaN values to 0 gene_counts_df = gene_counts_df.fillna(0) # Sort by Cell_ID in ascending order - gene_counts_df = gene_counts_df.sort_values(by=['CellID']) + gene_counts_df = gene_counts_df.sort_values(by=["CellID"]) # Make Cell_ID the first column in gene_counts_df - gene_counts_df = gene_counts_df.set_index('CellID').reset_index() + gene_counts_df = gene_counts_df.set_index("CellID").reset_index() # Filter out cell_ID = 0 into it's own dataframe called background - background = gene_counts_df[gene_counts_df['CellID'] == 0] - gene_counts_df = gene_counts_df[gene_counts_df['CellID'] != 0] + background = gene_counts_df[gene_counts_df["CellID"] == 0] + gene_counts_df = gene_counts_df[gene_counts_df["CellID"] != 0] # Return both gene_counts_df and background return gene_counts_df, background + if __name__ == "__main__": # Add a python argument parser with options for input, output and image size in x and y parser = argparse.ArgumentParser() @@ -99,13 +113,14 @@ def assign_spots2cell(spot_table, cell_mask): args = parser.parse_args() ## Read in spot table - spot_data = pd.read_csv(args.spot_table, - names=['x', 'y', 'z', 'gene',"empty"], sep='\t', header=None,index_col=None) + spot_data = pd.read_csv( + args.spot_table, names=["x", "y", "z", "gene", "empty"], sep="\t", header=None, index_col=None + ) - cell_mask = tiff.imread(args.cell_mask) + cell_mask = tiff.imread(args.cell_mask) gene_counts_df, background = assign_spots2cell(spot_data, cell_mask) basename = os.path.basename(args.spot_table) basename = os.path.splitext(basename)[0] - gene_counts_df.to_csv(f"{basename}.cellxgene.tsv", sep=',', header=True, index=False) + gene_counts_df.to_csv(f"{basename}.cellxgene.tsv", sep=",", header=True, index=False) From 58ec42f6b531d9664ec6536609422740ee23a7a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kre=C5=A1imir=20Be=C5=A1tak?= Date: Tue, 24 Oct 2023 15:04:42 +0000 Subject: [PATCH 6/9] Fixed multiqc break, removed Project_spots and MCQuant, added Duplicated spot count to QC --- CHANGELOG.md | 7 +++- assets/multiqc_config.yml | 3 ++ bin/collect_QC.py | 9 +++-- bin/project_spots.dask.py | 64 ----------------------------- bin/spot2cell.py | 6 ++- conf/modules.config | 15 +------ modules/local/molcart_qc.nf | 2 +- modules/local/project_spots.nf | 26 ------------ modules/local/spot2cell.nf | 8 ++-- modules/nf-core/mcquant/main.nf | 49 ----------------------- modules/nf-core/mcquant/meta.yml | 69 -------------------------------- workflows/molkart.nf | 33 +++------------ 12 files changed, 32 insertions(+), 259 deletions(-) delete mode 100755 bin/project_spots.dask.py delete mode 100644 modules/local/project_spots.nf delete mode 100644 modules/nf-core/mcquant/main.nf delete mode 100644 modules/nf-core/mcquant/meta.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 034ed53..bb20c4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Replace `PROJECT_SPOTS` and `MCQUANT` modules with spot2cells. This new (for now local) module reduces the RAM requirements drastically, because it doesn't create a multi-channel stack for the spots. Spots are assigned by looking up cell IDs at x,y, positions and iterating over the deduplicated spots table. - Added process labels to many modules to fix linting warnings -- Added meta map to molcart_qc output to remove linting warning +- Added meta map to molcart_qc output to remove linting warning -- adjusted script for multiqc input accordingly +- Added duplicated spots counts to collect_qc.py and multiqc_config.yml so that they also get counted. +- Added tag option to spot2cell so that the output names with same sample id and different segmentation methods can be differentiated (they were overwriting each other previously) +- removed project spots and mcquant from modules.config +- changed pattern for molcart_qc as it was not matching the files (removed {}) +- added meta value to segmethod input in molcart_qc ## v1.0.1dev - [2023.22.10] diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 88a7947..3edf5af 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -47,6 +47,9 @@ custom_data: spot_assign_percent: title: Percentage of spots assigned to cell description: "% of spots assigned to cells" + duplicated_total: + title: Total number of duplicated spots in the area + description: "Total number of duplicated spots" sp: segmentation_stats: fn: "final_QC.all_samples.csv" diff --git a/bin/collect_QC.py b/bin/collect_QC.py index e2337bc..c96d397 100755 --- a/bin/collect_QC.py +++ b/bin/collect_QC.py @@ -32,8 +32,10 @@ def summarize_segmasks(cellxgene_table, spots_summary): spot_assign = cellxgene_table[spots_summary[2]].sum(axis=1) spot_assign_total = int(sum(spot_assign)) spot_assign_per_cell = total_cells and spot_assign_total / total_cells or 0 + spot_assign_per_cell = round(spot_assign_per_cell, 2) # spot_assign_per_cell = spot_assign_total / total_cells spot_assign_percent = spot_assign_total / spots_summary[1] * 100 + spot_assign_percent = round(spot_assign_percent, 2) return (total_cells, avg_area, spot_assign_per_cell, spot_assign_total, spot_assign_percent) @@ -44,9 +46,7 @@ def summarize_segmasks(cellxgene_table, spots_summary): parser.add_argument("-i", "--cellxgene", help="cellxgene regionprops_table.") parser.add_argument("-s", "--spots", help="Resolve biosciences spot table.") parser.add_argument("-o", "--outdir", help="Output directory.") - parser.add_argument("-d", "--sample_id", help="Sample ID.") - parser.add_argument("-g", "--segmentation_method", help="Segmentation method used.") args = parser.parse_args() @@ -56,6 +56,7 @@ def summarize_segmasks(cellxgene_table, spots_summary): ## Read in spot table spots = pd.read_table(args.spots, sep="\t", names=["x", "y", "z", "gene"]) + duplicated = sum(spots.gene.str.contains("Duplicated")) spots = spots[~spots.gene.str.contains("Duplicated")] ## Summarize spots table @@ -73,6 +74,7 @@ def summarize_segmasks(cellxgene_table, spots_summary): "spot_assign_per_cell", "spot_assign_total", "spot_assign_percent", + "duplicated_total" ] ) summary_df.loc[0] = [ @@ -85,8 +87,9 @@ def summarize_segmasks(cellxgene_table, spots_summary): summary_segmentation[2], summary_segmentation[3], summary_segmentation[4], + duplicated ] - + print(args.sample_id) # Write summary_df to a csv file summary_df.to_csv( f"{args.outdir}/{args.sample_id}.{args.segmentation_method}.spot_QC.csv", header=True, index=False diff --git a/bin/project_spots.dask.py b/bin/project_spots.dask.py deleted file mode 100755 index ac32400..0000000 --- a/bin/project_spots.dask.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python - -#### This script takes a table of Molecular Cartography spots as input and projects them -#### onto a reference image. The output is a stack of images, one for each spot, with -#### the spot projected onto the reference image shape. - -## Import packages -import argparse -import pandas as pd -import numpy as np -import dask.array as da -import dask.dataframe as dd -import tifffile -from rich.progress import track -from aicsimageio.writers import OmeTiffWriter - - -# Make a function to project a table of spots with x,y coordinates onto a 2d plane based on reference image shape and add any duplicate spots to increase their pixel value in the output image -def project_spots(spot_table, img): - # Initialize an empty image with the same shape as the reference image - img = np.zeros_like(img, dtype="int8") - # Iterate through each spot in the table - for spot in spot_table.itertuples(): - # Add the corresponding spot count to the pixel value at the spot's x,y coordinates - img[spot.y, spot.x] += spot.counts - return img - - -if __name__ == "__main__": - # Add a python argument parser with options for input, output and image size in x and y - parser = argparse.ArgumentParser() - parser.add_argument("-i", "--input", help="Spot table to project.") - parser.add_argument("-s", "--sample_id", help="Sample ID.") - parser.add_argument("-d", "--img_dims", dest="img_dims", help="Corresponding image to get dimensions from.") - - args = parser.parse_args() - - # spots = pd.read_csv(args.input) - spots = dd.read_table(args.input, sep="\t", names=["x", "y", "z", "gene"]).compute() - img = tifffile.imread(args.img_dims) - - spots = spots[["y", "x", "gene"]] - - ## Filter any genes marked with Duplicated - spots = spots[~spots.gene.str.contains("Duplicated")] - - # Sum spots by z-axis - spots_zsum = spots.value_counts().to_frame("counts").reset_index() - - # Project each gene into a 2d plane and add to list - # Add a printed message that says "Projecting spots for gene X" for each gene in the list - spots_2d_list = [ - project_spots(spots_zsum[spots_zsum.gene == gene], img) - for gene in track(spots_zsum.gene.unique(), description="[green]Projecting spots...") - ] - - # Stack images on the c-axis - spot_2d_stack = da.stack(spots_2d_list, axis=0) - ## Write a csv file containing the channel names - channel_names = spots_zsum.gene.unique().tolist() - pd.DataFrame(channel_names).to_csv(args.sample_id + ".channel_names.csv", index=False, header=False) - - # tifffile.imwrite(args.output, spot_2d_stack, metadata={'axes': 'CYX'}) - OmeTiffWriter.save(spot_2d_stack, args.sample_id + ".tiff", dim_order="CYX") diff --git a/bin/spot2cell.py b/bin/spot2cell.py index c0fe42f..72a5970 100755 --- a/bin/spot2cell.py +++ b/bin/spot2cell.py @@ -84,6 +84,7 @@ def assign_spots2cell(spot_table, cell_mask): # Add a column to gene_counts_df for the Cell_ID, make it the first column of the table gene_counts_df["CellID"] = gene_counts_df.index + # Add the regionprops data from cell_props for each cell ID to gene_counts_df add NA when cell_ID exists in cell_props but not in gene_counts_df gene_counts_df = gene_counts_df.merge(cell_props, on="CellID", how="outer") @@ -96,6 +97,8 @@ def assign_spots2cell(spot_table, cell_mask): # Make Cell_ID the first column in gene_counts_df gene_counts_df = gene_counts_df.set_index("CellID").reset_index() + gene_counts_df[spot_table.gene.unique()] = gene_counts_df[spot_table.gene.unique()].astype(int) + # Filter out cell_ID = 0 into it's own dataframe called background background = gene_counts_df[gene_counts_df["CellID"] == 0] gene_counts_df = gene_counts_df[gene_counts_df["CellID"] != 0] @@ -109,6 +112,7 @@ def assign_spots2cell(spot_table, cell_mask): parser = argparse.ArgumentParser() parser.add_argument("-s", "--spot_table", help="Spot table to project.") parser.add_argument("-c", "--cell_mask", help="Sample ID.") + parser.add_argument("--tag", type=str, help="Additional tag to append to filename") args = parser.parse_args() @@ -123,4 +127,4 @@ def assign_spots2cell(spot_table, cell_mask): basename = os.path.basename(args.spot_table) basename = os.path.splitext(basename)[0] - gene_counts_df.to_csv(f"{basename}.cellxgene.tsv", sep=",", header=True, index=False) + gene_counts_df.to_csv(f"{basename}.{args.tag}.cellxgene.csv", sep=",", header=True, index=False) diff --git a/conf/modules.config b/conf/modules.config index 4688be9..6834c35 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -38,7 +38,7 @@ process { withName: 'MOLCART_QC' { publishDir = [ path: { "${params.outdir}/molcart_qc" }, - pattern: { "*.csv" } + pattern: "*.csv" ] } @@ -91,14 +91,6 @@ process { ].join(" ").trim() } - withName: "PROJECT_SPOTS" { - memory = "16GB" - publishDir = [ - path: "${params.outdir}/projectedspots", - pattern: "*.{tiff,csv}" - ] - } - withName: "CLAHE_DASK" { memory = "16GB" ext.when = { !params.skip_clahe } @@ -150,9 +142,4 @@ process { saveAs: { filename -> "${meta.id}_cellpose_mask.tif" } ] } - - withName: "MCQUANT" { - ext.args = "--intensity_props intensity_sum" - } - } diff --git a/modules/local/molcart_qc.nf b/modules/local/molcart_qc.nf index c4a0696..54b40c3 100644 --- a/modules/local/molcart_qc.nf +++ b/modules/local/molcart_qc.nf @@ -6,7 +6,7 @@ process MOLCART_QC{ input: tuple val(meta), path(cellxgene_table) tuple val(meta2), path(spot_table) - val(segmethod) + tuple val(meta3), val(segmethod) output: tuple val(meta), path("*.csv"), emit: qc diff --git a/modules/local/project_spots.nf b/modules/local/project_spots.nf deleted file mode 100644 index c184510..0000000 --- a/modules/local/project_spots.nf +++ /dev/null @@ -1,26 +0,0 @@ -process PROJECT_SPOTS{ - debug false - tag "Projecting spots $meta.id" - label 'process_medium' - - container 'docker.io/wuennemannflorian/project_spots:latest' - - input: - tuple val(meta), path(spots) - path(img) - - output: - tuple val(meta), path("${spots.baseName}.tiff"), emit: img_spots - tuple val(meta), path("${spots.baseName}.channel_names.csv"), emit: channel_names - - when: - task.ext.when == null || task.ext.when - - script: - """ - project_spots.dask.py \ - --input ${spots} \ - --sample_id ${spots.baseName} \ - --img_dims $img - """ -} diff --git a/modules/local/spot2cell.nf b/modules/local/spot2cell.nf index 983e7e0..7b8e3d8 100644 --- a/modules/local/spot2cell.nf +++ b/modules/local/spot2cell.nf @@ -8,9 +8,10 @@ process SPOT2CELL{ input: tuple val(meta) , path(spot_table) tuple val(meta2), path(cell_mask) + val(tag) output: - tuple val(meta), path("*cellxgene.tsv"), emit: cellxgene_table + tuple val(meta), path("*cellxgene.csv"), emit: cellxgene_table when: task.ext.when == null || task.ext.when @@ -18,7 +19,8 @@ process SPOT2CELL{ script: """ spot2cell.py \ - --spot_table ${spot_table} \ - --cell_mask ${cell_mask} + --spot_table ${spot_table} \ + --cell_mask ${cell_mask} \ + --tag ${tag} """ } diff --git a/modules/nf-core/mcquant/main.nf b/modules/nf-core/mcquant/main.nf deleted file mode 100644 index bc0eedf..0000000 --- a/modules/nf-core/mcquant/main.nf +++ /dev/null @@ -1,49 +0,0 @@ -process MCQUANT { - tag "$meta.id" - label 'process_single' - - // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - container "docker.io/labsyspharm/quantification:1.5.4" - - input: - tuple val(meta), path(image) - tuple val(meta2), path(mask) - tuple val(meta3), path(markerfile) - - output: - tuple val(meta), path("*.csv"), emit: csv - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '1.5.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ - python /app/CommandSingleCellExtraction.py \ - --masks $mask \ - --image $image \ - --channel_names $markerfile \ - --output . \ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mcquant: $VERSION - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '1.5.4' - """ - touch ${prefix}.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mcquant: $VERSION - END_VERSIONS - """ -} diff --git a/modules/nf-core/mcquant/meta.yml b/modules/nf-core/mcquant/meta.yml deleted file mode 100644 index 5400314..0000000 --- a/modules/nf-core/mcquant/meta.yml +++ /dev/null @@ -1,69 +0,0 @@ -name: "mcquant" -description: write your description here -keywords: - - quantification - - image_analysis - - mcmicro - - highly_multiplexed_imaging -tools: - - "mcquant": - description: "Module for single-cell data extraction given a segmentation mask and multi-channel image. The CSV structure is aligned with histoCAT output." - homepage: "https://github.com/labsyspharm/quantification" - documentation: "https://github.com/labsyspharm/quantification/blob/master/README.md" - tool_dev_url: "https://github.com/labsyspharm/quantification" - doi: 10.1038/s41592-021-01308-y - licence: "" - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - - image: - type: file - description: Multi-channel image file - pattern: "*.{tiff,tif,h5,hdf5}" - - - meta2: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - - mask: - type: file - description: Labeled segmentation mask for image - pattern: "*.{tiff,tif,h5,hdf5}" - - - meta3: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - - markerfile: - type: file - description: Marker file with channel names for image to quantify - pattern: "*.{csv}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - - csv: - type: file - description: Quantified regionprops_table - pattern: "*.{csv}" - -authors: - - "@FloWuenne" diff --git a/workflows/molkart.nf b/workflows/molkart.nf index f0784f0..98951b4 100644 --- a/workflows/molkart.nf +++ b/workflows/molkart.nf @@ -248,32 +248,10 @@ workflow MOLKART { SPOT2CELL( dedup_spots.map(it -> tuple(it[0],it[1])), - dedup_spots.map(it -> tuple(it[0],it[2])) + dedup_spots.map(it -> tuple(it[0],it[2])), + dedup_spots.map(it -> it[0].segmentation) ) - // PROJECT_SPOTS.out.img_spots - // .join(PROJECT_SPOTS.out.channel_names) - // .map{ - // meta,tiff,channels -> [meta,tiff,channels] - // } - // .combine(segmentation_masks, by: 0) - // .map { - // meta, tiff, channels, mask, seg -> - // new_meta = meta.clone() - // new_meta.segmentation = seg - // [new_meta, tiff, channels, mask] - // }.set{ mcquant_in } - - // - // MODULE: MCQuant - // - // MCQUANT( - // mcquant_in.map{it -> tuple(it[0],it[1])}, - // mcquant_in.map{it -> tuple(it[0],it[3])}, - // mcquant_in.map{it -> tuple(it[0],it[2])} - // ) - // ch_versions = ch_versions.mix(MCQUANT.out.versions) - // // MODULE: MOLCART_QC // @@ -286,18 +264,16 @@ workflow MOLKART { qc_spots .combine(spot2cell_out, by: 0) .set{ molcart_qc } - molcart_qc.view() MOLCART_QC( molcart_qc.map{it -> tuple(it[0],it[2])}, molcart_qc.map{it -> tuple(it[0],it[1])}, - molcart_qc.map{it -> it[3]} + molcart_qc.map{it -> tuple(it[0],it[3])} ) CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) - // // MODULE: MultiQC // @@ -306,7 +282,8 @@ workflow MOLKART { methods_description = WorkflowMolkart.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) ch_methods_description = Channel.value(methods_description) - ch_multiqc_files = MOLCART_QC.out.qc.collectFile(name: 'final_QC.all_samples.csv',keepHeader: true, storeDir: "$params.outdir" ) + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(MOLCART_QC.out.qc.map{it[1]}.collectFile(name: 'final_QC.all_samples.csv', keepHeader: true, storeDir: "$params.outdir")) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) From 9123124941bf284c0ca56784be8a2b11ef141c87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kre=C5=A1imir=20Be=C5=A1tak?= Date: Tue, 24 Oct 2023 15:06:53 +0000 Subject: [PATCH 7/9] Updated CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb20c4c..b8695d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - removed project spots and mcquant from modules.config - changed pattern for molcart_qc as it was not matching the files (removed {}) - added meta value to segmethod input in molcart_qc +- spot counts are now int values +- QC metrics rounded to 2 decimals ## v1.0.1dev - [2023.22.10] From 1cd329a71f902ef29b9c5687abac962bb8703496 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kre=C5=A1imir=20Be=C5=A1tak?= Date: Tue, 24 Oct 2023 15:15:27 +0000 Subject: [PATCH 8/9] Updated scripts with Python Black --- bin/collect_QC.py | 4 ++-- bin/spot2cell.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/bin/collect_QC.py b/bin/collect_QC.py index c96d397..09dffce 100755 --- a/bin/collect_QC.py +++ b/bin/collect_QC.py @@ -74,7 +74,7 @@ def summarize_segmasks(cellxgene_table, spots_summary): "spot_assign_per_cell", "spot_assign_total", "spot_assign_percent", - "duplicated_total" + "duplicated_total", ] ) summary_df.loc[0] = [ @@ -87,7 +87,7 @@ def summarize_segmasks(cellxgene_table, spots_summary): summary_segmentation[2], summary_segmentation[3], summary_segmentation[4], - duplicated + duplicated, ] print(args.sample_id) # Write summary_df to a csv file diff --git a/bin/spot2cell.py b/bin/spot2cell.py index 72a5970..eb218b3 100755 --- a/bin/spot2cell.py +++ b/bin/spot2cell.py @@ -84,7 +84,6 @@ def assign_spots2cell(spot_table, cell_mask): # Add a column to gene_counts_df for the Cell_ID, make it the first column of the table gene_counts_df["CellID"] = gene_counts_df.index - # Add the regionprops data from cell_props for each cell ID to gene_counts_df add NA when cell_ID exists in cell_props but not in gene_counts_df gene_counts_df = gene_counts_df.merge(cell_props, on="CellID", how="outer") From 91d31bd4684caec00ef5d05274c1df6654ee92f6 Mon Sep 17 00:00:00 2001 From: Florian Wuennemann Date: Wed, 25 Oct 2023 13:55:43 +0000 Subject: [PATCH 9/9] Removed mcquant completely. --- modules.json | 5 ----- workflows/molkart.nf | 5 ----- 2 files changed, 10 deletions(-) diff --git a/modules.json b/modules.json index d6da2cc..4915732 100644 --- a/modules.json +++ b/modules.json @@ -30,11 +30,6 @@ "git_sha": "6f150e1503c0826c21fedf1fa566cdbecbe98ec7", "installed_by": ["modules"] }, - "mcquant": { - "branch": "master", - "git_sha": "b9829e1064382745d8dff7f1d74d2138d2864f71", - "installed_by": ["modules"] - }, "mindagap/mindagap": { "branch": "master", "git_sha": "240937a2a9c30298110753292be041188891f2cb", diff --git a/workflows/molkart.nf b/workflows/molkart.nf index 98951b4..3eaaaa4 100644 --- a/workflows/molkart.nf +++ b/workflows/molkart.nf @@ -165,11 +165,6 @@ workflow MOLKART { MINDAGAP_DUPLICATEFINDER(spot_tuple) ch_versions = ch_versions.mix(MINDAGAP_DUPLICATEFINDER.out.versions) - // - // MODULE: PROJECT SPOTS - // - // Transform spot table to 2 dimensional numpy array to use with MCQUANT - qc_spots = MINDAGAP_DUPLICATEFINDER.out.marked_dups_spots //