minor_fixes

nf-core · Jan 5, 2024 · e095a69 · e095a69
1 parent 01ff5c9
commit e095a69
Show file tree

Hide file tree

Showing 15 changed files with 53 additions and 51 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,10 +8,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### `Added`
 
 - Added createanndata process to workflow. This process will generate a spatial anndata object from the spot2cell output. The anndata object will be written to /anndata in the output folder.
+- Added tests for the createanndata process.
 
 ### `Fixed`
 
 - Updated version numbers for all local modules using the molkart-local container to v0.0.3
+- spot2cell: removed the `--tag` argument; `--output` is now required, and the output name is defined in `modules.config`.
+- Fixed the output documentation for create-training-subset.
+- Fixed formatting in local modules.
 
 ## v1.0.1dev - [2023.12.19]
 

diff --git a/bin/spot2cell.py b/bin/spot2cell.py
@@ -111,7 +111,6 @@ def assign_spots2cell(spot_table, cell_mask):
     parser = argparse.ArgumentParser()
     parser.add_argument("-s", "--spot_table", help="Spot table to project.")
     parser.add_argument("-c", "--cell_mask", help="Sample ID.")
-    parser.add_argument("--tag", type=str, help="Additional tag to append to filename")
     parser.add_argument("--output", type=str, help="Output path")
     parser.add_argument("--version", action="version", version="0.1.0")
 
@@ -126,11 +125,4 @@ def assign_spots2cell(spot_table, cell_mask):
 
     gene_counts_df, background = assign_spots2cell(spot_data, cell_mask)
 
-    if args.output:
-        outpath = args.output
-
-    else:
-        basename = os.path.basename(args.spot_table)
-        basename = os.path.splitext(basename)[0]
-        outpath = f"{basename}.{args.tag}.cellxgene.csv"
-    gene_counts_df.to_csv(outpath, sep=",", header=True, index=False)
+    gene_counts_df.to_csv(args.output, sep=",", header=True, index=False)
diff --git a/conf/modules.config b/conf/modules.config
@@ -101,7 +101,7 @@ process {
     }
 
     withName: "SPOT2CELL" {
-        ext.prefix = { "${meta.id}_${tag}"}
+        ext.prefix = { "${meta.id}_${meta.segmentation}"}
         publishDir = [
             path: { "${params.outdir}/spot2cell" },
             pattern: "*.csv",
@@ -213,6 +213,7 @@ process {
     }
 
     withName: "CREATE_ANNDATA" {
+        ext.prefix = { "${meta.id}_${meta.segmentation}"}
         publishDir = [
             path: "${params.outdir}/anndata",
             mode: params.publish_dir_mode,

diff --git a/docs/output.md b/docs/output.md
@@ -16,7 +16,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 - [segmentation](#segmentation) - Segment single cells from provided image using segmentation method of choice (Cellpose, Mesmer, ilastik) and filter them by size.
 - [Mindagap_duplicatefinder](#Mindagap) - Take a spot table and search for duplicates along grid lines.
 - [Spot2cell](#spot2cell) - Assign non-duplicated spots to segmented cells based on segmentation mask and extract cell shape information.
-- [Create_anndata](#anndata) - Creates a spatial AnnData object as described in the [Squidpy tutorial](https://squidpy.readthedocs.io/en/stable/notebooks/tutorials/tutorial_read_spatial.html).
+- [Create AnnData](#anndata) - Creates a spatial AnnData object as described in the [Squidpy tutorial](https://squidpy.readthedocs.io/en/stable/notebooks/tutorials/tutorial_read_spatial.html).
 - [MolkartQC](#molkartqc) - Produce QC metrics specific to this pipeline.
 - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline.
 - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution.
@@ -104,7 +104,7 @@ Spot2cell is a local module that assigns spots (without Duplicates) to cells via
 
 </details>
 
-CREATE_ANNDATA is a local module that generates an [AnnData object](https://anndata.readthedocs.io/en/latest/) storing expression, metadadata and spatial locations of cells.
+CREATE_ANNDATA is a local module that generates an [AnnData object](https://anndata.readthedocs.io/en/latest/) storing expression, metadata and spatial locations of cells.
 
 ### MolkartQC
 
@@ -136,17 +136,6 @@ MolkartQC is a local module used for gathering useful quality-control metrics fo
 
 Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>.
 
-### Pipeline information
-
-<details markdown="1">
-<summary>Output files</summary>
-
-- `pipeline_info/`
-  - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
-  - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline.
-  - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
-  - Parameters used by the pipeline run: `params.json`.
-
 ### create-training-subset
 
 <details markdown="1">
@@ -160,7 +149,18 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
 
 </details>
 
-Spot2cell is a local module that assigns spots (without Duplicates) to cells via a spot table and segmentation mask.
+Create training subset is an optional group of modules that create crops in `hdf5` and `tiff` formats and provide a crop overview for reusability.
+
+### Pipeline information
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `pipeline_info/`
+  - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
+  - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline.
+  - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
+  - Parameters used by the pipeline run: `params.json`.
 
 </details>
 

diff --git a/modules/local/clahe.nf b/modules/local/clahe.nf
@@ -15,7 +15,7 @@ process CLAHE{
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
+    def args   = task.ext.args   ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
     apply_clahe.dask.py \\

diff --git a/modules/local/createanndata.nf b/modules/local/createanndata.nf
@@ -8,14 +8,14 @@ process CREATE_ANNDATA {
     tuple val(meta), path(spot2cell)
 
     output:
-    tuple val(meta), path("*.adata")  , emit: stack
-    path "versions.yml"               , emit: versions
+    tuple val(meta), path("*.adata") , emit: stack
+    path "versions.yml"              , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
+    def args   = task.ext.args   ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
 
     """

diff --git a/modules/local/createstack.nf b/modules/local/createstack.nf
@@ -8,14 +8,14 @@ process CREATE_STACK {
     tuple val(meta), path(image)
 
     output:
-    tuple val(meta), path("*.ome.tif"), emit: stack
-    path "versions.yml"               , emit: versions
+    tuple val(meta), path("*.ome.tif") , emit: stack
+    path "versions.yml"                , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
+    def args   = task.ext.args   ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
 
     """

diff --git a/modules/local/croptiff.nf b/modules/local/croptiff.nf
@@ -17,7 +17,7 @@ process CROPTIFF {
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args     ?: ''
+    def args   = task.ext.args   ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
 
     """

diff --git a/modules/local/maskfilter.nf b/modules/local/maskfilter.nf
@@ -16,7 +16,7 @@ process MASKFILTER {
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
+    def args   = task.ext.args   ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
 
     """

diff --git a/modules/local/molkartqc.nf b/modules/local/molkartqc.nf
@@ -15,7 +15,7 @@ process MOLKARTQC{
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
+    def args   = task.ext.args   ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
 
     """

diff --git a/modules/local/spot2cell.nf b/modules/local/spot2cell.nf
@@ -8,7 +8,6 @@ process SPOT2CELL{
     input:
     tuple val(meta) , path(spot_table)
     tuple val(meta2), path(cell_mask)
-    val(tag)
 
     output:
     tuple val(meta), path("*.csv"), emit: cellxgene_table
@@ -18,14 +17,13 @@ process SPOT2CELL{
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
+    def args   = task.ext.args   ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
 
     """
     spot2cell.py \\
         --spot_table ${spot_table} \\
         --cell_mask ${cell_mask} \\
-        --tag ${tag} \\
         --output ${prefix}.csv \\
         $args
 

diff --git a/modules/local/tiffh5convert.nf b/modules/local/tiffh5convert.nf
@@ -15,7 +15,7 @@ process TIFFH5CONVERT {
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
+    def args   = task.ext.args   ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
 
     """

diff --git a/tests/main.nf.test b/tests/main.nf.test
@@ -29,6 +29,8 @@ nextflow_pipeline {
                 path("$outputDir/segmentation/filtered_masks/nuc_only_cellpose_filtered.tif"),
                 path("$outputDir/spot2cell/cellxgene_nuc_only_cellpose.csv"),
                 path("$outputDir/spot2cell/cellxgene_nuc_only_mesmer.csv"),
+                path("$outputDir/anndata/nuc_only_cellpose.adata"),
+                path("$outputDir/anndata/nuc_only_mesmer.adata"),
                 path("$outputDir/molkartqc/nuc_only.cellpose.spot_QC.csv"),
                 path("$outputDir/molkartqc/nuc_only.mesmer.spot_QC.csv")
             ).match()
@@ -60,6 +62,8 @@ nextflow_pipeline {
                 path("$outputDir/segmentation/filtered_masks/mem_only_cellpose_filtered.tif"),
                 path("$outputDir/spot2cell/cellxgene_mem_only_cellpose.csv"),
                 path("$outputDir/spot2cell/cellxgene_mem_only_mesmer.csv"),
+                path("$outputDir/anndata/mem_only_cellpose.adata"),
+                path("$outputDir/anndata/mem_only_mesmer.adata"),
                 path("$outputDir/molkartqc/mem_only.cellpose.spot_QC.csv"),
                 path("$outputDir/molkartqc/mem_only.mesmer.spot_QC.csv"),
             ).match()

diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap
@@ -10,10 +10,12 @@
             "mem_only_cellpose_filtered.tif:md5,590591f541b6d3f17810cd44dd71b252",
             "cellxgene_mem_only_cellpose.csv:md5,f063c1ce5e93e1e73431af36c6bc1e79",
             "cellxgene_mem_only_mesmer.csv:md5,198b187d5f151077437aa591f5991b22",
+            "mem_only_cellpose.adata:md5,b47ea63ffea0947e43511f17b6920cd8",
+            "mem_only_mesmer.adata:md5,530649647a3466316ea52dde9dede4ab",
             "mem_only.cellpose.spot_QC.csv:md5,df312175498f7942bffc33f2c2d8d1c9",
             "mem_only.mesmer.spot_QC.csv:md5,3273bd6fecf93a3d240614d1b38831c9"
         ],
-        "timestamp": "2023-12-07T15:13:30.564809013"
+        "timestamp": "2024-01-05T11:19:49.882138591"
     },
     "Nuclear channel, mesmer and cellpose, without clahe": {
         "content": [
@@ -25,10 +27,12 @@
             "nuc_only_cellpose_filtered.tif:md5,2784d8c6683ad80c24d8df4121e1128f",
             "cellxgene_nuc_only_cellpose.csv:md5,0610a8713457c28acf1bc4c298bee8e3",
             "cellxgene_nuc_only_mesmer.csv:md5,425cfb90a60b564e4bfff09ac5ea94c7",
+            "nuc_only_cellpose.adata:md5,fd52e62711465d754fd36a433761cb3b",
+            "nuc_only_mesmer.adata:md5,cd20ab6db5274bb85c960ea3bd8d2619",
             "nuc_only.cellpose.spot_QC.csv:md5,e77b5973e0997170d0fde5c5901ad551",
             "nuc_only.mesmer.spot_QC.csv:md5,f8f4eb85bb8269341ac072ac78962ed4"
         ],
-        "timestamp": "2023-12-07T15:07:51.353007277"
+        "timestamp": "2024-01-05T11:18:48.214995389"
     },
     "Create training subset": {
         "content": [

diff --git a/workflows/molkart.nf b/workflows/molkart.nf
@@ -32,16 +32,16 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-include { CROPTIFF      } from '../modules/local/croptiff'
-include { CROPHDF5      } from '../modules/local/crophdf5'
-include { CREATE_STACK  } from '../modules/local/createstack'
-include { CLAHE         } from '../modules/local/clahe'
-include { MASKFILTER    } from '../modules/local/maskfilter'
-include { MOLKARTQC     } from '../modules/local/molkartqc'
-include { MOLKARTQCPNG  } from '../modules/local/molkartqcpng'
-include { SPOT2CELL     } from '../modules/local/spot2cell'
-include { TIFFH5CONVERT } from '../modules/local/tiffh5convert'
+include { CROPTIFF       } from '../modules/local/croptiff'
+include { CROPHDF5       } from '../modules/local/crophdf5'
 include { CREATE_ANNDATA } from '../modules/local/createanndata'
+include { CREATE_STACK   } from '../modules/local/createstack'
+include { CLAHE          } from '../modules/local/clahe'
+include { MASKFILTER     } from '../modules/local/maskfilter'
+include { MOLKARTQC      } from '../modules/local/molkartqc'
+include { MOLKARTQCPNG   } from '../modules/local/molkartqcpng'
+include { SPOT2CELL      } from '../modules/local/spot2cell'
+include { TIFFH5CONVERT  } from '../modules/local/tiffh5convert'
 
 //
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
@@ -284,8 +284,7 @@ workflow MOLKART {
 
     SPOT2CELL(
         dedup_spots.map(it -> tuple(it[0],it[1])),
-        dedup_spots.map(it -> tuple(it[0],it[2])),
-        dedup_spots.map(it -> it[0].segmentation)
+        dedup_spots.map(it -> tuple(it[0],it[2]))
     )
     ch_versions = ch_versions.mix(SPOT2CELL.out.versions)