diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a58..4a9bc5c 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,6 +2,7 @@ "name": "nfcore", "image": "nfcore/gitpod:latest", "remoteUser": "gitpod", + "runArgs": ["--privileged"], // Configure tool-specific properties. "customizations": { @@ -17,11 +18,11 @@ "python.linting.flake8Path": "/opt/conda/bin/flake8", "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.linting.pylintPath": "/opt/conda/bin/pylint", }, // Add the IDs of extensions you want installed when the container is created. - "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } - } + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"], + }, + }, } diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index b07b118..7f7789a 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,7 +9,9 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/molkart then the best place to ask is on the nf-core Slack [#molkart](https://nfcore.slack.com/channels/molkart) channel ([join our Slack here](https://nf-co.re/join/slack)). +:::info +If you need help using or modifying nf-core/molkart then the best place to ask is on the nf-core Slack [#molkart](https://nfcore.slack.com/channels/molkart) channel ([join our Slack here](https://nf-co.re/join/slack)). +::: ## Contribution workflow @@ -25,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to +receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir `. + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -116,4 +121,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index d9bf0f3..959f14a 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,7 +42,7 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 22.10.1)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ * Container engine: _(e.g. 
Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 50d6098..284d2f6 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/molk - [ ] If necessary, also make a PR on the nf-core/molkart _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 1b3e7bc..176bdb5 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,21 +14,23 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/molkart/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/molkart/results-${{ github.sha }}" } - profiles: test_full,aws_tower + profiles: test_full + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index b7d0785..e721f23 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,18 +12,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/molkart/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/molkart/results-test-${{ github.sha }}" } - profiles: test,aws_tower + profiles: test + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ba0cf4e..ffca402 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,43 +1,108 @@ -name: nf-core CI # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +name: nf-core CI on: push: branches: - - dev + - "dev" pull_request: + branches: + - "dev" + - "master" release: types: [published] env: NXF_ANSI_LOG: false + NFTEST_VER: "0.8.2" concurrency: 
group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" cancel-in-progress: true jobs: + list: + name: List Tests + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/molkart') }}" + outputs: + # Expose matched filters as job 'modules' output variable + tests: ${{ steps.list.outputs.tests }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Setup Nextflow + uses: nf-core/setup-nextflow@v1 + + - name: Install nf-test + if: steps.cache-software.outputs.cache-hit != 'true' + run: | + wget -qO- https://code.askimed.com/install/nf-test | bash + sudo mv nf-test /usr/local/bin/ + + - name: List nf-test + id: list + run: | + echo "tests=$(nf-test list --silent --format=json)" >> "$GITHUB_OUTPUT" + test: - name: Run pipeline with test data + name: ${{ matrix.tags }} (${{ matrix.profile }}-${{ matrix.NXF_VER }}) # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/molkart') }}" + if: needs.list.outputs.tests != '[]' + needs: [list] runs-on: ubuntu-latest strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" + profile: ["docker"] #TODO add "singularity" + test: ["${{ fromJson(needs.list.outputs.tests) }}"] + steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 + + - name: Cache Nextflow installation + id: cache-software + uses: actions/cache@v3 + with: + path: | + /usr/local/bin/nf-test + /home/runner/.nf-test/nf-test.jar + key: molkart-${{ runner.os }}-${{ matrix.NXF_VER }} - name: Install Nextflow uses: nf-core/setup-nextflow@v1 with: version: "${{ matrix.NXF_VER }}" - - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix + - name: Install nf-test + if: steps.cache-software.outputs.cache-hit != 'true' + run: | + wget -qO- https://code.askimed.com/install/nf-test | bash + sudo mv nf-test /usr/local/bin/ + + - name: Set up Singularity + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-singularity@v5 + with: + singularity-version: 3.7.1 + + - name: Run nf-test + run: | + nf-test test \ + --profile="test,${{ matrix.profile }}" \ + ${{ matrix.test }} \ + --junitxml=${{ matrix.test }}-${{ matrix.profile }}.xml + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v3 + if: success() || failure() # run this step even if previous step failed + with: + path: ${{ matrix.test }}-${{ matrix.profile }}.xml + + - name: Output log on failure + if: failure() run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + sudo apt install bat > /dev/null + batcat --decorations=always --color=always ${{ github.workspace }}/.nf-test/tests/*/output/pipeline_info/software_versions.yml diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 1189989..0d832b1 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: 
actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 888cb4b..905c58e 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -27,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @@ -40,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check code lints with Black uses: psf/black@stable @@ -71,14 +71,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 - uses: actions/setup-python@v4 with: - python-version: "3.8" + python-version: "3.11" architecture: "x64" - name: Install dependencies diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml new file mode 100644 index 0000000..6ad3392 --- /dev/null +++ b/.github/workflows/release-announcements.yml @@ -0,0 +1,68 @@ +name: release-announcements +# Automatic release toot and tweet anouncements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@v0.0.2 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ec..acf7269 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,5 +1,12 @@ image: nfcore/gitpod:latest - +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - codezombiech.gitignore # Language support for .gitignore files diff --git a/CHANGELOG.md b/CHANGELOG.md index 86159d5..72f3787 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,273 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.0.1dev - [2024.01.16] + +### `Added` + +- mindagap_boxsize, mindagap_loopnum, mindagap_edges parameters +- updated mindagap version and adapted molkart.nf and modules.config to match updated mindagap module version +- CI testing with nf-test +- updated base.config to account for ilastik's high memory use +- updated metromap + +## v1.0.1dev - [2024.01.08] + +### `Fixed` + +- Fixed a bug in maskfilter for calculating the number of filtered objects. +- Changed naming of columns in multiqc table for filtered cells + +## v1.0.1dev - [2024.01.06] + +### `Fixed` + +- Updated version numbers for all local modules to molkart-local container v0.0.4 due to a bug in version v0.0.3 that prevented the modules to be properly run with singularity. + +## v1.0.1dev - [2024.01.04] + +### `Added` + +- Added createanndata process to workflow. This process will generate a spatial anndata object from the spot2cell output. The anndata object will be written to /anndata in the output folder. +- added tests for createanndata + +### `Fixed` + +- Updated version numbers for all local modules using the molkart-local container to v0.0.3 +- spot2cell - removed tag, output name now required, output name defined in modules.config +- output documentation for create training subset +- formatting in local modules + +## v1.0.1dev - [2023.12.19] + +### `Fixed` + +Fixed parameter specification for Maskfilter in modules.config, where both min and max area params in python were passed as min_area. + +## v1.0.1dev - [2023.12.18] + +### `Fixed` + +- Changed file prefix used in CLAHE to prevent file naming collisions if user used dots in filenames +- Changed label for CLAHE and MASKFILTER to process_medium to make them pass first try on real life datasets + +## v1.0.1dev - [2023.12.15] + +### `Added` + +- Added config file for full test dataset + +## v1.0.1dev - [2023.12.11] + +Crop overview is provided to Multiqc - now when create_training_subset is run, multiqc and customdumpsoftwareversions are also run. + +### `Added` + +- removed CropSummary.txt from published outputs - it gets collected at multiqc step and published there +- moved crop_overview.png to MultiQC folder +- gitpod container is nf-core/gitpod:dev instead of latest to include new versions of nf-tools and nf-test +- MOLKARTQCPNG process to add name to png for multiqc report, and combine if multiple samples are processed + +## v1.0.1dev - [2023.12.07] + +Local module revamp - all should use the same Docker image to save space. 
+ +### `Added` + +- renamed CREATEILASTIKTRAININGSUBSET to CROPHDF5 +- renamed TIFFTRAININGSUBSET to CROPTIFF +- local modules now use the ghcr.io/schapirolabor/molkart-local:v0.0.1 container +- CREATE_STACK when clause - also applied the size check logic in molkart.nf +- Added crop_hdf5.py script instead of using mcmicro-ilastik container +- pattern to only return cropped images and overview (not versions or full hdf5 image) +- clahe does not use aicsimageio anymore +- create stack outputs a pyramidal tif (Palom) +- updated mesmer module - accordingly added prefix logic (and for maskfilter) + +## v1.0.1dev - [2023.12.05] + +Added MASKFILTER module. + +### `Added` + +- MASKFILTER module with respective script, parameters, qc measures that are passed to MOLKARTQC and MULTIQC +- renamed molcart_qc to MOLKARTQC +- versions to main local modules (MOLKARTQC, SPOT2CELL) +- CREATE_STACK when clause (so that it does not show in the progress when it doesn't run) +- comments in molkart.nf for clarity + +### `Fixed` + +- collect_QC average area is now rounded +- prefix handling in some modules + +### `Removed` + +- SAMPLESHEETCHECK subworkflow and Python script + +## v1.0.1dev - [2023.12.02] + +Replaced local module for mindagap/duplicatefinder with nf-core module. + +### `Added` + +- installed mindagap/duplicatefinder via nf-core tools + +### `Removed` + +- removed local mindagap_duplicatefinder.nf in local modules + +## v1.0.1dev - [2023.11.30.] + +Changes to clahe - more nf-core compliant, script change, versions, updated tests. + +### `Added` + +- Clahe now outputs versions +- --clahe_pyramid_tile parameter (hidden) + +### `Fixed` + +- clahe local module now follows nf-core guidelines with output naming defined through ext.prefix +- In all cases, the same writer will be used for clahe now +- Fixed CLAHE metadata +- renamed process from CLAHE_DASK to CLAHE +- renamed tilesize parameter to mindagap_tilesize for clarity + +### `Removed` + +- clahe_skip_pyramid parameter + +## v1.0.1dev - [2023.11.28.] + +Fixed file naming schema for mindagap and spot2cell. If only mesmer is used for segmentation, create stack does not have to be run. + +### `Fixed` + +- Mindagap outputs, in case the filenames were the same, would overwrite each other. +- spot2cell outputs, in case the filenames and segmentation method were the same, would overwrite each other. +- removed hardcoded memory requirement for CREATEILASTIKTRAININGSUBSET +- if only mesmer is used for segmentation, create stack does not have to be run. + +## v1.0.1dev - [2023.11.24.] + +Added first nf-tests for the pipeline. + +### `Added` + +- nf-test for 3 runs +- main.nf where the input only has the nuclear channel (does not run clahe or ilastik) +- main.nf where the input has both nuclear and membrane image (runs clahe, does not run ilastik) +- main.nf where the input only has the nuclear channel (does not run clahe), creates training subset + +## v1.0.1dev - [2023.11.15] + +Upgraded workflow, fixed multisample cellpose segmentation with custom model. Added options necessary to make testing work on small images. + +### `Added` + +- white background in metromap +- clahe_skip_pyramid parameter to skip pyramid generation in the clahe step - necessary for smaller data + +### `Fixed` + +- Cellpose custom model functions with multiple samples now. + +## v1.0.1dev - [2023.11.13] + +Added documentation - usage.md and output.md + +### `Added` + +- usage.md documentation +- output.md documentation +- segmentation outputs are all moved to a segmentation folder. 
+- updated nf-core module versions +- CITATIONS.md updated +- README.md updated +- WorkflowMolkart.groovy updated to return citations if tools are used (added commas) + +## v1.0.1dev - [2023.25.10] + +Implemented the tilesize parameter for Mindagap_mindagap and mindagap_duplicatefinder so that smaller representative images can be used as test. + +### `Added` + +- tilesize param +- tilesize passing to mindagap and duplicatefinder in modules.config +- + +### `Fixed` + +### `Dependencies` + +### `Deprecated` + +## v1.0.1dev - [2023.23.10] + +- Replace `PROJECT_SPOTS` and `MCQUANT` modules with spot2cells. This new (for now local) module reduces the RAM requirements drastically, because it doesn't create a multi-channel stack for the spots. Spots are assigned by looking up cell IDs at x,y, positions and iterating over the deduplicated spots table. +- Added process labels to many modules to fix linting warnings +- Added meta map to molcart_qc output to remove linting warning -- adjusted script for multiqc input accordingly +- Added duplicated spots counts to collect_qc.py and multiqc_config.yml so that they also get counted. +- Added tag option to spot2cell so that the output names with same sample id and different segmentation methods can be differentiated (they were overwriting each other previously) +- removed project spots and mcquant from modules.config +- changed pattern for molcart_qc as it was not matching the files (removed {}) +- added meta value to segmethod input in molcart_qc +- spot counts are now int values +- QC metrics rounded to 2 decimals + +## v1.0.1dev - [2023.22.10] + +Replaced the `clahe` param with `skip_clahe` so that the default value for running CLAHE is `False`. + +### `Added` + +- skip_clahe param (default False) +- removed clahe param +- adjusted workflow to check the params.skip_clahe value instead of the params.clahe +- adjusted the ext.when in modules.config + +### `Fixed` + +### `Dependencies` + +### `Deprecated` + +## v1.0dev - [2023.18.10] + +Added barebones version of multiqc output. + +### `Added` + +- emit value for png overview for createtrainingtiff +- molcart-qc: added sampleid-segmentation tag as sample id, as multiqc was only showing the second row if sample id is same - can this be fixed to unique row? +- input for multiqc are the csv files produced by molcart qc + +### `Fixed` + +### `Dependencies` + +### `Deprecated` + +## v1.0.1dev - [2023.12.10] + +Molkart adapted to most nf-core standards with optional parameters, multiple segmentation options, as well as membrane channel handling. Started work on creating training subset functionality. + +### `Added` + +- parameters for pipeline execution +- ext.args logic for almost all modules with external parameters +- channel logic for membrane handling +- create stack process if membrane image present for Cellpose +- optional clahe +- started work on create subset functionality + +### `Fixed` + +### `Dependencies` + +### `Deprecated` + ## v1.0dev - [date] Initial release of nf-core/molkart, created with the [nf-core](https://nf-co.re/) template. diff --git a/CITATIONS.md b/CITATIONS.md index 30d55b0..0248a6d 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,9 +10,29 @@ ## Pipeline tools -- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +- [anndata](https://anndata.readthedocs.io/en/latest/) + + > Isaac Virshup, Sergei Rybakov, Fabian J. Theis, Philipp Angerer, F. 
Alexander Wolf anndata: Annotated data > bioRxiv 2021.12.16.473007; doi: https://doi.org/10.1101/2021.12.16.473007 + +- [Cellpose](https://www.cellpose.org/) + + > Stringer, C., Wang, T., Michaelos, M. et al. Cellpose: a generalist algorithm for cellular segmentation. Nat Methods 18, 100–106 (2021). https://doi.org/10.1038/s41592-020-01018-x + > Pachitariu, M., Stringer, C. Cellpose 2.0: how to train your own model. Nat Methods 19, 1634–1641 (2022). https://doi.org/10.1038/s41592-022-01663-4 + +- [ilastik](https://www.ilastik.org/) + + > Berg, S., Kutra, D., Kroeger, T. et al. ilastik: interactive machine learning for (bio)image analysis. Nat Methods 16, 1226–1232 (2019). https://doi.org/10.1038/s41592-019-0582-9 + +- [Mesmer](https://deepcell.readthedocs.io/en/master/API/deepcell.applications.html) + + > Greenwald NF, Miller G, Moen E, Kong A, Kagel A, Dougherty T, Fullaway CC, McIntosh BJ, Leow KX, Schwartz MS, Pavelchek C, Cui S, Camplisson I, Bar-Tal O, Singh J, Fong M, Chaudhry G, Abraham Z, Moseley J, Warshawsky S, Soon E, Greenbaum S, Risom T, Hollmann T, Bendall SC, Keren L, Graf W, Angelo M, Van Valen D. Whole-cell segmentation of tissue images with human-level performance using large-scale data annotation and deep learning. Nat Biotechnol. 2022 Apr;40(4):555-565. doi: 10.1038/s41587-021-01094-0. Epub 2021 Nov 18. PMID: 34795433; PMCID: PMC9010346. + +- [Mindagap](https://github.com/ViriatoII/MindaGap) + + > Ricardo Guerreiro, Florian Wuennemann, & pvtodorov. (2023). ViriatoII/MindaGap: v0.0.3 (0.0.3). Zenodo. https://doi.org/10.5281/zenodo.8120559 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. ## Software packaging/containerisation tools @@ -31,5 +51,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052..c089ec7 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. 
+Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). 
-- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. 
(Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. 
+ +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. 
+- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/LICENSE b/LICENSE index 462c737..41ad6b6 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) @FloWuenne +Copyright (c) @kbestak, @FloWuenne Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 755b849..2a76ddb 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # ![nf-core/molkart](docs/images/nf-core-molkart_logo_light.png#gh-light-mode-only) ![nf-core/molkart](docs/images/nf-core-molkart_logo_dark.png#gh-dark-mode-only) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/molkart/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions CI Status](https://github.com/nf-core/molkart/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/molkart/actions?query=workflow%3A%22nf-core+CI%22) +[![GitHub Actions Linting Status](https://github.com/nf-core/molkart/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/molkart/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/molkart/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) 
[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/molkart) @@ -12,47 +12,50 @@ ## Introduction -**nf-core/molkart** is a bioinformatics pipeline that ... +**nf-core/molkart** is a pipeline for processing Molecular Cartography data from Resolve Bioscience (combinatorial FISH). It takes as input a table of FISH spot positions (x,y,z,gene), a corresponding DAPI image (`tiff` format) and optionally a membrane staining image in the `tiff` format. nf-core/molkart performs end-to-end processing of the data including image processing, QC filtering of spots, cell segmentation, spot-to-cell assignment and reports quality metrics such as the spot assignment rate, average spots per cell and segmentation mask size ranges. - +

- - +Image preprocessing -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +- Fill the grid pattern in provided images ([`Mindagap`](https://github.com/ViriatoII/MindaGap)) +- Optionally apply contrast-limited adaptive histogram equalization +- If a second (membrane) image is present, combine images into a multichannel stack (if required for segmentation) -## Usage +Cell segmentation + +- Apply cell segmentation based on provided images, available options are: - [`Cellpose`](https://www.cellpose.org/) - [`Mesmer`](https://deepcell.readthedocs.io/en/master/API/deepcell.applications.html#mesmer) - [`ilastik`](https://www.ilastik.org/) +- Filter cells based on cell size to remove artifacts + +Spot processing -> **Note** -> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -> to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -> with `-profile test` before running the workflow on actual data. +- Find duplicated spots near grid lines ([`Mindagap`](https://github.com/ViriatoII/MindaGap)) +- Assign spots to segmented cells - - -Now, you can run the pipeline using: +Each row represents an FOV (field-of-view). Columns represent the sample ID (all must be unique), the path to the respective nuclear image, the spot table, and optionally the path to the respective membrane image (or any second image). - +Now, you can run the pipeline using all default values with: ```bash nextflow run nf-core/molkart \ @@ -61,26 +64,24 @@ nextflow run nf-core/molkart \ --outdir ``` -> **Warning:** -> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those -> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -For more details, please refer to the [usage documentation](https://nf-co.re/molkart/usage) and the [parameter documentation](https://nf-co.re/molkart/parameters). +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/molkart/usage) and the [parameter documentation](https://nf-co.re/molkart/parameters). ## Pipeline output -To see the the results of a test run with a full size dataset refer to the [results](https://nf-co.re/molkart/results) tab on the nf-core website pipeline page. +The pipeline outputs a matched cell-by-transcript table based on deduplicated spots and segmented cells, as well as preprocessing and segmentation intermediaries. +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/molkart/results) tab on the nf-core website pipeline page. For more details about the output files and reports, please refer to the [output documentation](https://nf-co.re/molkart/output). ## Credits -nf-core/molkart was originally written by @FloWuenne. +nf-core/molkart was originally written by @kbestak, @FloWuenne. 
-We thank the following people for their extensive assistance in the development of this pipeline: - - +We thank [Maxime U Garcia](https://github.com/maxulysse) for his assistance and support in the development of this pipeline. ## Contributions and Support @@ -91,9 +92,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations - - - + An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index fcc76d5..74dd468 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,17 +3,21 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/molkart Methods Description" section_href: "https://github.com/nf-core/molkart" plot_type: "html" -## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

  <h4>Methods</h4>
-  <p>Data was processed using nf-core/molkart v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).</p>
+  <p>Data was processed using nf-core/molkart v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
  <pre><code>${workflow.commandLine}</code></pre>
+  <p>${tool_citations}</p>
  <h4>References</h4>
  <ul>
-    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820</li>
-    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x</li>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820</li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x</li>
+    <li>Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7</li>
+    <li>da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192</li>
+    ${tool_bibliography}
  </ul>
  <div class="alert alert-info">
    <h5>Notes:</h5>
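The `${...}` placeholders in this methods template (e.g. `${workflow.manifest.version}`, `${workflow.commandLine}`, `${tool_citations}`) are filled in from the Nextflow `workflow` object and pipeline-supplied strings before the rendered HTML is handed to MultiQC. A minimal sketch of that substitution step, assuming Groovy's `SimpleTemplateEngine` and made-up example values — an illustration of the placeholder mechanics, not the pipeline's actual helper code:

```groovy
import groovy.text.SimpleTemplateEngine

// Hypothetical metadata map standing in for the Nextflow '${workflow}' object
// plus the extra fields (doi_text, tool_citations) injected by the pipeline.
def meta = [
    workflow      : [
        manifest   : [version: "1.0.0"],
        nextflow   : [version: "23.04.0"],
        commandLine: "nextflow run nf-core/molkart -profile test,docker --outdir results"
    ],
    doi_text      : "",
    tool_citations: "Preprocessing was performed with MindaGap; segmentation with Cellpose."
]

// Trimmed-down template using the same ${...} placeholder syntax as the YAML above.
// Triple single quotes keep the placeholders literal until the engine renders them.
def template = '''
<h4>Methods</h4>
<p>Data was processed using nf-core/molkart v${workflow.manifest.version} ${doi_text}.</p>
<p>The pipeline was executed with Nextflow v${workflow.nextflow.version} with the following command:</p>
<pre><code>${workflow.commandLine}</code></pre>
<p>${tool_citations}</p>
'''

// Render the HTML fragment that would end up in the MultiQC methods section.
def html = new SimpleTemplateEngine().createTemplate(template).make(meta).toString()
println html
```

In nf-core pipelines this rendering typically happens in the pipeline's Groovy utility code at run time, where the metadata map is built from the live `workflow` object rather than hard-coded values as in this sketch.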
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 002ca4c..16519ec 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,11 @@ +custom_logo: "nf-core-molkart_logo_light.png" +custom_logo_url: https://github.com/nf-core/molkart/ +custom_logo_title: "nf-core/molkart" + report_comment: > - This report has been generated by the nf-core/molkart + This report has been generated by the nf-core/molkart analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-molkart-methods-description": order: -1000 @@ -11,3 +15,54 @@ report_section_order: order: -1002 export_plots: true + +custom_data: + my_custom_content_image: + section_name: "Crop overview" + segmentation_stats: + id: "segmentation_stats" + section_name: "QC statistics from segmentation" + pconfig: + id: "segmentation_stats_table" + namespace: "Segmentation stats" + headers: + segmentation_method: + title: Segmentation method + description: "Segmentation method" + total_cells: + title: Total Number of cells + description: "Total # cells" + avg_area: + title: Average cell area + description: "Average area per cell" + total_spots: + title: Total number of spots + description: "Average number of spots assigned per cell" + spot_assign_per_cell: + title: Average spots assigned per cell + description: "Average number of spots assigned per cell" + spot_assign_total: + title: Total spots assigned + description: "Total number of spots assigned" + spot_assign_percent: + title: Percentage of spots assigned to cell + description: "% of spots assigned to cells" + duplicated_total: + title: Total number of duplicated spots in the area + description: "Total number of duplicated spots" + labels_total: + title: Total number of cells before filtering + description: "Total number of segmented labels" + labels_below_thresh: + title: Number of removed small cells + description: "Total number of labels below min_area" + labels_above_thresh: + title: Number of removed large cells + description: "Total number of labels above max_area" +sp: + segmentation_stats: + fn: "final_QC.all_samples.csv" + shared: true + my_custom_content_image: + fn: "*.png" +ignore_images: false diff --git a/assets/nf-core-molkart_logo_light.png b/assets/nf-core-molkart_logo_light.png index 6f16190..c92f2ae 100644 Binary files a/assets/nf-core-molkart_logo_light.png and b/assets/nf-core-molkart_logo_light.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab..c39a490 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +sample,nuclear_image,spot_table +sample_fov1,/path/to/dapi/file/sample_fov1.DAPI.tiff,/path/to/spots/file/sample_fov1.spots.txt +sample_fov2,/path/to/dapi/file/sample_fov2.DAPI.tiff,/path/to/spots/file/sample_fov2.spots.txt diff --git a/assets/schema_input.json b/assets/schema_input.json index 9252388..09c4ef5 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -12,17 +12,23 @@ "pattern": "^\\S+$", "errorMessage": "Sample name must be provided and cannot contain spaces" }, - "fastq_1": { + "nuclear_image": { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot 
contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "pattern": "^\\S+\\.(tif|tiff)$", + "format": "file-path", + "errorMessage": "Nuclear image must be provided, cannot contain spaces and must have extension '.tif' or '.tiff'" }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "spot_table": { + "type": "string", + "pattern": "^\\S+\\.(txt|tsv)$", + "errorMessage": "Spot table must be provided, has to have shape x,y,z,gene with sep = '\t', cannot contain spaces and must have extension '.txt'" + }, + "membrane_image": { + "errorMessage": "Membrane image is optional, and cannot contain spaces and must have extension '.tif' or '.tiff'", "anyOf": [ { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" + "pattern": "^\\S+\\.(tif|tiff)$" }, { "type": "string", @@ -31,6 +37,6 @@ ] } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "nuclear_image", "spot_table"] } } diff --git a/assets/slackreport.json b/assets/slackreport.json index 043d02f..5c8bbab 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_name": "nf-core/molkart ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/apply_clahe.dask.py b/bin/apply_clahe.dask.py new file mode 100755 index 0000000..8b9676c --- /dev/null +++ b/bin/apply_clahe.dask.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python +from __future__ import print_function, division +from distutils.log import error +import time +import argparse +from argparse import ArgumentParser as AP +from os.path import abspath +import os +import numpy as np +from skimage.exposure import equalize_adapthist +from multiprocessing.spawn import import_main_path +import sys +import copy +import argparse +import numpy as np +import tifffile +import zarr +import skimage.transform +from ome_types import from_tiff, to_xml +from os.path import abspath +from argparse import ArgumentParser as AP +import time + +# from memory_profiler import profile +# This API is apparently changing in skimage 1.0 but it's not clear to +# me what the replacement will be, if any. We'll explicitly import +# this so it will break loudly if someone tries this with skimage 1.0. 
+try: + from skimage.util.dtype import _convert as dtype_convert +except ImportError: + from skimage.util.dtype import convert as dtype_convert + + +def get_args(): + # Script description + description = """Easy-to-use, large scale CLAHE""" + + # Add parser + parser = AP(description=description, formatter_class=argparse.RawDescriptionHelpFormatter) + + # Sections + inputs = parser.add_argument_group(title="Required Input", description="Path to required input file") + inputs.add_argument("-r", "--input", dest="input", action="store", required=True, help="File path to input image.") + inputs.add_argument("-o", "--output", dest="output", action="store", required=True, help="Path to output image.") + inputs.add_argument( + "--cliplimit", dest="clip", action="store", required=True, type=float, default=0.01, help="Clip Limit for CLAHE" + ) + inputs.add_argument( + "--kernel", dest="kernel", action="store", required=False, type=int, default=25, help="Kernel size for CLAHE" + ) + inputs.add_argument( + "--nbins", dest="nbins", action="store", required=False, type=int, default=256, help="Number of bins for CLAHE" + ) + inputs.add_argument( + "-p", "--pixel-size", dest="pixel_size", action="store", type=float, required=False, help="Image pixel size" + ) + inputs.add_argument( + "--tile-size", + dest="tile_size", + action="store", + type=int, + default=1072, + help="Tile size for pyramid generation (must be divisible by 16)", + ) + inputs.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + + # Standardize paths + arg.input = abspath(arg.input) + arg.clip = float(arg.clip) + arg.pixel_size = float(arg.pixel_size) + arg.nbins = int(arg.nbins) + arg.kernel = int(arg.kernel) + + return arg + + +def preduce(coords, img_in, img_out): + print(img_in.dtype) + (iy1, ix1), (iy2, ix2) = coords + (oy1, ox1), (oy2, ox2) = np.array(coords) // 2 + tile = skimage.img_as_float32(img_in[iy1:iy2, ix1:ix2]) + tile = skimage.transform.downscale_local_mean(tile, (2, 2)) + tile = dtype_convert(tile, "uint16") + # tile = dtype_convert(tile, img_in.dtype) + img_out[oy1:oy2, ox1:ox2] = tile + + +def format_shape(shape): + return "%dx%d" % (shape[1], shape[0]) + + +def subres_tiles(level, level_full_shapes, tile_shapes, outpath, scale): + print(f"\n processing level {level}") + assert level >= 1 + num_channels, h, w = level_full_shapes[level] + tshape = tile_shapes[level] or (h, w) + tiff = tifffile.TiffFile(outpath) + zimg = zarr.open(tiff.aszarr(series=0, level=level - 1, squeeze=False)) + for c in range(num_channels): + sys.stdout.write(f"\r processing channel {c + 1}/{num_channels}") + sys.stdout.flush() + th = tshape[0] * scale + tw = tshape[1] * scale + for y in range(0, zimg.shape[1], th): + for x in range(0, zimg.shape[2], tw): + a = zimg[c, y : y + th, x : x + tw, 0] + a = skimage.transform.downscale_local_mean(a, (scale, scale)) + if np.issubdtype(zimg.dtype, np.integer): + a = np.around(a) + a = a.astype("uint16") + yield a + + +def detect_pixel_size(img_path, pixel_size=None): + if pixel_size is None: + print("Pixel size overwrite not specified") + try: + metadata = ome_types.from_tiff(img_path) + pixel_size = metadata.images[0].pixels.physical_size_x + except Exception as err: + print(err) + print("Pixel size detection using ome-types failed") + pixel_size = None + return pixel_size + + +def main(args): + _version = "0.1.0" + print(f"Head directory = {args.input}") + print(f"ClipLimit = {args.clip}, nbins = {args.nbins}, kernel_size = {args.kernel}, pixel_size = 
{args.pixel_size}") + + # clahe = cv2.createCLAHE(clipLimit = int(args.clip), tileGridSize=tuple(map(int, args.grid))) + + img_in = tifffile.imread(args.input).astype("uint16") + print(img_in.shape) + adapted = img_in / 65535 + adapted = ( + equalize_adapthist(adapted, kernel_size=args.kernel, clip_limit=args.clip, nbins=args.nbins) * 65535 + ).astype("uint16") + img_in = adapted[np.newaxis, :, :] + + # construct levels + tile_size = args.tile_size + scale = 2 + pixel_size = detect_pixel_size(args.input, args.pixel_size) + if pixel_size is None: + pixel_size = 1 + + dtype = img_in.dtype + base_shape = img_in[0].shape + num_channels = img_in.shape[0] + num_levels = (np.ceil(np.log2(max(1, max(base_shape) / tile_size))) + 1).astype(int) + factors = 2 ** np.arange(num_levels) + shapes = (np.ceil(np.array(base_shape) / factors[:, None])).astype(int) + + print("Pyramid level sizes: ") + for i, shape in enumerate(shapes): + print(f" level {i+1}: {format_shape(shape)}", end="") + if i == 0: + print("(original size)", end="") + print() + print() + print(shapes) + + level_full_shapes = [] + for shape in shapes: + level_full_shapes.append((num_channels, shape[0], shape[1])) + level_shapes = shapes + tip_level = np.argmax(np.all(level_shapes < tile_size, axis=1)) + tile_shapes = [(tile_size, tile_size) if i <= tip_level else None for i in range(len(level_shapes))] + + software = f"molkart_clahe {_version}" + pixel_size = pixel_size + metadata = { + "Creator": software, + "Pixels": { + "PhysicalSizeX": pixel_size, + "PhysicalSizeXUnit": "\u00b5m", + "PhysicalSizeY": pixel_size, + "PhysicalSizeYUnit": "\u00b5m", + }, + } + + # write pyramid + with tifffile.TiffWriter(args.output, ome=True, bigtiff=True) as tiff: + tiff.write( + data=img_in, + metadata=metadata, + shape=level_full_shapes[0], + subifds=int(num_levels - 1), + dtype=dtype, + resolution=(10000 / pixel_size, 10000 / pixel_size, "centimeter"), + tile=tile_shapes[0], + ) + for level, (shape, tile_shape) in enumerate(zip(level_full_shapes[1:], tile_shapes[1:]), 1): + tiff.write( + data=subres_tiles(level, level_full_shapes, tile_shapes, args.output, scale), + shape=shape, + subfiletype=1, + dtype=dtype, + tile=tile_shape, + ) + print() + + +if __name__ == "__main__": + # Read in arguments + args = get_args() + + # Run script + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index 4a758fe..0000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,259 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) - - def __init__( - self, - sample_col="sample", - first_col="fastq_1", - second_col="fastq_2", - single_col="single_end", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). 
- first_col (str): The name of the column that contains the first (or only) - FASTQ file path (default "fastq_1"). - second_col (str): The name of the column that contains the second (if any) - FASTQ file path (default "fastq_2"). - single_col (str): The name of the new column that will be inserted and - records whether the sample contains single- or paired-end sequencing - reads (default "single_end"). - - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._first_col = first_col - self._second_col = second_col - self._single_col = single_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_first(row) - self._validate_second(row) - self._validate_pair(row) - self._seen.add((row[self._sample_col], row[self._first_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_first(self, row): - """Assert that the first FASTQ entry is non-empty and has the right format.""" - if len(row[self._first_col]) <= 0: - raise AssertionError("At least the first FASTQ file is required.") - self._validate_fastq_format(row[self._first_col]) - - def _validate_second(self, row): - """Assert that the second FASTQ entry has the right format if it exists.""" - if len(row[self._second_col]) > 0: - self._validate_fastq_format(row[self._second_col]) - - def _validate_pair(self, row): - """Assert that read pairs have the same file extension. Report pair status.""" - if row[self._first_col] and row[self._second_col]: - row[self._single_col] = False - first_col_suffix = Path(row[self._first_col]).suffixes[-2:] - second_col_suffix = Path(row[self._second_col]).suffixes[-2:] - if first_col_suffix != second_col_suffix: - raise AssertionError("FASTQ pairs must have the same file extensions.") - else: - row[self._single_col] = True - - def _validate_fastq_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. - - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. 
- - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and FASTQ must be unique.") - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - - .. _viral recon samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - - """ - required_columns = {"sample", "fastq_1", "fastq_2"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. 
- with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/bin/collect_QC.py b/bin/collect_QC.py new file mode 100755 index 0000000..cf82441 --- /dev/null +++ b/bin/collect_QC.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python + +#### This script takes regionprops_tabe output from mcquant and the raw spot tables from Resolve bioscience as input +#### and calculates some QC metrics for masks and spot assignments +### If png files are provided, it combines them into one + +import argparse +import pandas as pd +from PIL import Image, ImageDraw, ImageFont +import os + + +def combine_png_files(input_paths, output_path): + print(input_paths) + images = [] + for file_path in input_paths: + img = Image.open(file_path) + image_name = os.path.basename(file_path).replace(".ome", "").replace(".crop", "_crop") + draw = ImageDraw.Draw(img) + font_size = 50 + font = ImageFont.load_default(font_size) + draw.text((100, 50), image_name, fill="black", font=font) + images.append(img) + + width, height = images[0].size + combined_image = Image.new("RGB", (width, len(images) * height)) + for i, img in enumerate(images): + combined_image.paste(img, (0, i * height)) + combined_image.save(os.path.join(output_path, "crop_overview.png")) + + +def summarize_spots(spot_table): + ## Calculate number of spots per gene + tx_per_gene = spot_table.groupby("gene").count().reset_index() + + ## Calculate the total number of spots in spot_table + total_spots = spot_table.shape[0] + + ## Get list of genes + genes = spot_table["gene"].unique() + + return (tx_per_gene, total_spots, genes) + + +def summarize_segmasks(cellxgene_table, spots_summary): + ## Calculate the total number of cells (rows) in cellxgene_table + total_cells = cellxgene_table.shape[0] + + ## Calculate the average segmentation area from column Area in cellxgene_table + avg_area = round(cellxgene_table["Area"].mean(), 2) + + ## Calculate the % of spots assigned + ## Subset cellxgene_table for all columns with _intensity_sum in the column name and sum the column values + spot_assign = cellxgene_table[spots_summary[2]].sum(axis=1) + spot_assign_total = int(sum(spot_assign)) + spot_assign_per_cell = total_cells and 
spot_assign_total / total_cells or 0 + spot_assign_per_cell = round(spot_assign_per_cell, 2) + # spot_assign_per_cell = spot_assign_total / total_cells + spot_assign_percent = spot_assign_total / spots_summary[1] * 100 + spot_assign_percent = round(spot_assign_percent, 2) + + return (total_cells, avg_area, spot_assign_per_cell, spot_assign_total, spot_assign_percent) + + +if __name__ == "__main__": + # Write an argparse with input options cellxgene_table, spots and output options outdir, sample_id + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--cellxgene", help="cellxgene regionprops_table.") + parser.add_argument("-s", "--spots", help="Resolve biosciences spot table.") + parser.add_argument("-o", "--outdir", help="Output directory.") + parser.add_argument("-d", "--sample_id", help="Sample ID.") + parser.add_argument("-g", "--segmentation_method", help="Segmentation method used.") + parser.add_argument("--filterqc", required=False, help="QC from mask filter step") + parser.add_argument("--png_overview", nargs="+", help="Crop overview image paths") + parser.add_argument("--version", action="version", version="0.1.0") + + args = parser.parse_args() + + if args.png_overview != None: + combine_png_files(args.png_overview, args.outdir) + + else: + ## Read in cellxgene_table table + cellxgene_table = pd.read_csv(args.cellxgene, sep=",") + + ## Read in spot table + spots = pd.read_table(args.spots, sep="\t", names=["x", "y", "z", "gene"]) + duplicated = sum(spots.gene.str.contains("Duplicated")) + spots = spots[~spots.gene.str.contains("Duplicated")] + + ## Pass on filterqc values + filterqc = pd.read_csv( + args.filterqc, + names=["below_min_area", "below_percentage", "above_max_area", "above_percentage", "total_labels"], + header=None, + ) + + ## Summarize spots table + summary_spots = summarize_spots(spots) + summary_segmentation = summarize_segmasks(cellxgene_table, summary_spots) + + ## Create pandas data frame with one row per parameter and write each value in summary_segmentation to a new row in the data frame + summary_df = pd.DataFrame( + columns=[ + "sample_id", + "segmentation_method", + "total_cells", + "avg_area", + "total_spots", + "spot_assign_per_cell", + "spot_assign_total", + "spot_assign_percent", + "duplicated_total", + "labels_total", + "labels_below_thresh", + "labels_above_thresh", + ] + ) + summary_df.loc[0] = [ + ##args.sample_id, + args.sample_id + "_" + args.segmentation_method, + args.segmentation_method, + summary_segmentation[0], + summary_segmentation[1], + summary_spots[1], + summary_segmentation[2], + summary_segmentation[3], + summary_segmentation[4], + duplicated, + filterqc.total_labels[1], + filterqc.below_min_area[1], + filterqc.above_max_area[1], + ] + print(args.sample_id) + # Write summary_df to a csv file + summary_df.to_csv( + f"{args.outdir}/{args.sample_id}.{args.segmentation_method}.spot_QC.csv", header=True, index=False + ) diff --git a/bin/create_anndata.py b/bin/create_anndata.py new file mode 100755 index 0000000..5725397 --- /dev/null +++ b/bin/create_anndata.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +import pandas as pd +import numpy as np +from anndata import AnnData +import argparse +from argparse import ArgumentParser as AP +from os.path import abspath +import time +from scipy.sparse import csr_matrix + + +def get_args(): + # Script description + description = """Anndata object creation""" + + # Add parser + parser = AP(description=description, formatter_class=argparse.RawDescriptionHelpFormatter) + + # Sections + inputs 
= parser.add_argument_group(title="Required Input", description="Path to required input file") + inputs.add_argument("-i", "--input", type=str, help="Path to the spot2cell csv file.") + inputs.add_argument("-s", "--spatial_cols", nargs="+", help="Column names for location data.") + inputs.add_argument( + "-o", "--output", dest="output", action="store", required=True, help="Path to output anndata object." + ) + inputs.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + arg.input = abspath(arg.input) + arg.output = abspath(arg.output) + return arg + + +def create_spatial_anndata(input, spatial_cols): + df = pd.read_csv(input) + spatial_coords = np.array(df[args.spatial_cols].values.tolist()) + # Find the index of 'Y_centroid' column + y_centroid_index = df.columns.get_loc("X_centroid") + # Create a list of all columns from 'Y_centroid' to the end + metadata_cols = df.columns[y_centroid_index:] + # Extract the excluded columns as metadata + metadata = df[metadata_cols] + + count_table = csr_matrix(df.drop(list(metadata_cols), axis=1).values.tolist()) + adata = AnnData(count_table, obsm={"spatial": spatial_coords}) + # Add the metadata to adata.obs + for col in metadata.columns: + adata.obs[col] = metadata[col].values + adata.obs_names = [f"Cell_{i:d}" for i in range(adata.n_obs)] + return adata + + +def main(args): + adata = create_spatial_anndata(args.input, args.spatial_cols) + adata.write(args.output) + + +if __name__ == "__main__": + args = get_args() + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/crop_hdf5.py b/bin/crop_hdf5.py new file mode 100755 index 0000000..99cd510 --- /dev/null +++ b/bin/crop_hdf5.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python3 +import tifffile +import numpy as np +import h5py +import pathlib +import os +import random +from skimage import filters +import scipy.io +import math +import argparse +import time + +# Most of the code by Joshua Hess from the labsyspharm/mcmicro-ilastik repo: https://github.com/labsyspharm/mcmicro-ilastik + + +def IlastikPrepOME( + input, output, crop, crop_size, nonzero_fraction, nuclei_index, num_channels, channelIDs, ring_mask, crop_amount +): + """Function for exporting a large ome.tiff image as an hdf5 image for + training ilastik random forest pixel classifier for cell segmentation""" + + # Create a pathlib object for the image name + im_name = pathlib.Path(input) + # Get the input directory + im_dir = im_name.parent + # Get the image name (remove ".ome") + im_stem = im_name.stem.replace(".ome", "") + # Create hdf5 name + h5_name = im_stem + ".hdf5" + + # Check to see if ring mask is being applied + if ring_mask: + # Read the matlab file + mat = scipy.io.loadmat(os.path.join(str(im_dir), (str(im_stem) + "-ROI-nodes.mat"))) + # Get the width and height indices for cropping + min_w, max_w = math.floor(abs(mat["nodes"][:, 0]).min()), math.ceil(abs(mat["nodes"][:, 0]).max()) + min_h, max_h = math.floor(abs(mat["nodes"][:, 1]).min()), math.ceil(abs(mat["nodes"][:, 1]).max()) + + # Check to see if the num_channels exists + if num_channels == None and channelIDs == None: + # raise an error + raise ValueError("--num_channels and --channelIDs are not specified") + + # Otherwise continue + else: + # Condition 1 + if num_channels == None and channelIDs != None: + # Set number of channels to length of channel IDs + num_channels = len(channelIDs) + # Check if number of channels and channelIDs agree + elif num_channels != 
None and channelIDs == None: + # Set channelIDs to be first n channels for num_channels + channelIDs = range(0, num_channels) + # infer the number of channels give the channel IDs + else: + # Check that the two agree + if num_channels != len(channelIDs): + # raise error + raise ValueError("--num_channels and length of --channelIDs do not agree") + + # Check if the number of channels is even or odd + if (num_channels % 2) == 0: + step = 2 + else: + step = 1 + + # Read the tif image - Reads the image as cyx + print("Reading " + im_name.stem + "...") + tif = tifffile.TiffFile(im_name) + # Set the index for the loop + idx = 0 + # Add counter for channel index + chan_idx = 0 + for i in range(int(num_channels / step)): + # Get the channel indices based on the step + chan_idx = channelIDs[idx : idx + step] + # Convert the tifffile object to array + im = tif.asarray(series=0, key=chan_idx) + # Check to see what step size is (if step is 1, tiffile will not read color channel, only width and height) + if step != 1: + # Swap the axes to be in the order zyxc for ilastik + im = np.swapaxes(im, 0, 2) + # Swap the axes to be in the order zyxc for ilastik + im = np.swapaxes(im, 0, 1) + # Check if step size is 1 or two (again, if 1, then no color channel) + if step != 1: + # Reshape the array + im = im.reshape((1, im.shape[0], im.shape[1], im.shape[2])) + else: + # Add a color axis when reshaping instead + im = im.reshape((1, im.shape[0], im.shape[1], 1)) + # Check to see if ring mask is being applied + if ring_mask: + # Crop the region + im = im[:, min_h:max_h, min_w:max_w, :] + # Create an hdf5 dataset if idx is 0 plane + if idx == 0: + # Create hdf5 + h5 = h5py.File(pathlib.Path(os.path.join(output, h5_name)), "w") + h5.create_dataset(str(im_stem), data=im[:, :, :, :], chunks=True, maxshape=(1, None, None, None)) + h5.close() + else: + # Append hdf5 dataset + h5 = h5py.File(pathlib.Path(os.path.join(output, h5_name)), "a") + # Add step size to the z axis + h5[str(im_stem)].resize((idx + step), axis=3) + # Add the image to the new channels + h5[str(im_stem)][:, :, :, idx : idx + step] = im[:, :, :, :] + h5.close() + # Update the index + idx = idx + step + # Finished exporting the image + print("Finished exporting image") + + # Optional to crop out regions for ilastik training + if crop: + # Get the index of nuclei in channelIDs + nuclei_index = channelIDs.index(nuclei_index) + # Run through each cropping iteration + full_h5 = h5py.File(pathlib.Path(os.path.join(output, h5_name)), "r") + im_nuc = full_h5[str(im_stem)][:, :, :, nuclei_index] + im = full_h5[str(im_stem)][:, :, :, :] + indices = {} + count = 0 + thresh = filters.threshold_otsu(im_nuc[:, :, :]) + while count < crop_amount: + # Get random height value that falls within crop range of the edges + extension_h = crop_size[0] // 2 + h = random.randint(extension_h, im_nuc.shape[1] - extension_h) + h_up, h_down = h - extension_h, h + extension_h + # Get random width value that falls within crop range of the edges + extension_w = crop_size[1] // 2 + w = random.randint(extension_w, im_nuc.shape[2] - extension_w) + w_lt, w_rt = w - extension_w, w + extension_w + # Crop the image with these coordinates expanding from center + crop = im_nuc[:, h_up:h_down, w_lt:w_rt] + crop_name = pathlib.Path(os.path.join(output, (im_stem + "_crop" + str(count) + ".hdf5"))) + # Check to see if the crop passes the nonzero fraction test + if ((crop[0, :, :] > thresh).sum() / (crop.shape[1] * crop.shape[2])) >= nonzero_fraction: + # Export the image to hdf5 + print("Writing 
" + crop_name.stem + ".hdf5...") + crop = im[:, h_up:h_down, w_lt:w_rt, :] + h5_crop = h5py.File(crop_name, "w") + h5_crop.create_dataset(str(im_stem) + "_" + str(count), data=crop, chunks=True) + h5_crop.close() + print("Finished exporting " + crop_name.stem + ".hdf5") + # Add one to the counter + count = count + 1 + # Add the indices to a table to store the cropped indices + indices.update({crop_name.stem: [(h_up, h_down), (w_lt, w_rt)]}) + # Export the indices to a text file to track the cropped regions + summary = open(pathlib.Path(os.path.join(output, im_stem) + "_CropSummary.txt"), "w") + summary.write(str(indices)) + summary.close() + + +def MultiIlastikOMEPrep( + input, output, crop, crop_size, nonzero_fraction, nuclei_index, num_channels, channelIDs, ring_mask, crop_amount +): + """Function for iterating over a list of files and output locations to + export large ome.tiff images in the correct hdf5 image format for ilastik + random forest pixel classification and batch processing""" + + # Iterate over each image in the list if only a single output + if len(output) < 2: + # Iterate through the images and export to the same location + for im_name in input: + # Run the IlastikPrepOME function for this image + IlastikPrepOME( + im_name, + output[0], + crop, + crop_size, + nonzero_fraction, + nuclei_index, + num_channels, + channelIDs, + ring_mask, + crop_amount, + ) + # Alternatively, iterate over output directories + else: + # Check to make sure the output directories and image paths are equal in length + if len(output) != len(input): + raise (ValueError("Detected more than one output but not as many directories as images")) + else: + # Iterate through images and output directories + for i in range(len(input)): + # Run the IlastikPrepOME function for this image and output directory + IlastikPrepOME( + input[i], + output[i], + crop, + crop_size, + nonzero_fraction, + nuclei_index, + num_channels, + channelIDs, + ring_mask, + crop_amount, + ) + + +def ParseInputOME(): + """Function for parsing command line arguments for input to ilastik prep functions""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--input", + nargs="*", + help="enter path to images with spaces between each image (Ex: /path1/image1.ome.tiff /path2/image2.ome.tiff)", + ) + parser.add_argument("--output", nargs="*") + parser.add_argument("--crop", action="store_true", default=False) + parser.add_argument("--no-crop", dest="crop", action="store_false") + parser.add_argument("--crop_size", type=int, nargs="*") + parser.add_argument("--nonzero_fraction", type=float) + parser.add_argument("--nuclei_index", type=int) + parser.add_argument("--num_channels", type=int) + parser.add_argument("--channelIDs", type=int, nargs="*") + parser.add_argument("--ring_mask", action="store_true", default=False) + parser.add_argument("--no-ring_mask", dest="ring_mask", action="store_false") + parser.add_argument("--crop_amount", type=int) + parser.add_argument("--version", action="version", version="0.1.0") + + args = parser.parse_args() + + # Adjustment to account for user-facing 1-based indexing and the 0-based Python implementation + if args.nuclei_index != None: + nuc_idx = args.nuclei_index - 1 + else: + nuc_idx = None + if args.channelIDs != None: + chIDs = [x - 1 for x in args.channelIDs] + else: + chIDs = None + + # Create a dictionary object to pass to the next function + dict = { + "input": args.input, + "output": args.output, + "crop": args.crop, + "crop_size": args.crop_size, + "nonzero_fraction": 
args.nonzero_fraction, + "nuclei_index": nuc_idx, + "num_channels": args.num_channels, + "channelIDs": chIDs, + "ring_mask": args.ring_mask, + "crop_amount": args.crop_amount, + } + # Print the dictionary object + print(dict) + # Return the dictionary + return dict + + +if __name__ == "__main__": + # Parse the command line arguments + args = ParseInputOME() + + # Run script + st = time.time() + # Run the MultiIlastikOMEPrep function + MultiIlastikOMEPrep(**args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/crop_tiff.py b/bin/crop_tiff.py new file mode 100755 index 0000000..d00115f --- /dev/null +++ b/bin/crop_tiff.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +import ast +import tifffile as tiff +import os +import argparse +import matplotlib.pyplot as plt +import numpy as np + + +# Create a function to create crops from a tiff image and a dictionary of crop coordinates +def create_crops(tiff_image, crop_dict): + for index, (crop_name, crop) in enumerate(crop_dict.items()): + crop_image = tiff_image[:, crop[0][0] : crop[0][1], crop[1][0] : crop[1][1]] + basename = os.path.basename(args.input) + basename = os.path.splitext(basename)[0] + tiff.imsave(f"./{basename}_crop{index}.tiff", crop_image) + ## Create a plot with all crop regions highlighted on the full image for easier selection + # Create a maximum projection of the channels in tiff_image + tiff_image_max = np.max(tiff_image, axis=0) + plt.imshow(tiff_image_max, cmap="gray") + plt.plot( + [crop[1][0], crop[1][1], crop[1][1], crop[1][0], crop[1][0]], + [crop[0][0], crop[0][0], crop[0][1], crop[0][1], crop[0][0]], + "red", + linewidth=1, + ) + plt.text( + crop[1][0], crop[0][0], str(index), color="white" + ) # make the text red and add a label to each box with index of the crop + plt.savefig(f"{basename}.crop_overview.png", dpi=300) + + +## Run the script +if __name__ == "__main__": + # Add argument parser with arguments for input tiffile, crop_summary input file and output tiffile + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--input", help="Input tiffile.") + parser.add_argument("-c", "--crop_summary", help="Crop summary file.") + parser.add_argument("--version", action="version", version="0.1.0") + args = parser.parse_args() + + # reading the crop information from the file + with open(args.crop_summary) as f: + crops = f.read() + # reconstructing the data as a dictionary + crops = ast.literal_eval(crops) + ## Read in tiff image + tiff_image = tiff.imread(args.input) + if len(tiff_image.shape) == 2: + tiff_image = np.expand_dims(tiff_image, axis=0) + + create_crops(tiff_image, crops) diff --git a/bin/maskfilter.py b/bin/maskfilter.py new file mode 100755 index 0000000..c47d7b3 --- /dev/null +++ b/bin/maskfilter.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python +import time +import argparse +from argparse import ArgumentParser as AP +from os.path import abspath +import argparse +import numpy as np +from skimage.measure import label, regionprops +from skimage.io import imread, imsave +from os.path import abspath +from argparse import ArgumentParser as AP +import time +import pandas as pd + + +def get_args(): + # Script description + description = """Segmentation mask filtering""" + + # Add parser + parser = AP(description=description, formatter_class=argparse.RawDescriptionHelpFormatter) + + # Sections + inputs = parser.add_argument_group(title="Required Input", description="Path to required input file") + inputs.add_argument("-r", "--input", dest="input", 
action="store", required=True, help="File path to input image.") + inputs.add_argument("-o", "--output", dest="output", action="store", required=True, help="Path to output image.") + inputs.add_argument( + "--output_qc", dest="output_qc", action="store", required=False, help="Path to output qc csv file." + ) + inputs.add_argument( + "--min_area", + dest="min_area", + action="store", + type=int, + default=None, + help="Lower area (in px) threshold for cell removal", + ) + inputs.add_argument( + "--max_area", + dest="max_area", + action="store", + type=int, + default=None, + help="Upper area (in px) threshold for cell removal", + ) + inputs.add_argument("--version", action="version", version="0.1.0") + arg = parser.parse_args() + + # Standardize paths + arg.input = abspath(arg.input) + arg.output = abspath(arg.output) + if arg.output_qc is None: + arg.output_qc = abspath(arg.output.replace(".tif", ".csv")) + return arg + + +def filter_areas(mask, min_area=None, max_area=None): + labeled_mask = label(mask, background=0) + measure_tmp = regionprops(labeled_mask) + num_cells = len(measure_tmp) + # Create a mapping between label and area + label_area_map = {prop.label: prop.area for prop in measure_tmp} + + if min_area and max_area: + small_valid_labels = np.array([label for label, area in label_area_map.items() if area >= min_area]) + large_valid_labels = np.array([label for label, area in label_area_map.items() if area <= max_area]) + valid_labels = np.intersect1d(small_valid_labels, large_valid_labels) + retained_masks = np.isin(labeled_mask, valid_labels) * labeled_mask + small_labels = num_cells - len(small_valid_labels) + large_labels = num_cells - len(large_valid_labels) + relabeled_mask = label(retained_masks, background=0) + elif min_area: + valid_labels = np.array([label for label, area in label_area_map.items() if area >= min_area]) + retained_masks = np.isin(labeled_mask, valid_labels) * labeled_mask + large_labels = 0 + small_labels = num_cells - len(valid_labels) + relabeled_mask = label(retained_masks, background=0) + elif max_area: + valid_labels = np.array([label for label, area in label_area_map.items() if area <= max_area]) + retained_masks = np.isin(labeled_mask, valid_labels) * labeled_mask + large_labels = num_cells - len(valid_labels) + small_labels = 0 + relabeled_mask = label(retained_masks, background=0) + else: + small_labels = 0 + large_labels = 0 + relabeled_mask = labeled_mask + + return relabeled_mask, small_labels, large_labels, num_cells + + +def main(args): + print(f"Head directory = {args.input}") + + # Example usage + in_path = args.input + output = args.output + min_area = args.min_area + max_area = args.max_area + + mask = imread(in_path) + mask, small, big, total = filter_areas(mask, min_area=min_area, max_area=max_area) + imsave(output, mask.astype("int32"), check_contrast=False) + print(f"Filtered mask saved to {output}") + + qc_df = pd.DataFrame( + { + "below_min_area": [small], + "below_percentage": [small / total], + "above_max_area": [big], + "above_percentage": [big / total], + "total_labels": [total], + }, + index=None, + ) + qc_df.to_csv(output.replace(".tif", ".csv"), index=False) + print() + + +if __name__ == "__main__": + # Read in arguments + args = get_args() + + # Run script + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/bin/spot2cell.py b/bin/spot2cell.py new file mode 100755 index 0000000..1e5e5a3 --- /dev/null +++ b/bin/spot2cell.py @@ -0,0 +1,128 @@ 
+#!/usr/bin/env python + +## Import packages +import pandas as pd +import numpy as np +from skimage.measure import regionprops_table +import tifffile as tiff +import argparse +import os + + +def assign_spots2cell(spot_table, cell_mask): + # Initialize a dictionary to hold the counts + gene_counts = {} + + # Calculate cell properties for cell_mask using regionprops_table + cell_props = regionprops_table( + cell_mask, + properties=[ + "label", + "centroid", + "area", + "major_axis_length", + "minor_axis_length", + "eccentricity", + "solidity", + "extent", + "orientation", + ], + ) + + # Turn cell props into a pandas DataFrame and add a Cell_ID column + name_map = { + "CellID": "label", + "X_centroid": "centroid-1", + "Y_centroid": "centroid-0", + "Area": "area", + "MajorAxisLength": "major_axis_length", + "MinorAxisLength": "minor_axis_length", + "Eccentricity": "eccentricity", + "Solidity": "solidity", + "Extent": "extent", + "Orientation": "orientation", + } + + for new_name, old_name in name_map.items(): + cell_props[new_name] = cell_props[old_name] + + for old_name in set(name_map.values()): + del cell_props[old_name] + + cell_props = pd.DataFrame(cell_props) + + # Exclude any rows that contain Duplicated in the gene column from spot_table + spot_table = spot_table[~spot_table["gene"].str.contains("Duplicated")] + + # Iterate over each row in the grouped DataFrame + for index, row in spot_table.iterrows(): + # Get the x and y positions and gene + x = int(row["x"]) + y = int(row["y"]) + gene = row["gene"] + + # Get the cell ID from the labeled mask + cell_id = cell_mask[y, x] + + # If the cell ID is not in the dictionary, add it + if cell_id not in gene_counts: + gene_counts[cell_id] = {} + if gene not in gene_counts[cell_id]: + gene_counts[cell_id][gene] = 1 + else: + gene_counts[cell_id][gene] += 1 + else: + if gene not in gene_counts[cell_id]: + gene_counts[cell_id][gene] = 1 + else: + # Add the count for this gene in this cell ID + gene_counts[cell_id][gene] += 1 + + # Convert the dictionary of counts into a DataFrame + gene_counts_df = pd.DataFrame(gene_counts).T + + # Add a column to gene_counts_df for the Cell_ID, make it the first column of the table + gene_counts_df["CellID"] = gene_counts_df.index + + # Add the regionprops data from cell_props for each cell ID to gene_counts_df add NA when cell_ID exists in cell_props but not in gene_counts_df + gene_counts_df = gene_counts_df.merge(cell_props, on="CellID", how="outer") + + # Convert NaN values to 0 + gene_counts_df = gene_counts_df.fillna(0) + + # Sort by Cell_ID in ascending order + gene_counts_df = gene_counts_df.sort_values(by=["CellID"]) + + # Make Cell_ID the first column in gene_counts_df + gene_counts_df = gene_counts_df.set_index("CellID").reset_index() + + gene_counts_df[spot_table.gene.unique()] = gene_counts_df[spot_table.gene.unique()].astype(int) + + # Filter out cell_ID = 0 into it's own dataframe called background + background = gene_counts_df[gene_counts_df["CellID"] == 0] + gene_counts_df = gene_counts_df[gene_counts_df["CellID"] != 0] + + # Return both gene_counts_df and background + return gene_counts_df, background + + +if __name__ == "__main__": + # Add a python argument parser with options for input, output and image size in x and y + parser = argparse.ArgumentParser() + parser.add_argument("-s", "--spot_table", help="Spot table to project.") + parser.add_argument("-c", "--cell_mask", help="Sample ID.") + parser.add_argument("--output", type=str, help="Output path") + parser.add_argument("--version", 
action="version", version="0.1.0") + + args = parser.parse_args() + + ## Read in spot table + spot_data = pd.read_csv( + args.spot_table, names=["x", "y", "z", "gene", "empty"], sep="\t", header=None, index_col=None + ) + + cell_mask = tiff.imread(args.cell_mask) + + gene_counts_df, background = assign_spots2cell(spot_data, cell_mask) + + gene_counts_df.to_csv(args.output, sep=",", header=True, index=False) diff --git a/bin/stack.py b/bin/stack.py new file mode 100755 index 0000000..b1882c1 --- /dev/null +++ b/bin/stack.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +import numpy as np +import argparse +import tifffile +import dask.array as da +from argparse import ArgumentParser as AP +import palom.pyramid +import palom.reader +import copy +import math +import time + + +def get_args(): + parser = AP(description="Stack a list of images into a single image stack using Dask") + parser.add_argument("-i", "--input", nargs="+", help="List of images to stack") + parser.add_argument("-o", "--output", dest="output", type=str) + parser.add_argument("--pixel_size", dest="pixel_size", type=float, default=0.138) + parser.add_argument("--tile_size", dest="tilesize", type=int, default=1072) + parser.add_argument("--version", action="version", version="0.1.0") + return parser.parse_args() + + +def num_levels_patch(self, base_shape): + factor = max(base_shape) / self.max_pyramid_img_size + return math.ceil(math.log(max(1, factor), self.downscale_factor)) + 1 + + +def main(args): + img = palom.reader.OmePyramidReader(args.input[0]) + mosaic = img.pyramid[0] + mosaic_out = copy.copy(mosaic) + + for i in range(1, len(args.input)): + img = palom.reader.OmePyramidReader(args.input[i]) + mosaic = img.pyramid[0] + mosaic_out = da.concatenate([mosaic_out, copy.copy(mosaic)], axis=0) + + palom.pyramid.PyramidSetting.num_levels = num_levels_patch + palom.pyramid.write_pyramid( + [mosaic_out], args.output, channel_names=["stack"], downscale_factor=2, pixel_size=0.138, tile_size=368 + ) + + +if __name__ == "__main__": + # Read in arguments + args = get_args() + + # Run script + st = time.time() + main(args) + rt = time.time() - st + print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/conf/base.config b/conf/base.config index 1c2c62a..8a67abc 100644 --- a/conf/base.config +++ b/conf/base.config @@ -9,8 +9,6 @@ */ process { - - // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } @@ -24,8 +22,6 @@ process { // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. 
- // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } @@ -57,9 +53,13 @@ process { } withLabel:error_retry { errorStrategy = 'retry' - maxRetries = 2 + maxRetries = 3 } withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } + + withName:'ILASTIK_PIXELCLASSIFICATION|ILASTIK_MULTICUT' { + label = "process_high" + } } diff --git a/conf/modules.config b/conf/modules.config index da58a5d..93c1b0e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,24 +18,208 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SAMPLESHEET_CHECK { + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + } + + withName: 'MULTIQC' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: FASTQC { - ext.args = '--quiet' + withName: 'MOLKARTQC' { + publishDir = [ + path: { "${params.outdir}/molkartqc" }, + pattern: "*.csv" + ] + } + + withName: 'MOLKARTQCPNG' { + ext.when = { params.create_training_subset } + publishDir = [ + path: { "${params.outdir}/molkartqc" }, + pattern: "*.png" + ] } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { + withName: 'CREATE_STACK' { + ext.when = { image.size() > 1 } + ext.prefix = { "${meta.id}_stack" } + ext.args = [ "", + params.clahe_pyramid_tile ? "--tile_size ${params.clahe_pyramid_tile}" : "", + params.clahe_pixel_size ? "--pixel_size ${params.clahe_pixel_size}" : "" + ].join(" ").trim() publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: { "${params.outdir}/stack" }, mode: params.publish_dir_mode, - pattern: '*_versions.yml' + pattern: "*.{ome.tif}" + ] + } + + withName: "MASKFILTER" { + ext.prefix = { "${meta.id}_${meta.segmentation}_filtered" } + ext.args = [ "", + params.segmentation_min_area ? "--min_area ${params.segmentation_min_area}" : "", + params.segmentation_max_area ? "--max_area ${params.segmentation_max_area}" : "" + ].join(" ").trim() + publishDir = [ + path: { "${params.outdir}/segmentation/filtered_masks" }, + mode: params.publish_dir_mode, + pattern: "*.{tiff,tif}" + ] + } + + withName: "MINDAGAP_MINDAGAP" { + ext.args = [ "", + params.mindagap_boxsize ? "${params.mindagap_boxsize}" : "", + params.mindagap_loopnum ? "${params.mindagap_loopnum}" : "", + params.mindagap_tilesize ? "-xt ${params.mindagap_tilesize}" : "" + ].join(" ").trim() + publishDir = [ + path: { "${params.outdir}/mindagap" }, + pattern: "*.{tiff,tif}", + saveAs: { filename -> "${meta.id}_$filename" } + ] + } + + withName: "MINDAGAP_DUPLICATEFINDER" { + ext.args = [ "", + params.mindagap_tilesize ? 
"${params.mindagap_tilesize}" : "" + ].join(" ").trim() + publishDir = [ + path: { "${params.outdir}/mindagap" }, + pattern: "*.{tsv,txt}", + mode: params.publish_dir_mode, + saveAs: { filename -> "${meta.id}_$filename" } ] } + withName: "SPOT2CELL" { + ext.prefix = { "${meta.id}_${meta.segmentation}"} + publishDir = [ + path: { "${params.outdir}/spot2cell" }, + pattern: "*.csv", + mode: params.publish_dir_mode, + saveAs: { filename -> "cellxgene_$filename" } + ] + } + + withName: "ILASTIK_PIXELCLASSIFICATION" { + ext.when = { params.segmentation_method.split(',').contains('ilastik') } + publishDir = [ + path: { "${params.outdir}/segmentation/ilastik" }, + pattern: "*.{h5}", + saveAs: { filename -> "${meta.id}_probability_$filename" } + ] + } + + withName: "ILASTIK_MULTICUT" { + ext.when = { params.segmentation_method.split(',').contains('ilastik') } + publishDir = [ + path: { "${params.outdir}/segmentation/ilastik" }, + pattern: "*.tiff", + saveAs: { filename -> "${meta.id}_ilastik_$filename" } + ] + } + + withName: "CROPHDF5" { + ext.when = { params.create_training_subset } + publishDir = [ + path: "${params.outdir}/training_subset/hdf5", + mode: params.publish_dir_mode, + pattern: "*{C,c}rop*.{hdf5,h5}" + ] + ext.args = [ "", + "--crop", + "--nuclei_index 1", + params.crop_amount ? "--crop_amount ${params.crop_amount}" : "", + params.crop_size_x && params.crop_size_y ? "--crop_size ${params.crop_size_x} ${params.crop_size_y}" : "", + params.crop_nonzero_fraction ? "--nonzero_fraction ${params.crop_nonzero_fraction}" : "" + ].join(" ").trim() + } + + withName: "CROPTIFF" { + ext.when = { params.create_training_subset } + publishDir = [ + path: "${params.outdir}/training_subset/tiff", + mode: params.publish_dir_mode, + pattern: "*.{tiff,tif}" + ] + } + + withName: "TIFFH5CONVERT" { + ext.when = { params.segmentation_method.split(',').contains('ilastik') } + publishDir = [ + path: "${params.outdir}/converted_hdf5", + pattern: "*.{hdf5,h5}" + ] + } + + withName: "CLAHE" { + ext.prefix = { + def name = image.name + def base = name.lastIndexOf('.') != -1 ? name[0..name.lastIndexOf('.') - 1] : name + return "${meta.id}_${base}_clahe" + } + ext.when = { !params.skip_clahe } + ext.args = [ "", + params.clahe_pyramid_tile ? "--tile-size ${params.clahe_pyramid_tile}" : "", + params.clahe_cliplimit ? "--cliplimit ${params.clahe_cliplimit}" : "", + params.clahe_nbins ? "--nbins ${params.clahe_nbins}" : "", + params.clahe_pixel_size ? "--pixel-size ${params.clahe_pixel_size}" : "", + params.clahe_kernel ? "--kernel ${params.clahe_kernel}" : "" + ].join(" ").trim() + } + + withName: "DEEPCELL_MESMER" { + ext.when = { params.segmentation_method.split(',').contains('mesmer') } + ext.args = [ "", + params.mesmer_compartment ? "--compartment ${params.mesmer_compartment}" : "", + params.mesmer_image_mpp ? "--image-mpp ${params.mesmer_image_mpp}" : "", + "--nuclear-channel 0" + ].join(" ").trim() + ext.prefix = { "${meta.id}_mesmer_mask" } + publishDir = [ + path: "${params.outdir}/segmentation/mesmer", + pattern: "*.tif" + ] + } + + withName: "CELLPOSE" { + singularity.runOptions = "--bind $HOME:$HOME" + ext.when = { params.segmentation_method.split(',').contains('cellpose') } + ext.args = [ "", + "--channel_axis 0", + "--no_npy", + params.cellpose_save_flows ? "--save_flows" : "", + params.cellpose_diameter ? "--diameter ${params.cellpose_diameter}" : "", + params.cellpose_chan ? "--chan ${params.cellpose_chan}" : "", + params.cellpose_chan2 ? 
"--chan2 ${params.cellpose_chan2}" : "", + params.cellpose_custom_model ? "" : params.cellpose_pretrained_model ? "--pretrained_model ${params.cellpose_pretrained_model}" : "", + params.cellpose_flow_threshold ? "--flow_threshold ${params.cellpose_flow_threshold}" : "", + params.cellpose_edge_exclude ? "--exclude_on_edges" : "" + ].join(" ").trim() + publishDir = [ + path: "${params.outdir}/segmentation/cellpose", + pattern: "*_cp_masks.tif", + saveAs: { filename -> "${meta.id}_cellpose_mask.tif" } + ] + } + + withName: "CREATE_ANNDATA" { + ext.prefix = { "${meta.id}_${meta.segmentation}"} + publishDir = [ + path: "${params.outdir}/anndata", + mode: params.publish_dir_mode, + pattern: "*.{adata}" + ] + } } diff --git a/conf/test.config b/conf/test.config index d94efa5..f30779e 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,15 +15,15 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + max_cpus = 4 + max_memory = '8.GB' + max_time = '4.h' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' - - // Genome references - genome = 'R64-1-1' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/molkart/test_data/samplesheets/samplesheet_membrane.csv' + mindagap_tilesize = 90 + mindagap_boxsize = 7 + mindagap_loopnum = 100 + clahe_pyramid_tile = 368 + segmentation_method = "mesmer,cellpose" } diff --git a/conf/test_full.config b/conf/test_full.config index ce72ba6..30c1090 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -10,17 +10,14 @@ ---------------------------------------------------------------------------------------- */ -cleanup = true - params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/molkart/test_data/samplesheets/samplesheet_full_test.csv' - // Genome references - genome = 'R64-1-1' + segmentation_method = "mesmer,cellpose" + mindagap_boxsize = 3 + mindagap_loopnum = 40 + cellpose_pretrained_model = "nuclei" } diff --git a/docs/images/molkart_workflow.png b/docs/images/molkart_workflow.png new file mode 100644 index 0000000..2d3de1c Binary files /dev/null and b/docs/images/molkart_workflow.png differ diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png deleted file mode 100755 index 361d0e4..0000000 Binary files a/docs/images/mqc_fastqc_adapter.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png deleted file mode 100755 index cb39ebb..0000000 Binary files a/docs/images/mqc_fastqc_counts.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png deleted file mode 100755 index a4b89bf..0000000 Binary files a/docs/images/mqc_fastqc_quality.png and /dev/null differ diff --git a/docs/output.md b/docs/output.md index cb20dba..2dd3cc0 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,36 +6,117 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [FastQC](#fastqc) - Raw read QC -- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline -- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +- [Mindagap](#Mindagap) - Fill empty grid lines in a panorama image with neighbor-weighted values. +- [CLAHE](#CLAHE) - perform contrast-limited adaptive histogram equalization. +- [Create stacks](#create_stacks) - If a second image is provided, combine both into one stack as input for segmentation modules. +- [segmentation](#segmentation) - Segment single cells from provided image using segmentation method of choice (Cellpose, Mesmer, ilastik) and filter them by size. +- [Mindagap_duplicatefinder](#Mindagap) - Take a spot table and search for duplicates along grid lines. +- [Spot2cell](#spot2cell) - Assign non-duplicated spots to segmented cells based on segmentation mask and extract cell shape information. +- [Create AnnData](#anndata) - Creates a spatial AnnData object as described in the [Squidpy tutorial](https://squidpy.readthedocs.io/en/stable/notebooks/tutorials/tutorial_read_spatial.html). +- [MolkartQC](#molkartqc) - Produce QC metrics specific to this pipeline. +- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline. +- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution. + +- [Create training subset](#create-training-subset) - creates crops for segmentation training (Cellpose, ilastik). + +### Mindagap + +
+Output files + +- `mindagap/` + - `*_gridfilled.tiff`: Gridfilled panorama file(s). + - `*_markedDups.txt`: Spot table with duplicated spots marked as 'Duplicated'. + +
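The grid-filled panorama and the duplicate-marked spot table listed above are controlled by the `mindagap_*` parameters that this diff sets in `conf/test.config` and documents in `docs/usage.md`. As a rough sketch only, here is a profile-style `params` block in the same form as `conf/test.config`; the values simply mirror the defaults listed further down in `docs/usage.md`, and the comments state assumed meanings rather than verified tool documentation:

```groovy
// Sketch of a profile-style override for the MindaGap step (illustration only).
params {
    mindagap_tilesize = 2144 // tile edge length of the panorama grid (assumed meaning)
    mindagap_boxsize  = 3    // neighbourhood box used to fill grid lines (assumed meaning)
    mindagap_loopnum  = 40   // number of gap-filling iterations (assumed meaning)
}
```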
+ +[Mindagap](https://github.com/ViriatoII/MindaGap) fills the empty grid lines of a panorama stitched from several tiles with the mean of their immediate neighborhood, and marks duplicated spots that lie close to the grid lines in the spot table. + +### CLAHE + +
+Output files + +- `clahe/` + - `*_clahe.tiff`: Image with contrast-limited adaptive histogram equalization applied. + +
+ +[CLAHE](https://scikit-image.org/docs/stable/api/skimage.exposure.html#skimage.exposure.equalize_adapthist) is an algorithm from [scikit-image](https://scikit-image.org) for local contrast enhancement that uses histograms computed over different tile regions of the image. Local details can therefore be enhanced even in regions that are darker or lighter than most of the image. + +### Create_stacks + +
+Output files + +- `stack/` + - `*.ome.tif`: Image containing provided input images as channels. + +
+ +Create stack is a local module that merges the provided images into a single stack in preparation for the segmentation steps. + +### Segmentation -### FastQC
+Output files + +- `segmentation/` + - `cellpose/` + - `*_cellpose_mask.tif`: Segmentation masks created by Cellpose. + - `ilastik/` + - `*_probability_maps.hdf5`: Probability maps created by ilastik's Pixel Classifier workflow. + - `*_ilastik_mask.tif`: Segmentation masks created by ilastik's Boundary prediction with Multicut workflow. + - `mesmer/`: + - `*_mesmer_mask.tif`: Segmentation masks created by Mesmer. + - `filtered_masks/` - `*_method_filtered.tif`: Segmentation masks filtered based on provided area limits. +
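Which of the mask sets listed above are produced, and how `filtered_masks/` is derived from them, is steered by the segmentation parameters documented in `docs/usage.md` later in this diff. A minimal sketch, assuming invented area cut-offs (the pipeline defaults leave both limits unset):

```groovy
// Sketch only: run two segmentation methods side by side (as conf/test.config does)
// and filter the resulting masks by object area before downstream steps.
params {
    segmentation_method   = "mesmer,cellpose"
    segmentation_min_area = 50     // hypothetical lower bound in pixels
    segmentation_max_area = 100000 // hypothetical upper bound in pixels
}
```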
+ +[Cellpose](https://www.cellpose.org) is a segmentation tool that provides pretrained models as well as human-in-the-loop training. If additional training is needed, the envisioned workflow is to create the training subset (`tiff` crops), train a custom model on those crops in the [Cellpose GUI](https://cellpose.readthedocs.io/en/latest/gui.html), and then pass the trained model to the pipeline as an argument to complete the run. + +[ilastik](https://www.ilastik.org) is an interactive learning and segmentation toolkit. Its envisioned use here is to create the training subset (`hdf5` crops), build a Pixel Classification project and a Boundary Prediction with Multicut project with the desired parameters, and then provide both project files to the pipeline, which applies them in batch mode to the full images. + +[Mesmer](https://deepcell.readthedocs.io/en/master/API/deepcell.applications.html#mesmer) is a segmentation tool that provides pretrained models for whole-cell and nuclear segmentation. + +### Spot2cell
Output files -- `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics. - - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `spot2cell/` + - `*.cellxgene.csv`: Cell-by-transcript `csv` file containing transcript counts per cell, as well as cell shape properties.
-[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +Spot2cell is a local module that assigns spots (excluding marked duplicates) to cells based on the spot table and the segmentation mask. -![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) +### Create_anndata -![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) +
+Output files -![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) +- `anndata/` + - `*.adata`: Anndata object containing the spot count table, spatial locations of cells in `adata.obsm` and metadata like 'Area', 'MajorAxisLength', 'MinorAxisLength', 'Eccentricity', 'Solidity', 'Extent', 'Orientation' in `adata.obs` + +
-> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +CREATE_ANNDATA is a local module that generates an [AnnData object](https://anndata.readthedocs.io/en/latest/) storing expression, metadata and spatial locations of cells. + +### MolkartQC + +
+Output files + +- `molkartqc/` + - `*.spot_QC.csv`: Sheet containing useful quality-control metrics specific to spot-based image processing methods. + +
+ +MolkartQC is a local module that gathers quality-control metrics specific to spot-based image processing, including: sample ID, segmentation method used, total number of cells, average cell area, total number of spots, average number of spots assigned per cell, total number of assigned spots, percentage of assigned spots, and number of duplicated spots. ### MultiQC
+Output files + +- `training_subset/` + - `hdf5/` + - `*_crop[0-9]+.hdf5`: `hdf5` crops for training Pixel classification and Multicut models with ilastik for segmentation. + - `tiff/` + - `*_crop[0-9]+.tiff`: `tiff` crops for training Cellpose to create a custom segmentation model. + +
+ +Create training subset is an optional group of modules that creates crops in `hdf5` and `tiff` formats, as well as a crop overview image so that crop placement can be checked and reproduced. + ### Pipeline information
@@ -60,8 +158,9 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ - `pipeline_info/` - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`.
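The directory layout documented above is wired up through `publishDir` rules in `conf/modules.config`; the fragment at the top of this diff shows the Cellpose rule. The sketch below restates that pattern in isolation: the path, pattern and `saveAs` closure are copied from the diff, while the `withName` selector is an assumption based on the `CELLPOSE` process name.

```groovy
// How a section of docs/output.md maps onto a publishDir rule in conf/modules.config.
process {
    withName: "CELLPOSE" {
        publishDir = [
            path: "${params.outdir}/segmentation/cellpose",        // -> segmentation/cellpose/ above
            pattern: "*_cp_masks.tif",                              // publish only the mask files
            saveAs: { filename -> "${meta.id}_cellpose_mask.tif" }  // rename to the documented file name
        ]
    }
}
```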
diff --git a/docs/usage.md b/docs/usage.md index 3353f83..ed37e42 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,58 +6,43 @@ ## Introduction - - ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns (4th column optional), and a header row as shown in the examples below. ```bash --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample - -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: - -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz -``` - ### Full samplesheet -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +The pipeline requires that the first column specifies the sample ID. If multiple rows are provided, their sample tags must be different. The samplesheet needs to have the three column names exactly as specified below, with the optional fourth column specifying the `membrane_image` (it does not have to be a membrane image necessarily, but the second image can be provided here to help with segmentation). -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. 
+A final samplesheet file that can be used to process a full dataset (after segmentation optimization), where a matching membrane image is provided would look like: -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +```csv title="samplesheet.csv" +sample,nuclear_image,spot_table,membrane_image +SAMPLE1,SAMPLE1.nucleus.tiff,SAMPLE1.spots.txt,SAMPLE1.membrane.tiff +SAMPLE2,SAMPLE2.nucleus.tiff,SAMPLE2.spots.txt,SAMPLE2.membrane.tiff +SAMPLE3,SAMPLE3.nucleus.tiff,SAMPLE3.spots.txt,SAMPLE3.membrane.tiff +SAMPLE4,SAMPLE4.nucleus.tiff,SAMPLE4.spots.txt,SAMPLE4.membrane.tiff ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| Column | Description | +| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `sample` | Custom sample name. If multiple field-of-views (FOVs) are being processed for the same sample, their sample tags must be different. Must not contain spaces. | +| `nuclear_image` | Full path to nuclear image (DAPI, Hoechst). | +| `spot_table` | Full path to tsv or txt spot table provided by Resolve. Separator must be `\t`. | +| `membrane_image` | Full path to membrane image (e.g WGA) or second channel to help with segmentation (optional). | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. ## Running the pipeline -The typical command for running the pipeline is as follows: +The typical command for running the pipeline with default values is as follows: ```bash -nextflow run nf-core/molkart --input samplesheet.csv --outdir --genome GRCh37 -profile docker +nextflow run nf-core/molkart --input ./samplesheet.csv --outdir ./results -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -75,8 +60,11 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. -> ⚠️ Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). 
-> The above pipeline run specified with a params file in yaml format: +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: + +The above pipeline run specified with a params file in yaml format: ```bash nextflow run nf-core/molkart -profile docker -params-file params.yaml @@ -85,15 +73,61 @@ nextflow run nf-core/molkart -profile docker -params-file params.yaml with `params.yaml` containing: ```yaml -input: './samplesheet.csv' -outdir: './results/' -genome: 'GRCh37' -input: 'data' -<...> +input: "./samplesheet.csv" +outdir: "./results/" +``` + +Additionally, `params.yaml` can contain optional parameters: + +```yaml +input: "./samplesheet.csv" +outdir: "./results/" +segmentation_method: "mesmer" +segmentation_min_area: null +segmentation_max_area: null +cellpose_save_flows: false +cellpose_diameter: 30 +cellpose_chan: 0 +cellpose_chan2: null +cellpose_pretrained_model: "cyto" +cellpose_custom_model: null +cellpose_flow_threshold: 0.4 +cellpose_edge_exclude: true +mesmer_image_mpp: 0.138 +mesmer_compartment: "whole-cell" +ilastik_pixel_project: null +ilastik_multicut_project: null +mindagap_tilesize: 2144 +mindagap_boxsize: 3 +mindagap_loopnum: 40 +mindagap_edges: false +skip_clahe: false +clahe_cliplimit: 0.01 +clahe_nbins: 256 +clahe_pixel_size: 0.138 +clahe_kernel: 25 +clahe_pyramid_tile: 1072 +create_training_subset: false +crop_amount: 4 +crop_nonzero_fraction: 0.4 +crop_size_x: 400 +crop_size_y: 400 ``` You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). +To run the pipeline so that the training subset is created with default values, run: + +```bash +nextflow run nf-core/molkart --input ./samplesheet.csv --outdir ./results -profile docker --create_training_subset +``` + +After training a Cellpose 2.0 model, or creating ilastik Pixel Classification and Multicut projects, make sure you match the parameters (e.g cell diameter, flow threshold) in the run to your training and continue the default pipeline run with: + +```bash +nextflow run nf-core/molkart --input ./samplesheet.csv --outdir ./results -profile docker --segmentation_method cellpose,ilastik --cellpose_custom_model /path/to/model --ilastik_pixel_project /path/to/pixel_classifier.ilp --ilastik_multicut_project /path/to/multicut.ilp +``` + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: @@ -110,13 +144,17 @@ First, go to the [nf-core/molkart releases page](https://github.com/nf-core/molk This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. -To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. 
+To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -> 💡 If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +:::tip +If you wish to share such a profile (e.g. to upload as supplementary material for an academic publication), make sure NOT to include cluster-specific paths to files or institution-specific profiles. +::: ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: ### `-profile` @@ -124,14 +162,16 @@ Use this parameter to choose a configuration profile. Profiles can give configur Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility -- currently, Conda is not supported for this pipeline. +::: The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment. +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment. - `test` - A profile with a complete configuration for automated testing @@ -149,7 +189,7 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. Currently not supported.
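As the warning about `-c` earlier in this document implies, custom configuration files are meant for infrastructural tweaks such as per-process resource tuning rather than for pipeline parameters. A minimal, hypothetical `custom.config` along those lines (the process name is taken from the Cellpose module added in this diff, the values are invented):

```groovy
// custom.config -- passed with `nextflow run nf-core/molkart ... -c custom.config`
process {
    withName: "CELLPOSE" {
        cpus   = 8      // hypothetical value
        memory = 32.GB  // hypothetical value
        time   = 12.h   // hypothetical value
    }
}
```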
### `-resume` diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index 9b34804..0000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,530 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. -// - -import nextflow.Nextflow -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-apptainer', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - Nextflow.error('Exiting!') - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def 
defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && 
params_value != "" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 25a0a74..e248e4c 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -3,6 +3,8 @@ // import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx class NfcoreTemplate { @@ -128,7 +130,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) @@ -140,12 +142,14 @@ class NfcoreTemplate { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html @@ -154,14 +158,16 @@ class NfcoreTemplate { } // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") + FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() } // @@ -222,6 +228,20 @@ class NfcoreTemplate { } } + // + // Dump pipeline parameters in a json file + // + public static void dump_parameters(workflow, params) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() + } + // // Print pipeline summary on completion // diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 6e080d9..8a01ac7 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -20,40 +20,11 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - // - // Generate help string - // - public static String help(workflow, params) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Generate parameter summary log string - // - public static String 
paramsSummaryLog(workflow, params) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } // // Validate parameters and print summary to screen // public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params) - System.exit(0) - } // Print workflow version and exit on --version if (params.version) { @@ -62,14 +33,6 @@ class WorkflowMain { System.exit(0) } - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params) - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) @@ -89,6 +52,7 @@ class WorkflowMain { // // Get attribute from genome config file e.g. fasta // + /* public static Object getGenomeAttribute(params, attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { if (params.genomes[ params.genome ].containsKey(attribute)) { @@ -97,4 +61,5 @@ class WorkflowMain { } return null } + */ // TODO : } diff --git a/lib/WorkflowMolkart.groovy b/lib/WorkflowMolkart.groovy index f23ab88..55ef7a0 100755 --- a/lib/WorkflowMolkart.groovy +++ b/lib/WorkflowMolkart.groovy @@ -7,16 +7,19 @@ import groovy.text.SimpleTemplateEngine class WorkflowMolkart { + // // Check and validate parameters // public static void initialise(params, log) { +/* //TODO : remove genomeExistsError(params, log) if (!params.fasta) { Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." } +*/ } // @@ -46,15 +49,60 @@ class WorkflowMolkart { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "Mindagap (Guerreiro et al. 2023),", + params.segmentation_method.split(',').contains('mesmer') ? "Mesmer (Greenwald et al. 2021)," : "", + params.segmentation_method.split(',').contains('ilastik') ? "ilastik (Berg et al. 2019)," : "", + params.segmentation_method.split(',').contains('cellpose') ? "Cellpose (Stringer et al. 2021; Pachitariu et al 2022)," : "", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "<li>Guerreiro R, Wuennemann F & pvtodorov (2023). ViriatoII/MindaGap: v0.0.3 (0.0.3).</li>",
+ params.segmentation_method.split(',').contains('mesmer') ? "<li>Greenwald NF, Miller G, Moen E, Kong A, Kagel A, Dougherty T, Fullaway CC, McIntosh BJ, Leow KX, Schwartz MS, Pavelchek C, Cui S, Camplisson I, Bar-Tal O, Singh J, Fong M, Chaudhry G, Abraham Z, Moseley J, Warshawsky S, Soon E, Greenbaum S, Risom T, Hollmann T, Bendall SC, Keren L, Graf W, Angelo M, Van Valen D. Whole-cell segmentation of tissue images with human-level performance using large-scale data annotation and deep learning. Nat Biotechnol. 2022 Apr;40(4):555-565. doi: 10.1038/s41587-021-01094-0. Epub 2021 Nov 18. PMID: 34795433; PMCID: PMC9010346.</li>" : "",
+ params.segmentation_method.split(',').contains('ilastik') ? "<li>Berg, S., Kutra, D., Kroeger, T. et al. ilastik: interactive machine learning for (bio)image analysis. Nat Methods 16, 1226–1232 (2019). https://doi.org/10.1038/s41592-019-0582-9</li>" : "",
+ params.segmentation_method.split(',').contains('cellpose') ? "<li>Stringer, C., Wang, T., Michaelos, M. et al. Cellpose: a generalist algorithm for cellular segmentation. Nat Methods 18, 100–106 (2021). https://doi.org/10.1038/s41592-020-01018-x</li>" : "",
+ "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file def meta = [:] meta.workflow = run_workflow.toMap() meta["manifest_map"] = run_workflow.manifest.toMap() + // Pipeline DOI meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>
  • " + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + meta["tool_bibliography"] = toolBibliographyText(params) + + def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() diff --git a/main.nf b/main.nf index afd47ed..7b0635c 100644 --- a/main.nf +++ b/main.nf @@ -13,17 +13,26 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + VALIDATE & PRINT PARAMETER SUMMARY ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +include { validateParameters; paramsHelp; paramsSummaryLog; fromSamplesheet } from 'plugin/nf-validation' -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +// Validate input parameters +if (params.validate_params) { + validateParameters() +} WorkflowMain.initialise(workflow, params, log) diff --git a/modules.json b/modules.json index 42d5674..85bce5c 100644 --- a/modules.json +++ b/modules.json @@ -5,19 +5,44 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "cellpose": { + "branch": "master", + "git_sha": "0975c63a8ce4488c3259f595270b3f0d419abafe", + "installed_by": ["modules"] + }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", + "installed_by": ["modules"] + }, + "deepcell/mesmer": { + "branch": "master", + "git_sha": "81fcaa73c7f1668a1c289464da88bf5eff582bcd", + "installed_by": ["modules"] + }, + "ilastik/multicut": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "ilastik/pixelclassification": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "mindagap/duplicatefinder": { + "branch": "master", + "git_sha": "cdc42519bf7d6d2cac9b5f14cb56e0060477c69c", "installed_by": ["modules"] }, - "fastqc": { + "mindagap/mindagap": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "97dbec551f491ee562721008cdecd1e7a9a56b14", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", "installed_by": ["modules"] } } diff --git a/modules/local/clahe.nf b/modules/local/clahe.nf new file mode 100644 index 0000000..b4caf08 --- /dev/null +++ b/modules/local/clahe.nf @@ -0,0 +1,32 @@ +process CLAHE{ + tag "$meta.id" + label 'process_medium' + + container 'ghcr.io/schapirolabor/molkart-local:v0.0.4' + + input: + tuple val(meta), path(image) + + output: + tuple val(meta), path("*.tiff") , emit: img_clahe 
+ path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + apply_clahe.dask.py \\ + --input ${image} \\ + --output ${prefix}.tiff \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + molkart_clahe: \$(apply_clahe.dask.py --version) + END_VERSIONS + """ + +} diff --git a/modules/local/createanndata.nf b/modules/local/createanndata.nf new file mode 100644 index 0000000..a11e3d7 --- /dev/null +++ b/modules/local/createanndata.nf @@ -0,0 +1,33 @@ +process CREATE_ANNDATA { + tag "$meta.id" + label 'process_low' + + container 'ghcr.io/schapirolabor/molkart-local:v0.0.4' + + input: + tuple val(meta), path(spot2cell) + + output: + tuple val(meta), path("*.adata") , emit: stack + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + create_anndata.py \\ + --input ${spot2cell} \\ + --spatial_cols X_centroid Y_centroid \\ + --output ${prefix}.adata \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + molkart_createanndata: \$(create_anndata.py --version) + END_VERSIONS + """ +} diff --git a/modules/local/createstack.nf b/modules/local/createstack.nf new file mode 100644 index 0000000..993c5a4 --- /dev/null +++ b/modules/local/createstack.nf @@ -0,0 +1,32 @@ +process CREATE_STACK { + tag "$meta.id" + label 'process_low' + + container 'ghcr.io/schapirolabor/molkart-local:v0.0.4' + + input: + tuple val(meta), path(image) + + output: + tuple val(meta), path("*.ome.tif") , emit: stack + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + stack.py \\ + --input ${image} \\ + --output ${prefix}.ome.tif \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + molkart_stack: \$(stack.py --version) + END_VERSIONS + """ +} diff --git a/modules/local/crophdf5.nf b/modules/local/crophdf5.nf new file mode 100644 index 0000000..6d5229a --- /dev/null +++ b/modules/local/crophdf5.nf @@ -0,0 +1,34 @@ +process CROPHDF5 { + tag "$meta.id" + label 'process_single' + + container 'ghcr.io/schapirolabor/molkart-local:v0.0.4' + + input: + tuple val(meta), path(image_stack), val(num_channels) + + output: + tuple val(meta), path("*.hdf5"), emit: ilastik_training + tuple val(meta), path("*.txt") , emit: crop_summary + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + crop_hdf5.py \\ + --input $image_stack \\ + --output . 
\\ + --num_channels $num_channels \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + molkart_crophdf5: \$(crop_hdf5.py --version) + END_VERSIONS + """ +} diff --git a/modules/local/croptiff.nf b/modules/local/croptiff.nf new file mode 100644 index 0000000..7e3ca43 --- /dev/null +++ b/modules/local/croptiff.nf @@ -0,0 +1,34 @@ +process CROPTIFF { + tag "$meta.id" + label 'process_single' + + container 'ghcr.io/schapirolabor/molkart-local:v0.0.4' + + input: + tuple val(meta), path(image_stack) + tuple val(meta), path(crop_summary) + + output: + tuple val(meta), path("*.tiff"), emit: crop_tiff + tuple val(meta), path("*.png") , emit: overview + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + crop_tiff.py \\ + --input $image_stack \\ + --crop_summary $crop_summary \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + molkart_croptiff: \$(crop_tiff.py --version) + END_VERSIONS + """ +} diff --git a/modules/local/maskfilter.nf b/modules/local/maskfilter.nf new file mode 100644 index 0000000..97e73c3 --- /dev/null +++ b/modules/local/maskfilter.nf @@ -0,0 +1,47 @@ +process MASKFILTER { + tag "$meta.id" + label 'process_medium' + + container 'ghcr.io/schapirolabor/molkart-local:v0.0.4' + + input: + tuple val(meta), path(mask) + + output: + tuple val(meta), path("*.tif"), emit: filtered_mask + tuple val(meta), path("*.csv"), emit: filtered_qc + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + maskfilter.py \\ + --input ${mask} \\ + --output ${prefix}.tif \\ + --output_qc ${prefix}.csv \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + molkart_maskfilter: \$(maskfilter.py --version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.tif + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + molkart_maskfilter: \$(maskfilter.py --version) + END_VERSIONS + """ +} diff --git a/modules/local/molkartqc.nf b/modules/local/molkartqc.nf new file mode 100644 index 0000000..57fe8d4 --- /dev/null +++ b/modules/local/molkartqc.nf @@ -0,0 +1,49 @@ +process MOLKARTQC{ + tag "$meta.id" + label 'process_single' + + container 'ghcr.io/schapirolabor/molkart-local:v0.0.4' + + input: + tuple val(meta), path(spot_table), path(cellxgene_table), val(segmethod), path(filterqc) + + output: + tuple val(meta), path("*.csv"), emit: qc + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + collect_QC.py \\ + --cellxgene $cellxgene_table \\ + --spots $spot_table \\ + --sample_id $prefix \\ + --segmentation_method $segmethod \\ + --filterqc $filterqc \\ + --outdir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + molkartqc: \$(collect_QC.py --version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + molkartqc: \$(collect_QC.py --version) + END_VERSIONS + """ +} diff --git a/modules/local/molkartqcpng.nf b/modules/local/molkartqcpng.nf new file mode 100644 index 0000000..f5a44df --- /dev/null +++ b/modules/local/molkartqcpng.nf @@ -0,0 +1,43 @@ +process MOLKARTQCPNG { + label 'process_single' + + container 'ghcr.io/schapirolabor/molkart-local:v0.0.4' + + input: + path(png) + + output: + path("*.png") , emit: png_overview + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + collect_QC.py \\ + --png_overview $png \\ + --outdir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + molkartqc: \$(collect_QC.py --version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + molkartqc: \$(collect_QC.py --version) + END_VERSIONS + """ +} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 15bc8b4..0000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/molkart/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/spot2cell.nf b/modules/local/spot2cell.nf new file mode 100644 index 0000000..4eab5ba --- /dev/null +++ b/modules/local/spot2cell.nf @@ -0,0 +1,35 @@ +process SPOT2CELL{ + debug true + tag "$meta.id" + label 'process_single' + + container 'ghcr.io/schapirolabor/molkart-local:v0.0.4' + + input: + tuple val(meta) , path(spot_table) + tuple val(meta2), path(cell_mask) + + output: + tuple val(meta), path("*.csv"), emit: cellxgene_table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + spot2cell.py \\ + --spot_table ${spot_table} \\ + --cell_mask ${cell_mask} \\ + --output ${prefix}.csv \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + molkart_spot2cell: \$(spot2cell.py --version) + END_VERSIONS + """ +} diff --git a/modules/local/tiffh5convert.nf b/modules/local/tiffh5convert.nf new file mode 100644 index 0000000..f3d38ab --- /dev/null +++ b/modules/local/tiffh5convert.nf @@ -0,0 +1,33 @@ +process TIFFH5CONVERT { + tag "$meta.id" + label 'process_single' + + container "ghcr.io/schapirolabor/molkart-local:v0.0.4" + + input: + tuple val(meta), path(image), 
val(num_channels) + + output: + tuple val(meta), path("*.hdf5"), emit: hdf5 + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + crop_hdf5.py \\ + --input $image \\ + --output . \\ + --num_channels $num_channels \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + molkart_crophdf5: \$(crop_hdf5.py --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cellpose/environment.yml b/modules/nf-core/cellpose/environment.yml new file mode 100644 index 0000000..f8ca8bc --- /dev/null +++ b/modules/nf-core/cellpose/environment.yml @@ -0,0 +1,5 @@ +name: cellpose +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/cellpose/main.nf b/modules/nf-core/cellpose/main.nf new file mode 100644 index 0000000..8fde76c --- /dev/null +++ b/modules/nf-core/cellpose/main.nf @@ -0,0 +1,57 @@ +process CELLPOSE { + tag "$meta.id" + label 'process_medium' + + container "docker.io/biocontainers/cellpose:2.2.2_cv2" + + input: + tuple val(meta), path(image) + path(model) + + output: + tuple val(meta), path("*masks.tif") , emit: mask + tuple val(meta), path("*flows.tif"), emit: flows, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "I did not manage to create a cellpose module in Conda that works in all OSes. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def model_command = model ? "--pretrained_model $model" : "" + def VERSION = '2.2.2' + """ + cellpose \ + --image_path $image \ + --save_tif \ + --verbose \ + $model_command \ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellpose: $VERSION + END_VERSIONS + """ + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "I did not manage to create a cellpose module in Conda that works in all OSes. Please use Docker / Singularity / Podman instead." + } + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "2.2.2" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + touch ${prefix}_cp_masks.tif + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellpose: $VERSION + END_VERSIONS + """ + +} diff --git a/modules/nf-core/cellpose/meta.yml b/modules/nf-core/cellpose/meta.yml new file mode 100644 index 0000000..32f59f9 --- /dev/null +++ b/modules/nf-core/cellpose/meta.yml @@ -0,0 +1,52 @@ +name: "cellpose" +description: cellpose segments cells in images +keywords: + - segmentation + - image + - cellpose +tools: + - "cellpose": + description: "cellpose is an anatomical segmentation algorithm written in Python 3 by Carsen Stringer and Marius Pachitariu" + homepage: "https://github.com/MouseLand/cellpose" + documentation: "https://cellpose.readthedocs.io/en/latest/command.html" + tool_dev_url: "https://github.com/MouseLand/cellpose" + doi: 10.1038/s41592-022-01663-4 + licence: ["BSD 3-Clause"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + (sample id) + - image: + type: file + description: tif file ready for segmentation + pattern: "*.{tif,tiff}" + - model: + type: file + description: Optional input file. Cellpose 2 model trained by user using human-in-the-loop approach. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + [sample id] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - mask: + type: file + description: labelled mask output from cellpose in tif format + pattern: "*.{tif, tiff}" + - flows: + type: file + description: cell flow output from cellpose + pattern: "*.{tif}" + +authors: + - "@josenimo" + - "@FloWuenne" +maintainers: + - "@josenimo" + - "@FloWuenne" diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 0000000..9b3272b --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 800a609..f218761 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index c32657d..5f15a5f 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: @@ -16,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -30,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index e55b8d4..da03340 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -4,11 +4,10 @@ """Provide functions to merge multiple versions.yml files.""" +import yaml import platform from textwrap import dedent -import yaml - def _make_versions_html(versions): """Generate a tabular HTML output of all versions for MultiQC.""" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 0000000..b1e1630 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 0000000..5f59a93 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Should run without failures": { + "content": [ + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n 
\\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] + ], + "timestamp": "2024-01-09T23:01:18.710682" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 0000000..405aa24 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/deepcell/mesmer/main.nf b/modules/nf-core/deepcell/mesmer/main.nf new file mode 100644 index 0000000..a62235d --- /dev/null +++ b/modules/nf-core/deepcell/mesmer/main.nf @@ -0,0 +1,39 @@ +process DEEPCELL_MESMER { + tag "$meta.id" + label 'process_low' + + container "docker.io/vanvalenlab/deepcell-applications:0.4.1" + + input: + tuple val(meta) , path(img) + tuple val(meta2), path(membrane_img) + + // Output a .tif image, don't touch versions + output: + tuple val(meta), path("*.tif"), emit: mask + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def membrane_command = membrane_img ? "--membrane-image $membrane_img" : "" + def VERSION = "0.4.1" + + """ + python /usr/src/app/run_app.py mesmer \\ + --squeeze \\ + --nuclear-image $img \\ + --output-directory . \\ + --output-name ${prefix}.tif \\ + $membrane_command \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deepcell_mesmer: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/deepcell/mesmer/meta.yml b/modules/nf-core/deepcell/mesmer/meta.yml new file mode 100644 index 0000000..dec360b --- /dev/null +++ b/modules/nf-core/deepcell/mesmer/meta.yml @@ -0,0 +1,56 @@ +name: "deepcell_mesmer" +description: Deepcell/mesmer segmentation for whole-cell +keywords: + - imaging + - spatial_omics + - segmentation +tools: + - "mesmer": + description: "Deep cell is a collection of tools to segment imaging data" + homepage: "https://github.com/vanvalenlab/deepcell-tf" + documentation: "https://github.com/vanvalenlab/intro-to-deepcell/tree/master/pretrained_models" + tool_dev_url: "https://github.com/vanvalenlab/deepcell-tf" + doi: 10.1038/s41587-021-01094-0 + licence: ["APACHE2"] +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - img: + type: file + description: Multichannel image file + pattern: "*.{tiff,tif,h5,hdf5}" + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - membrane_img: + type: file + description: Optional membrane image to be provided separately. + pattern: "*.{tiff,tif,h5,hdf5}" +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - mask: + type: file + description: File containing the mask.
+ pattern: "*.{tif, tiff}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@migueLib" + - "@chiarasch" +maintainers: + - "@migueLib" + - "@chiarasch" diff --git a/modules/nf-core/deepcell/mesmer/tests/main.nf.test b/modules/nf-core/deepcell/mesmer/tests/main.nf.test new file mode 100644 index 0000000..9546c1e --- /dev/null +++ b/modules/nf-core/deepcell/mesmer/tests/main.nf.test @@ -0,0 +1,40 @@ +nextflow_process { + + name "Test Process DEEPCELL_MESMER" + script "../main.nf" + config "./nextflow.config" + process "DEEPCELL_MESMER" + + tag "modules" + tag "modules_nfcore" + tag "deepcell" + tag "deepcell/mesmer" + + test("mesmer - tif") { + + when { + process { + """ + input[0] = [ + [ id: 'test_img' ], + file(params.test_data['imaging']['segmentation']['image'], checkIfExists: true) + ] + input[1] = [ + [:], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.mask).match("mask") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/nf-core/deepcell/mesmer/tests/main.nf.test.snap b/modules/nf-core/deepcell/mesmer/tests/main.nf.test.snap new file mode 100644 index 0000000..e6ac463 --- /dev/null +++ b/modules/nf-core/deepcell/mesmer/tests/main.nf.test.snap @@ -0,0 +1,23 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,922bf813163d265f8a7f12fa09fc18c2" + ] + ], + "timestamp": "2023-12-06T11:11:13.513166311" + }, + "mask": { + "content": [ + [ + [ + { + "id": "test_img" + }, + "mask.tif:md5,1550535389bd24d4ea4a8288502b0afa" + ] + ] + ], + "timestamp": "2023-12-06T11:04:34.263500517" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepcell/mesmer/tests/nextflow.config b/modules/nf-core/deepcell/mesmer/tests/nextflow.config new file mode 100644 index 0000000..b0c3e0f --- /dev/null +++ b/modules/nf-core/deepcell/mesmer/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + + withName: "DEEPCELL_MESMER" { + ext.prefix = 'mask' + ext.args = '--image-mpp=0.65 --compartment=whole-cell --nuclear-channel 0 --membrane-channel 1' + } + +} + +docker.runOptions = '--entrypoint ""' +singularity.runOptions = '-B "$HOME"' diff --git a/modules/nf-core/deepcell/mesmer/tests/tags.yml b/modules/nf-core/deepcell/mesmer/tests/tags.yml new file mode 100644 index 0000000..002647b --- /dev/null +++ b/modules/nf-core/deepcell/mesmer/tests/tags.yml @@ -0,0 +1,2 @@ +deepcell/mesmer: + - "modules/nf-core/deepcell/mesmer/**" diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf deleted file mode 100644 index 9ae5838..0000000 --- a/modules/nf-core/fastqc/main.nf +++ /dev/null @@ -1,51 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::fastqc=0.11.9" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // Make list of old name and new name pairs to use for renaming in the bash while loop - def old_new_pairs = reads instanceof Path || reads.size() == 1 ? 
[[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } - def rename_to = old_new_pairs*.join(' ').join(' ') - def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') - """ - printf "%s %s\\n" $rename_to | while read old_name new_name; do - [ -f "\${new_name}" ] || ln -s \$old_name \$new_name - done - fastqc $args --threads $task.cpus $renamed_files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml deleted file mode 100644 index 4da5bb5..0000000 --- a/modules/nf-core/fastqc/meta.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/ilastik/multicut/environment.yml b/modules/nf-core/ilastik/multicut/environment.yml new file mode 100644 index 0000000..ecd6fa3 --- /dev/null +++ b/modules/nf-core/ilastik/multicut/environment.yml @@ -0,0 +1,5 @@ +name: ilastik_multicut +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/ilastik/multicut/main.nf b/modules/nf-core/ilastik/multicut/main.nf new file mode 100644 index 0000000..f792487 --- /dev/null +++ b/modules/nf-core/ilastik/multicut/main.nf @@ -0,0 +1,58 @@ +process ILASTIK_MULTICUT { + tag "$meta.id" + label 'process_low' + + container "docker.io/biocontainers/ilastik:1.4.0_cv1" + + input: + tuple val(meta), path(h5) + tuple val(meta2), path (ilp) + tuple val(meta3), path (probs) + + output: + tuple val(meta), path("*.tiff") , emit: out_tiff + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "ILASTIK_MULTICUT module does not support Conda. 
Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + /opt/ilastik-1.4.0-Linux/run_ilastik.sh \\ + --headless \\ + --readonly 1 \\ + --project=$ilp \\ + --raw_data=$h5 \\ + --probabilities=$probs \\ + --export_source="Multicut Segmentation" \\ + --output_filename_format=${prefix}.tiff \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ilastik: \$(/opt/ilastik-1.4.0-Linux/run_ilastik.sh --headless --version) + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "ILASTIK_MULTICUT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.4.0" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.tiff + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ilastik: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/ilastik/multicut/meta.yml b/modules/nf-core/ilastik/multicut/meta.yml new file mode 100644 index 0000000..cb2af37 --- /dev/null +++ b/modules/nf-core/ilastik/multicut/meta.yml @@ -0,0 +1,59 @@ +name: "ilastik_multicut" +description: Ilastik is a tool that utilizes machine learning algorithms to classify pixels, segment, track and count cells in images. Ilastik contains a graphical user interface to interactively label pixels. However, this nextflow module will implement the --headless mode, to apply pixel classification using a pre-trained .ilp file on an input image. +keywords: + - multicut + - segmentation + - pixel classification +tools: + - "ilastik": + description: "Ilastik is a user friendly tool that enables pixel classification, segmentation and analysis." + homepage: "https://www.ilastik.org/" + documentation: "https://www.ilastik.org/documentation/" + tool_dev_url: "https://github.com/ilastik/ilastik" + licence: "GPL3" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - h5: + type: file + description: h5 file containing the image stack to classify + pattern: "*.{h5,hdf5}" + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ilp: + type: file + description: Trained ilastik .ilp project file + pattern: "*.{ilp}" + - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - probs: + type: file + description: Probability map for boundary based segmentation + pattern: "*.{h5,hdf5}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - out_tiff: + type: file + description: Multicut segmentation mask output.
+ pattern: "*.{tiff}" +authors: + - "@FloWuenne" +maintainers: + - "@FloWuenne" diff --git a/modules/nf-core/ilastik/pixelclassification/environment.yml b/modules/nf-core/ilastik/pixelclassification/environment.yml new file mode 100644 index 0000000..a8dafc2 --- /dev/null +++ b/modules/nf-core/ilastik/pixelclassification/environment.yml @@ -0,0 +1,5 @@ +name: ilastik_pixelclassification +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/ilastik/pixelclassification/main.nf b/modules/nf-core/ilastik/pixelclassification/main.nf new file mode 100644 index 0000000..2748283 --- /dev/null +++ b/modules/nf-core/ilastik/pixelclassification/main.nf @@ -0,0 +1,58 @@ +process ILASTIK_PIXELCLASSIFICATION { + tag "$meta.id" + label 'process_single' + + container "docker.io/biocontainers/ilastik:1.4.0_cv1" + + input: + tuple val(meta), path(input_img) + tuple val(meta2), path(ilp) + + output: + tuple val(meta), path("*.${suffix}") , emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "ILASTIK_PIXELCLASSIFICATION module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "h5" + + """ + /opt/ilastik-1.4.0-Linux/run_ilastik.sh \\ + --headless \\ + --readonly 1 \\ + --project=$ilp \\ + --output_filename_format=${prefix}.${suffix} \\ + $args \\ + $input_img + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ilastik: \$(/opt/ilastik-1.4.0-Linux/run_ilastik.sh --headless --version) + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "ILASTIK_PIXELCLASSIFICATION module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "h5" + + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ilastik: \$(/opt/ilastik-1.4.0-Linux/run_ilastik.sh --headless --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/ilastik/pixelclassification/meta.yml b/modules/nf-core/ilastik/pixelclassification/meta.yml new file mode 100644 index 0000000..6a9e8ba --- /dev/null +++ b/modules/nf-core/ilastik/pixelclassification/meta.yml @@ -0,0 +1,48 @@ +name: "ilastik_pixelclassification" +description: Ilastik is a tool that utilizes machine learning algorithms to classify pixels, segment, track and count cells in images. Ilastik contains a graphical user interface to interactively label pixels. However, this nextflow module will implement the --headless mode, to apply pixel classification using a pre-trained .ilp file on an input image. +keywords: + - pixel_classification + - segmentation + - probability_maps +tools: + - "ilastik": + description: "Ilastik is a user friendly tool that enables pixel classification, segmentation and analysis." + homepage: "https://www.ilastik.org/" + documentation: "https://www.ilastik.org/documentation/" + tool_dev_url: "https://github.com/ilastik/ilastik" + licence: "GPL3" +input: + - meta: + type: map + description: | + Groovy Map containing sample information for h5 file + e.g.
[ id:'test', single_end:false ] + - input_img: + type: file + description: Input img file containing image stack to classify + - meta2: + type: map + description: | + Groovy Map containing sample information for ilp file + e.g. [ id:'test', single_end:false ] + - ilp: + type: file + description: Trained ilastik pixel classification .ilp project file + pattern: "*.{ilp}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: Output file from ilastik pixel classification. +authors: + - "@FloWuenne" +maintainers: + - "@FloWuenne" diff --git a/modules/nf-core/mindagap/duplicatefinder/environment.yml b/modules/nf-core/mindagap/duplicatefinder/environment.yml new file mode 100644 index 0000000..ad98152 --- /dev/null +++ b/modules/nf-core/mindagap/duplicatefinder/environment.yml @@ -0,0 +1,7 @@ +name: "MINDAGAP_DUPLICATEFINDER" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::mindagap=0.0.2" diff --git a/modules/nf-core/mindagap/duplicatefinder/main.nf b/modules/nf-core/mindagap/duplicatefinder/main.nf new file mode 100644 index 0000000..ad7ce5b --- /dev/null +++ b/modules/nf-core/mindagap/duplicatefinder/main.nf @@ -0,0 +1,33 @@ +process MINDAGAP_DUPLICATEFINDER { + tag "$meta.id" + label 'process_single' + + conda "bioconda::mindagap=0.0.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mindagap:0.0.2--pyhdfd78af_1': + 'biocontainers/mindagap:0.0.2--pyhdfd78af_1' }" + + input: + tuple val(meta), path(spot_table) + + output: + tuple val(meta), path("*markedDups.txt"), emit: marked_dups_spots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + duplicate_finder.py \\ + $spot_table \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mindagap: \$(mindagap.py test -v) + END_VERSIONS + """ +} diff --git a/modules/nf-core/mindagap/duplicatefinder/meta.yml b/modules/nf-core/mindagap/duplicatefinder/meta.yml new file mode 100644 index 0000000..b92d419 --- /dev/null +++ b/modules/nf-core/mindagap/duplicatefinder/meta.yml @@ -0,0 +1,44 @@ +name: "MINDAGAP_DUPLICATEFINDER" +description: marks duplicate spots along gridline edges. +keywords: + - imaging + - resolve_bioscience + - spatial_transcriptomics +tools: + - "mindagap": + description: "Takes a single panorama image and fills the empty grid lines with neighbour-weighted values." + homepage: "https://github.com/ViriatoII/MindaGap/blob/main/README.md" + documentation: "https://github.com/ViriatoII/MindaGap/blob/main/README.md" + tool_dev_url: "https://github.com/ViriatoII/MindaGap" + licence: ["BSD 3-clause License"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - spot_table: + type: file + description: tsv file containing one spot per row with order x,y,z,gene without column header. + pattern: "*.{tsv,txt}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - marked_dups_spots: + type: file + description: tsv file containing one spot per row, with duplicated spots labeled with "Duplicated" in their gene column. + pattern: "*.{markedDups.txt}" + +authors: + - "@FloWuenne" +maintainers: + - "@FloWuenne" diff --git a/modules/nf-core/mindagap/duplicatefinder/tests/main.nf.test b/modules/nf-core/mindagap/duplicatefinder/tests/main.nf.test new file mode 100644 index 0000000..d43a7de --- /dev/null +++ b/modules/nf-core/mindagap/duplicatefinder/tests/main.nf.test @@ -0,0 +1,34 @@ +nextflow_process { + + name "Test Process MINDAGAP_DUPLICATEFINDER" + script "../main.nf" + config "./nextflow.config" + process "MINDAGAP_DUPLICATEFINDER" + tag "modules" + tag "modules_nfcore" + tag "mindagap" + tag "mindagap/duplicatefinder" + + test("test_mindagap_duplicatefinder_spots") { + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file('https://raw.githubusercontent.com/nf-core/test-datasets/molkart/test_data/input_data/spots.txt') + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/mindagap/duplicatefinder/tests/main.nf.test.snap b/modules/nf-core/mindagap/duplicatefinder/tests/main.nf.test.snap new file mode 100644 index 0000000..a1af5c2 --- /dev/null +++ b/modules/nf-core/mindagap/duplicatefinder/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "test_mindagap_duplicatefinder_spots": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "spots_markedDups.txt:md5,4562caad05850d7dd7b6e9235e068a8b" + ] + ], + "1": [ + "versions.yml:md5,ae112b853ec32ee1c5eecaf421d01003" + ], + "marked_dups_spots": [ + [ + { + "id": "test" + }, + "spots_markedDups.txt:md5,4562caad05850d7dd7b6e9235e068a8b" + ] + ], + "versions": [ + "versions.yml:md5,ae112b853ec32ee1c5eecaf421d01003" + ] + } + ], + "timestamp": "2023-11-30T22:56:20.101101751" + } +} \ No newline at end of file diff --git a/modules/nf-core/mindagap/duplicatefinder/tests/nextflow.config b/modules/nf-core/mindagap/duplicatefinder/tests/nextflow.config new file mode 100644 index 0000000..99963ab --- /dev/null +++ b/modules/nf-core/mindagap/duplicatefinder/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: "MINDAGAP_DUPLICATEFINDER" { + ext.args = '90' + } + +} diff --git a/modules/nf-core/mindagap/duplicatefinder/tests/tags.yml b/modules/nf-core/mindagap/duplicatefinder/tests/tags.yml new file mode 100644 index 0000000..6f4a474 --- /dev/null +++ b/modules/nf-core/mindagap/duplicatefinder/tests/tags.yml @@ -0,0 +1,2 @@ +mindagap/duplicatefinder: + - modules/nf-core/mindagap/duplicatefinder/** diff --git a/modules/nf-core/mindagap/mindagap/environment.yml b/modules/nf-core/mindagap/mindagap/environment.yml new file mode 100644 index 0000000..f380fee --- /dev/null +++ b/modules/nf-core/mindagap/mindagap/environment.yml @@ -0,0 +1,7 @@ +name: mindagap_mindagap +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::mindagap=0.0.2 diff --git a/modules/nf-core/mindagap/mindagap/main.nf b/modules/nf-core/mindagap/mindagap/main.nf new file mode 100644 index 0000000..3f06996 --- /dev/null +++ b/modules/nf-core/mindagap/mindagap/main.nf @@ -0,0 +1,44 @@ +process MINDAGAP_MINDAGAP { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mindagap:0.0.2--pyhdfd78af_1' : + 'biocontainers/mindagap:0.0.2--pyhdfd78af_1' }" + + input: + tuple val(meta), path(panorama) + + output: + tuple val(meta), path("*.{tif,tiff}"), emit: tiff + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mindagap.py \\ + $panorama \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mindagap: \$(mindagap.py test -v) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${panorama.baseName}_gridfilled.tiff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mindagap: \$(mindagap.py test -v) + END_VERSIONS + """ +} diff --git a/modules/nf-core/mindagap/mindagap/meta.yml b/modules/nf-core/mindagap/mindagap/meta.yml new file mode 100644 index 0000000..efd0000 --- /dev/null +++ b/modules/nf-core/mindagap/mindagap/meta.yml @@ -0,0 +1,43 @@ +name: "mindagap_mindagap" +description: Takes a single panorama image and fills the empty grid lines with neighbour-weighted values. +keywords: + - imaging + - resolve_bioscience + - spatial_transcriptomics +tools: + - "mindagap": + description: "Mindagap is a collection of tools to process multiplexed FISH data, such as produced by Resolve Biosciences Molecular Cartography." + homepage: "https://github.com/ViriatoII/MindaGap" + documentation: "https://github.com/ViriatoII/MindaGap/blob/main/README.md" + tool_dev_url: "https://github.com/ViriatoII/MindaGap" + licence: ["BSD-3-Clause license"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - panorama: + type: file + description: A tiff file containing gridlines as produced by Molecular Cartography imaging. + pattern: "*.{tif,tiff}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tiff: + type: file + description: A tiff file with gridlines filled based on consecutive gaussian blurring. 
+ pattern: "*.{tiff}" +authors: + - "@ViriatoII" + - "@flowuenne" +maintainers: + - "@ViriatoII" + - "@flowuenne" diff --git a/modules/nf-core/mindagap/mindagap/tests/main.nf.test b/modules/nf-core/mindagap/mindagap/tests/main.nf.test new file mode 100644 index 0000000..dbad491 --- /dev/null +++ b/modules/nf-core/mindagap/mindagap/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process MINDAGAP_MINDAGAP" + script "../main.nf" + config "./nextflow.config" + process "MINDAGAP_MINDAGAP" + + tag "modules" + tag "modules_nfcore" + tag "mindagap" + tag "mindagap/mindagap" + + test("mindgap - tiff") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['imaging']['tiff']['mouse_heart_wga'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tiff).match("tiff") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/nf-core/mindagap/mindagap/tests/main.nf.test.snap b/modules/nf-core/mindagap/mindagap/tests/main.nf.test.snap new file mode 100644 index 0000000..f799a14 --- /dev/null +++ b/modules/nf-core/mindagap/mindagap/tests/main.nf.test.snap @@ -0,0 +1,23 @@ +{ + "tiff": { + "content": [ + [ + [ + { + "id": "test" + }, + "mindagap.mouse_heart.wga_gridfilled.tiff:md5,310cf0017baa54af32176b43a5b0adfd" + ] + ] + ], + "timestamp": "2023-12-15T11:01:20.825556802" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,937acaba2cb90efc2705b71839e6cefc" + ] + ], + "timestamp": "2023-12-15T11:01:20.840211732" + } +} \ No newline at end of file diff --git a/modules/nf-core/mindagap/mindagap/tests/nextflow.config b/modules/nf-core/mindagap/mindagap/tests/nextflow.config new file mode 100644 index 0000000..0991d4f --- /dev/null +++ b/modules/nf-core/mindagap/mindagap/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: "MINDAGAP_MINDAGAP" { + ext.args = "3 40 --Xtilesize 2144" + } +} diff --git a/modules/nf-core/mindagap/mindagap/tests/tags.yml b/modules/nf-core/mindagap/mindagap/tests/tags.yml new file mode 100644 index 0000000..6f1837d --- /dev/null +++ b/modules/nf-core/mindagap/mindagap/tests/tags.yml @@ -0,0 +1,2 @@ +mindagap/mindagap: + - "modules/nf-core/mindagap/mindagap/**" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 0000000..7625b75 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 4b60474..1b9f7c4 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? 
"--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml @@ -41,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee..45a9bc3 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +12,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +29,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +51,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 0000000..d0438ed --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,83 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match() } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 0000000..d37e730 --- /dev/null +++ 
b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,21 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + ] + ], + "timestamp": "2024-01-09T23:02:49.911994" + }, + "sarscov2 single-end [fastqc] - stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + ] + ], + "timestamp": "2024-01-09T23:03:14.524346" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 0000000..bea6c0d --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/nextflow.config b/nextflow.config index 9986acb..b7a0dbd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,15 +9,45 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options input = null + // Segmentation command line flags + segmentation_method = 'mesmer' + segmentation_min_area = null + segmentation_max_area = null + cellpose_save_flows = false + cellpose_diameter = 30 + cellpose_chan = 0 + cellpose_chan2 = null + cellpose_pretrained_model = 'cyto' + cellpose_custom_model = null + cellpose_flow_threshold = 0.4 + cellpose_edge_exclude = true + mesmer_image_mpp = 0.138 + mesmer_compartment = 'whole-cell' + ilastik_pixel_project = null + ilastik_multicut_project = null + + // Preprocessing command line flags + mindagap_tilesize = 2144 + mindagap_boxsize = 3 + mindagap_loopnum = 40 + mindagap_edges = false + skip_clahe = false + clahe_cliplimit = 0.01 + clahe_nbins = 256 + clahe_pixel_size = 0.138 + clahe_kernel = 25 + clahe_pyramid_tile = 1072 + + // Training subset command line + create_training_subset = false + crop_amount = 4 + crop_nonzero_fraction = 0.4 + crop_size_x = 400 + crop_size_y = 400 - // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' - igenomes_ignore = false // MultiQC options multiqc_config = null multiqc_title = null @@ -27,7 +57,6 @@ params { // Boilerplate options outdir = null - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null @@ -36,18 +65,14 @@ params { hook_url = null help = false version = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null // Max resource options @@ -56,6 +81,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -75,13 +107,12 @@ try { // } catch (Exception e) { // System.err.println("WARNING: Could not load nf-core/config/molkart profiles: ${params.custom_config_base}/pipeline/molkart.config") // } - - profiles { debug { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' - cleanup = false + cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = 
true @@ -104,17 +135,16 @@ profiles { } docker { docker.enabled = true - docker.registry = 'quay.io' - docker.userEmulation = true conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + docker.runOptions = '--entrypoint ""' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true @@ -128,7 +158,6 @@ profiles { } podman { podman.enabled = true - podman.registry = 'quay.io' conda.enabled = false docker.enabled = false singularity.enabled = false @@ -156,6 +185,7 @@ profiles { } apptainer { apptainer.enabled = true + apptainer.autoMounts = true conda.enabled = false docker.enabled = false singularity.enabled = false @@ -165,22 +195,26 @@ profiles { } gitpod { executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' -// Load igenomes.config if required -if (!params.igenomes_ignore) { - includeConfig 'conf/igenomes.config' -} else { - params.genomes = [:] +// Nextflow plugins +plugins { + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -195,32 +229,35 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. 
+nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { name = 'nf-core/molkart' - author = """@FloWuenne""" + author = """@kbestak, @FloWuenne""" homePage = 'https://github.com/nf-core/molkart' description = """An analysis pipeline for Molecular Cartography data from Resolve Biosciences.""" mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' - version = '1.0dev' + nextflowVersion = '!>=23.04.0' + version = '1.0.0' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index e50b8fd..0d94107 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -5,6 +5,210 @@ "description": "An analysis pipeline for Molecular Cartography data from Resolve Biosciences.", "type": "object", "definitions": { + "segmentation_methods_and_options": { + "title": "Segmentation methods and options", + "type": "object", + "description": "Define which segmentation methods should be used and how.", + "default": "", + "properties": { + "segmentation_method": { + "type": "string", + "description": "List of segmentation tools to apply to the image written as a comma separated string: mesmer,cellpose,ilastik would run all three options.", + "default": "mesmer", + "fa_icon": "fas fa-border-all" + }, + "segmentation_min_area": { + "type": "integer", + "description": "Minimum area size (in pixels) for segmentation masks.", + "fa_icon": "fas fa-chart-area" + }, + "segmentation_max_area": { + "type": "integer", + "description": "Maximum area size (in pixels) for segmenation masks.", + "fa_icon": "fas fa-chart-area" + }, + "cellpose_diameter": { + "type": "integer", + "default": 30, + "description": "Cell diameter, if 0 will use the diameter of the training labels used in the model, or with built-in model will estimate diameter for each image.", + "fa_icon": "far fa-circle" + }, + "cellpose_chan": { + "type": "integer", + "default": 0, + "description": "Specifies the channel to be segmented by Cellpose.", + "fa_icon": "fas fa-dice-one" + }, + "cellpose_chan2": { + "type": "integer", + "description": "Specifies nuclear channel index for Cellpose if using pretrained models such as cyto.", + "fa_icon": "fas fa-dice-two" + }, + "cellpose_pretrained_model": { + "type": "string", + "default": "cyto", + "description": "Pretrained Cellpose model to be used for segmentation.", + "fa_icon": "fas fa-address-card" + }, + "cellpose_custom_model": { + "type": "string", + "description": "Custom Cellpose model can be provided by the user.", + "fa_icon": "far fa-address-card" + }, + "cellpose_flow_threshold": { + "type": "number", + "default": 0.4, + "description": "Flow error threshold for Cellpose.", + "fa_icon": "fas fa-border-style" + }, + "cellpose_edge_exclude": { + "type": "boolean", + "default": true, + "description": "Should cells 
detected near image edges be excluded.", + "fa_icon": "fas fa-border-none" + }, + "cellpose_save_flows": { + "type": "boolean", + "description": "Should flow fields from Cellpose be saved?", + "fa_icon": "fas fa-arrows-alt", + "hidden": true + }, + "mesmer_image_mpp": { + "type": "number", + "default": 0.138, + "description": "Pixel size in microns for segmentation with Mesmer.", + "fa_icon": "fas fa-camera" + }, + "mesmer_compartment": { + "type": "string", + "default": "whole-cell", + "description": "Compartment to be segmented with Mesmer (nuclear, whole-cell)", + "fa_icon": "fas fa-border-all" + }, + "ilastik_pixel_project": { + "type": "string", + "description": "Provide ilastik with a pixel classification project to produce probability maps.", + "format": "file-path", + "fa_icon": "fas fa-camera" + }, + "ilastik_multicut_project": { + "type": "string", + "description": "Provide ilastik with a multicut project to create segmentation masks.", + "format": "file-path", + "fa_icon": "fas fa-cut" + } + }, + "required": ["segmentation_method"], + "fa_icon": "fas fa-border-all" + }, + "image_preprocessing": { + "title": "Image preprocessing", + "type": "object", + "description": "Defines gridsize for Mindagap and should contrast adjustment be applied and how.", + "default": "", + "fa_icon": "fas fa-adjust", + "properties": { + "mindagap_boxsize": { + "type": "integer", + "default": 3, + "description": "Box size used by Mindagap to overcome gaps, a larger number allows to overcome large gaps, but results in less fine details in the filled grid.", + "fa_icon": "fab fa-dropbox" + }, + "mindagap_loopnum": { + "type": "integer", + "default": 40, + "description": "Loop number performed by Mindagap. Lower values are faster, but the result is less good.", + "fa_icon": "fas fa-circle-notch" + }, + "clahe_cliplimit": { + "type": "number", + "default": 0.01, + "description": "Contrast limit for localized changes in contrast by CLAHE.", + "fa_icon": "fas fa-border-style" + }, + "clahe_nbins": { + "type": "integer", + "default": 256, + "description": "Number of histogram bins to be used by CLAHE.", + "fa_icon": "fas fa-ruler-combined" + }, + "clahe_pixel_size": { + "type": "number", + "default": 0.138, + "description": "Pixel size to be used by CLAHE.", + "fa_icon": "fas fa-camera" + }, + "clahe_kernel": { + "type": "number", + "default": 25, + "description": "Kernel size to be used by CLAHE.", + "fa_icon": "far fa-object-group" + }, + "skip_clahe": { + "type": "boolean", + "description": "Specifies whether contrast-limited adaptive histogram equalization should be skipped.", + "fa_icon": "fab fa-buromobelexperte" + }, + "mindagap_tilesize": { + "type": "integer", + "default": 2144, + "description": "Tile size (distance between gridlines) for Mindagap.", + "fa_icon": "fas fa-th", + "hidden": true + }, + "mindagap_edges": { + "type": "boolean", + "description": "Should Mindagap blur area around grid for smoother transitions between tiles with different exposures.", + "fa_icon": "fas fa-th-large", + "hidden": true + }, + "clahe_pyramid_tile": { + "type": "integer", + "description": "Tile size used for pyramid generation (must be divisible by 16).", + "fa_icon": "fas fa-cubes", + "hidden": true, + "default": 1072 + } + } + }, + "training_subset_options": { + "title": "Training subset options", + "type": "object", + "description": "Define whether a cropped training set for segmentation methods should be created.", + "default": "", + "fa_icon": "fas fa-running", + "properties": { + "create_training_subset": { 
+ "type": "boolean", + "description": "Create subset for training a segmentation model.", + "fa_icon": "fas fa-running" + }, + "crop_size_x": { + "type": "integer", + "default": 400, + "description": "Indicates crop size on x axis.", + "fa_icon": "fas fa-crop-alt" + }, + "crop_size_y": { + "type": "integer", + "default": 400, + "description": "Indicates crop size on y axis.", + "fa_icon": "fas fa-crop-alt" + }, + "crop_amount": { + "type": "integer", + "default": 4, + "description": "Number of crops you would like to extract.", + "fa_icon": "fas fa-hashtag" + }, + "crop_nonzero_fraction": { + "type": "number", + "default": 0.4, + "description": "Indicates fraction of pixels per crop above global threshold to ensure tissue and not only background is selected.", + "fa_icon": "fas fa-percentage" + } + } + }, "input_output_options": { "title": "Input/output options", "type": "object", @@ -15,12 +219,13 @@ "input": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/molkart/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" + "fa_icon": "fas fa-file-csv", + "schema": "/assets/schema_input.json" }, "outdir": { "type": "string", @@ -40,45 +245,8 @@ "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" } - } - }, - "reference_genome_options": { - "title": "Reference genome options", - "type": "object", - "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow.", - "properties": { - "genome": { - "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." - }, - "fasta": { - "type": "string", - "format": "file-path", - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" - }, - "igenomes_base": { - "type": "string", - "format": "directory-path", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", - "fa_icon": "fas fa-cloud-download-alt", - "hidden": true - }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." 
- } - } + }, + "help_text": "" }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -157,7 +325,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -228,6 +396,7 @@ }, "multiqc_config": { "type": "string", + "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true @@ -243,13 +412,6 @@ "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -257,22 +419,42 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
} } } }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/definitions/segmentation_methods_and_options" + }, + { + "$ref": "#/definitions/image_preprocessing" }, { - "$ref": "#/definitions/reference_genome_options" + "$ref": "#/definitions/training_subset_options" + }, + { + "$ref": "#/definitions/input_output_options" }, { "$ref": "#/definitions/institutional_config_options" diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..870799d --- /dev/null +++ b/nf-test.config @@ -0,0 +1,8 @@ +config { + + testsDir "tests" + workDir ".nf-test" + configFile "tests/nextflow.config" + profile "" + +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 0aecf87..0000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,44 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads } - - emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return fastq_meta -} diff --git a/tests/main.nf.test b/tests/main.nf.test new file mode 100644 index 0000000..48d5546 --- /dev/null +++ b/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "../main.nf" + config "./nextflow.config" + tag "pipeline" + tag "pipeline_molkart" + + test("Nuclear channel, mesmer and cellpose, without clahe") { + + when { + params { + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/molkart/test_data/samplesheets/samplesheet_nuclear.csv' + outdir = "$outputDir" + skip_clahe = true + mindagap_tilesize = 90 + mindagap_boxsize = 7 + mindagap_loopnum = 100 + segmentation_method = "cellpose,mesmer" + } + } + + then { + assert workflow.success + assert snapshot( + path("$outputDir/mindagap/nuc_only_nuclear_gridfilled.tiff"), + path("$outputDir/mindagap/nuc_only_spots_markedDups.txt"), + path("$outputDir/segmentation/cellpose/nuc_only_cellpose_mask.tif"), + path("$outputDir/segmentation/mesmer/nuc_only_mesmer_mask.tif"), + path("$outputDir/segmentation/filtered_masks/nuc_only_mesmer_filtered.tif"), + path("$outputDir/segmentation/filtered_masks/nuc_only_cellpose_filtered.tif"), + path("$outputDir/spot2cell/cellxgene_nuc_only_cellpose.csv"), + path("$outputDir/spot2cell/cellxgene_nuc_only_mesmer.csv"), + path("$outputDir/anndata/nuc_only_cellpose.adata"), + path("$outputDir/anndata/nuc_only_mesmer.adata"), + path("$outputDir/molkartqc/nuc_only.cellpose.spot_QC.csv"), 
+ path("$outputDir/molkartqc/nuc_only.mesmer.spot_QC.csv") + ).match() + assert file("$outputDir/multiqc/multiqc_report.html").exists() + } + } + + test("Two channels, mesmer and cellpose, with clahe") { + + when { + params { + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/molkart/test_data/samplesheets/samplesheet_membrane.csv' + outdir = "$outputDir" + mindagap_tilesize = 90 + mindagap_boxsize = 7 + mindagap_loopnum = 100 + clahe_pyramid_tile = 368 + segmentation_method = "cellpose,mesmer" + } + } + + then { + assert workflow.success + assert snapshot( + path("$outputDir/mindagap/mem_only_membrane_gridfilled.tiff"), + path("$outputDir/mindagap/mem_only_nuclear_gridfilled.tiff"), + path("$outputDir/mindagap/mem_only_spots_markedDups.txt"), + path("$outputDir/segmentation/cellpose/mem_only_cellpose_mask.tif"), + path("$outputDir/segmentation/mesmer/mem_only_mesmer_mask.tif"), + path("$outputDir/segmentation/filtered_masks/mem_only_mesmer_filtered.tif"), + path("$outputDir/segmentation/filtered_masks/mem_only_cellpose_filtered.tif"), + path("$outputDir/spot2cell/cellxgene_mem_only_cellpose.csv"), + path("$outputDir/spot2cell/cellxgene_mem_only_mesmer.csv"), + path("$outputDir/anndata/mem_only_cellpose.adata"), + path("$outputDir/anndata/mem_only_mesmer.adata"), + path("$outputDir/molkartqc/mem_only.cellpose.spot_QC.csv"), + path("$outputDir/molkartqc/mem_only.mesmer.spot_QC.csv"), + ).match() + assert file("$outputDir/stack/mem_only_stack.ome.tif").exists() + assert file("$outputDir/clahe/mem_only_membrane_gridfilled_clahe.tiff").exists() + assert file("$outputDir/clahe/mem_only_nuclear_gridfilled_clahe.tiff").exists() + assert file("$outputDir/multiqc/multiqc_report.html").exists() + } + } + + test("Create training subset") { + + when { + params { + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/molkart/test_data/samplesheets/samplesheet_nuclear.csv' + outdir = "$outputDir" + mindagap_tilesize = 90 + mindagap_boxsize = 7 + mindagap_loopnum = 100 + clahe_pyramid_tile = 368 + create_training_subset = true + crop_amount = 2 + crop_size_x = 10 + crop_size_y = 10 + } + } + + then { + assert workflow.success + assert snapshot( + path("$outputDir/mindagap/nuc_only_nuclear_gridfilled.tiff"), + ).match() + assert file("$outputDir/clahe/nuc_only_nuclear_gridfilled_clahe.tiff").exists() + assert file("$outputDir/training_subset/hdf5/nuc_only_nuclear_gridfilled_clahe_crop0.hdf5").exists() + assert file("$outputDir/training_subset/hdf5/nuc_only_nuclear_gridfilled_clahe_crop1.hdf5").exists() + assert file("$outputDir/training_subset/tiff/nuc_only_nuclear_gridfilled_clahe_crop0.tiff").exists() + assert file("$outputDir/training_subset/tiff/nuc_only_nuclear_gridfilled_clahe_crop1.tiff").exists() + assert file("$outputDir/multiqc/crop_overview.png").exists() + } + } +} diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap new file mode 100644 index 0000000..58e4a17 --- /dev/null +++ b/tests/main.nf.test.snap @@ -0,0 +1,43 @@ +{ + "Two channels, mesmer and cellpose, with clahe": { + "content": [ + "mem_only_membrane_gridfilled.tiff:md5,6e24160d758468d2de07ca200dfb62c2", + "mem_only_nuclear_gridfilled.tiff:md5,123763d54f05b2274690b3b84f9690b1", + "mem_only_spots_markedDups.txt:md5,4562caad05850d7dd7b6e9235e068a8b", + "mem_only_cellpose_mask.tif:md5,590591f541b6d3f17810cd44dd71b252", + "mem_only_mesmer_mask.tif:md5,6f8c792ccc4cca97b5696990fb4e3801", + "mem_only_mesmer_filtered.tif:md5,5751b2e39393a9c8221396f5ef592035", + 
"mem_only_cellpose_filtered.tif:md5,590591f541b6d3f17810cd44dd71b252", + "cellxgene_mem_only_cellpose.csv:md5,f063c1ce5e93e1e73431af36c6bc1e79", + "cellxgene_mem_only_mesmer.csv:md5,198b187d5f151077437aa591f5991b22", + "mem_only_cellpose.adata:md5,b47ea63ffea0947e43511f17b6920cd8", + "mem_only_mesmer.adata:md5,530649647a3466316ea52dde9dede4ab", + "mem_only.cellpose.spot_QC.csv:md5,df312175498f7942bffc33f2c2d8d1c9", + "mem_only.mesmer.spot_QC.csv:md5,3273bd6fecf93a3d240614d1b38831c9" + ], + "timestamp": "2024-01-05T11:19:49.882138591" + }, + "Nuclear channel, mesmer and cellpose, without clahe": { + "content": [ + "nuc_only_nuclear_gridfilled.tiff:md5,123763d54f05b2274690b3b84f9690b1", + "nuc_only_spots_markedDups.txt:md5,4562caad05850d7dd7b6e9235e068a8b", + "nuc_only_cellpose_mask.tif:md5,2784d8c6683ad80c24d8df4121e1128f", + "nuc_only_mesmer_mask.tif:md5,e9719f53651b4bf76a37e9374db7f4e4", + "nuc_only_mesmer_filtered.tif:md5,5a6761c80ea65d41ce00d06d3414c96b", + "nuc_only_cellpose_filtered.tif:md5,2784d8c6683ad80c24d8df4121e1128f", + "cellxgene_nuc_only_cellpose.csv:md5,0610a8713457c28acf1bc4c298bee8e3", + "cellxgene_nuc_only_mesmer.csv:md5,425cfb90a60b564e4bfff09ac5ea94c7", + "nuc_only_cellpose.adata:md5,fd52e62711465d754fd36a433761cb3b", + "nuc_only_mesmer.adata:md5,cd20ab6db5274bb85c960ea3bd8d2619", + "nuc_only.cellpose.spot_QC.csv:md5,e77b5973e0997170d0fde5c5901ad551", + "nuc_only.mesmer.spot_QC.csv:md5,f8f4eb85bb8269341ac072ac78962ed4" + ], + "timestamp": "2024-01-05T11:18:48.214995389" + }, + "Create training subset": { + "content": [ + "nuc_only_nuclear_gridfilled.tiff:md5,123763d54f05b2274690b3b84f9690b1" + ], + "timestamp": "2023-11-28T21:40:40.925034909" + } +} \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000..08dc560 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,6 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ + diff --git a/tests/tags.yml b/tests/tags.yml new file mode 100644 index 0000000..dce2348 --- /dev/null +++ b/tests/tags.yml @@ -0,0 +1,10 @@ +default: + - bin/** + - conf/** + - lib/** + - modules/** + - subworkflows/** + - tests/** + - workflows/** + - nextflow.config + - main.nf diff --git a/tower.yml b/tower.yml index 787aedf..92288fc 100644 --- a/tower.yml +++ b/tower.yml @@ -1,5 +1,7 @@ reports: multiqc_report.html: display: "MultiQC HTML report" - samplesheet.csv: - display: "Auto-created samplesheet with collated metadata and FASTQ paths" + final_QC.all_samples.csv: + display: "QC metrics across all samples and segmentation methods" + crop_overview.txt: + display: "Crop overview if training subset is created" diff --git a/workflows/molkart.nf b/workflows/molkart.nf index b991898..99b8fb8 100644 --- a/workflows/molkart.nf +++ b/workflows/molkart.nf @@ -1,21 +1,19 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS + PRINT PARAMS SUMMARY ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) +include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' -// Validate input parameters -WorkflowMolkart.initialise(params, log) +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + 
WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) -// TODO nf-core: Add all file path parameters for the pipeline to the list below -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +WorkflowMolkart.initialise(params, log) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -23,9 +21,9 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) /* @@ -34,10 +32,20 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { CROPTIFF } from '../modules/local/croptiff' +include { CROPHDF5 } from '../modules/local/crophdf5' +include { CREATE_ANNDATA } from '../modules/local/createanndata' +include { CREATE_STACK } from '../modules/local/createstack' +include { CLAHE } from '../modules/local/clahe' +include { MASKFILTER } from '../modules/local/maskfilter' +include { MOLKARTQC } from '../modules/local/molkartqc' +include { MOLKARTQCPNG } from '../modules/local/molkartqcpng' +include { SPOT2CELL } from '../modules/local/spot2cell' +include { TIFFH5CONVERT } from '../modules/local/tiffh5convert' + // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -48,9 +56,14 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CELLPOSE } from '../modules/nf-core/cellpose/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { DEEPCELL_MESMER } from '../modules/nf-core/deepcell/mesmer/main' +include { ILASTIK_MULTICUT } from '../modules/nf-core/ilastik/multicut/main' +include { ILASTIK_PIXELCLASSIFICATION } from '../modules/nf-core/ilastik/pixelclassification/main' +include { MINDAGAP_DUPLICATEFINDER } from '../modules/nf-core/mindagap/duplicatefinder/main' +include { MINDAGAP_MINDAGAP } from '../modules/nf-core/mindagap/mindagap/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -68,18 +81,240 @@ workflow MOLKART { // // SUBWORKFLOW: Read in samplesheet, validate and stage input files // - INPUT_CHECK ( - ch_input + //ch_from_samplesheet = Channel.fromSamplesheet("input") + + ch_from_samplesheet = Channel.fromSamplesheet("input") + + // stain: "1" denotes membrane, stain: "0" denotes nuclear image + // this is used to preserve the order later + ch_from_samplesheet + .map { + it[3] != [] ? tuple([id:it[0],stain:"1"], it[3]) : null + }.set { membrane_tuple } // if a membrane image is provided, return membrane image channel tuple (meta, path) + + ch_from_samplesheet + .map { it -> tuple([id:it[0],stain:"0"], it[1]) } + .set { image_tuple } // creates nuclear image channel tuple (meta, path) + + ch_from_samplesheet + .map { it -> tuple([id:it[0]], it[2]) } + .set { spot_tuple } // creates spot table channel tuple (meta, path) + + // + // MODULE: Run Mindagap_mindagap + // + mindagap_in = membrane_tuple.mix(image_tuple) // mindagap input contains both membrane and nuclear images + MINDAGAP_MINDAGAP(mindagap_in) + ch_versions = ch_versions.mix(MINDAGAP_MINDAGAP.out.versions) + + // + // MODULE: Apply Contrast-limited adaptive histogram equalization (CLAHE) + // CLAHE is either applied to all images, or none. + // + CLAHE(MINDAGAP_MINDAGAP.out.tiff) + ch_versions = ch_versions.mix(CLAHE.out.versions) + + map_for_stacks = !params.skip_clahe ? 
CLAHE.out.img_clahe : MINDAGAP_MINDAGAP.out.tiff + + map_for_stacks + .map { + meta, tiff -> [meta.subMap("id"), tiff, meta.stain] // creates a channel containing only the sample id in meta, path to preprocessed image and the stain value ("0" or "1") + }.groupTuple() // combines based on meta + .map{ + meta, paths, stains -> [meta, [paths[0], stains[0]], [paths[1], stains[1]]] // reorganizes to match path and stain + }.map{ + meta, stain1, stain2 -> [meta, [stain1, stain2].sort{ it[1] }] // sort by stain index (0 for nuclear, 1 for other) + }.map{ + meta, list -> [meta, list[0], list[1]] // sorted will have null as first list + }.map{ + it[1][0] != null ? [it[0],it[1][0],it[2][0]] : [it[0],it[2][0]] // if null, only return the valid nuclear path value, otherwise return both nuclear and membrane paths + }.set { grouped_map_stack } + + grouped_map_stack.filter{ // for rows without a present membrane image, set channel to no_stack + it[2] == null + }.set{ no_stack } + + grouped_map_stack.filter{ // for rows where the membrane image is present, make it compliant with STACK inputs + it[2] != null + }.map{ + [it[0],tuple(it[1],it[2])] + }.set{ create_stack_in } + + // + // MODULE: Stack channels if membrane image provided for segmentation + // + CREATE_STACK(create_stack_in) + ch_versions = ch_versions.mix(CREATE_STACK.out.versions) + stack_mix = no_stack.mix(CREATE_STACK.out.stack) + + if ( params.create_training_subset ) { + // Create subsets of the image for training an ilastik model + stack_mix.join( + grouped_map_stack.map{ + it[2] == null ? tuple(it[0], 1) : tuple(it[0], 2) + } // hardcodes that if membrane channel present, num_channels is 2, otherwise 1 + ).set{ training_in } + + CROPHDF5(training_in) + ch_versions = ch_versions.mix(CROPHDF5.out.versions) + // Combine images with crop_summary for making the same training tiff stacks as ilastik + tiff_crop = stack_mix.join(CROPHDF5.out.crop_summary) + CROPTIFF( + tiff_crop.map(it -> tuple(it[0],it[1])), + tiff_crop.map(it -> tuple(it[0],it[2])), + ) + ch_versions = ch_versions.mix(CROPTIFF.out.versions) + MOLKARTQCPNG(CROPTIFF.out.overview.map{ + tuple('matchkey', it[1]) + }.groupTuple().map{ it[1]} ) + ch_versions = ch_versions.mix(MOLKARTQCPNG.out.versions) + } else { + + // + // MODULE: MINDAGAP Duplicatefinder + // + // Filter out potential duplicate spots from the spots table + MINDAGAP_DUPLICATEFINDER(spot_tuple) + ch_versions = ch_versions.mix(MINDAGAP_DUPLICATEFINDER.out.versions) + + qc_spots = MINDAGAP_DUPLICATEFINDER.out.marked_dups_spots + + // + // MODULE: DeepCell Mesmer segmentation + // + segmentation_masks = Channel.empty() + if (params.segmentation_method.split(',').contains('mesmer')) { + DEEPCELL_MESMER( + grouped_map_stack.map{ tuple(it[0], it[1]) }, + grouped_map_stack.map{ + it[2] == null ? [[:],[]] : tuple(it[0], it[2]) // if no membrane channel specified, give empty membrane input; if membrane image exists, provide it to the process + } + ) + ch_versions = ch_versions.mix(DEEPCELL_MESMER.out.versions) + segmentation_masks = segmentation_masks + .mix(DEEPCELL_MESMER.out.mask + .combine(Channel.of('mesmer'))) + } + // + // MODULE: Cellpose segmentation + // + cellpose_custom_model = params.cellpose_custom_model ? stack_mix.combine(Channel.fromPath(params.cellpose_custom_model)) : [] + if (params.segmentation_method.split(',').contains('cellpose')) { + CELLPOSE( + stack_mix, + cellpose_custom_model ? 
cellpose_custom_model.map{it[2]} : [] + ) + ch_versions = ch_versions.mix(CELLPOSE.out.versions) + segmentation_masks = segmentation_masks + .mix(CELLPOSE.out.mask + .combine(Channel.of('cellpose'))) + } + // + // MODULE: ilastik segmentation + // + if (params.segmentation_method.split(',').contains('ilastik')) { + if (params.ilastik_pixel_project == null) { + error "ILASTIK_PIXELCLASSIFICATION module was not provided with the project .ilp file." + } + stack_mix.join( + grouped_map_stack.map{ + it[2] == null ? tuple(it[0], 1) : tuple(it[0], 2) + }).set{ tiffin } + + TIFFH5CONVERT(tiffin) + ch_versions = ch_versions.mix(TIFFH5CONVERT.out.versions) + + TIFFH5CONVERT.out.hdf5.combine( + Channel.fromPath(params.ilastik_pixel_project) + ).set{ ilastik_in } + ILASTIK_PIXELCLASSIFICATION( + ilastik_in.map{ [it[0], it[1]] }, + ilastik_in.map{ [it[0], it[2]] } + ) + ch_versions = ch_versions.mix(ILASTIK_PIXELCLASSIFICATION.out.versions) + + if (params.ilastik_multicut_project == null) { + error "ILASTIK_MULTICUT module was not provided with the project .ilp file." + } + ilastik_in.join(ILASTIK_PIXELCLASSIFICATION.out.output) + .combine(Channel.fromPath(params.ilastik_multicut_project)) + .set{ multicut_in } + + ILASTIK_MULTICUT( + multicut_in.map{ tuple(it[0], it[1]) }, + multicut_in.map{ tuple(it[0], it[4]) }, + multicut_in.map{ tuple(it[0], it[3]) } + ) + ch_versions = ch_versions.mix(ILASTIK_MULTICUT.out.versions) + segmentation_masks = segmentation_masks + .mix(ILASTIK_MULTICUT.out.out_tiff + .combine(Channel.of('ilastik'))) + } + segmentation_masks.map{ + meta, mask, segmentation -> + new_meta = meta.clone() + new_meta.segmentation = segmentation + [new_meta, mask] + }.set { matched_segmasks } + + // + // MODULE: filter segmentation masks + // + MASKFILTER(matched_segmasks) + ch_versions = ch_versions.mix(MASKFILTER.out.versions) + MASKFILTER.out.filtered_mask.map{ + meta, mask -> + tuple(meta.subMap("id"), mask, meta.segmentation) + }.set { filtered_masks } + + // + // MODULE: assign spots to segmentation mask + // + qc_spots + .combine(filtered_masks, by: 0) + .map { + meta, spots_table, mask, segmethod -> + new_meta = meta.clone() + new_meta.segmentation = segmethod + [new_meta, spots_table, mask] + } + .set { dedup_spots } + + SPOT2CELL( + dedup_spots.map(it -> tuple(it[0],it[1])), + dedup_spots.map(it -> tuple(it[0],it[2])) ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + ch_versions = ch_versions.mix(SPOT2CELL.out.versions) // - // MODULE: Run FastQC + // MODULE: create anndata squidpy object from spot2cell table // - FASTQC ( - INPUT_CHECK.out.reads + CREATE_ANNDATA( + SPOT2CELL.out.cellxgene_table ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + ch_versions = ch_versions.mix(CREATE_ANNDATA.out.versions) + + // + // MODULE: MOLKARTQC + // + SPOT2CELL.out.cellxgene_table.combine( + MASKFILTER.out.filtered_qc, by: 0 + ).map{ + meta, quant, filterqc -> + [meta.subMap("id"), quant, meta.segmentation, filterqc] + }.set { spot2cell_out } + + qc_spots + .combine(spot2cell_out, by: 0) + .set{ molkartqc } + + MOLKARTQC(molkartqc) + ch_versions = ch_versions.mix(MOLKARTQC.out.versions) + + } + // + // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS + // CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') @@ -88,17 +323,29 @@ workflow MOLKART { // // MODULE: MultiQC // - workflow_summary = WorkflowMolkart.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = 
WorkflowMolkart.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + workflow_summary = WorkflowMolkart.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) + methods_description = WorkflowMolkart.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) ch_methods_description = Channel.value(methods_description) ch_multiqc_files = Channel.empty() + + if ( params.create_training_subset ){ + ch_multiqc_files = ch_multiqc_files.mix( + MOLKARTQCPNG.out.png_overview + .collectFile(name: "crop_overview.png", storeDir: "${params.outdir}/multiqc" )) + ch_multiqc_files = ch_multiqc_files.mix( + CROPHDF5.out.crop_summary.map{it[1]} + .collectFile(name: 'crop_overview.txt', storeDir: "${params.outdir}/multiqc") + ) + } else { + ch_multiqc_files = ch_multiqc_files.mix( + MOLKARTQC.out.qc.map{it[1]} + .collectFile(name: 'final_QC.all_samples.csv', keepHeader: true, storeDir: "${params.outdir}/multiqc")) + } ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) MULTIQC ( ch_multiqc_files.collect(), @@ -119,6 +366,7 @@ workflow.onComplete { if (params.email || params.email_on_fail) { NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) } + NfcoreTemplate.dump_parameters(workflow, params) NfcoreTemplate.summary(workflow, params, log) if (params.hook_url) { NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
    Process Name \\", + " \\ Software Version
    CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
    yaml5.4.1
    TOOL1tool10.11.9
    TOOL2tool21.9
    WorkflowNextflow
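For context on how the parameter groups introduced in nextflow_schema.json are consumed, a minimal sketch of a custom Nextflow config (supplied with `-c`) is shown below. The file name custom.config is illustrative only; the values mirror the defaults declared in the segmentation and image-preprocessing groups above rather than anything mandated by this diff.

    // custom.config — illustrative sketch, not part of the pipeline
    params {
        input               = 'samplesheet.csv'   // CSV samplesheet validated against assets/schema_input.json
        outdir              = 'results'
        segmentation_method = 'cellpose,mesmer'    // comma-separated subset of cellpose, mesmer, ilastik
        mindagap_boxsize    = 3                    // schema default
        mindagap_loopnum    = 40                   // schema default
        skip_clahe          = false                // apply CLAHE to all preprocessed images
    }

With parameters pinned this way, the pipeline-level tests added in tests/main.nf.test can be run through the nf-test setup declared in nf-test.config, for example with `nf-test test --tag pipeline_molkart`.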