From 5f259661d41a210b6ffe91c35073f6519478480f Mon Sep 17 00:00:00 2001 From: WackerO Date: Wed, 27 Mar 2024 09:59:10 +0100 Subject: [PATCH 1/7] Trying to fix the pipeline pull issue --- nextflow.config | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 64afbb72..801b8eda 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,10 +22,10 @@ params { sizefactors_from_controls = false // Reporting - logo_file = "docs/images/nf-core-differentialabundance_logo_light.png" - css_file = "assets/nf-core_style.css" - citations_file = "CITATIONS.md" - report_file = "assets/differentialabundance_report.Rmd" + logo_file = "$projectDir/docs/images/nf-core-differentialabundance_logo_light.png" + css_file = "$projectDir/assets/nf-core_style.css" + citations_file = "$projectDir/CITATIONS.md" + report_file = "$projectDir/assets/differentialabundance_report.Rmd" report_title = null report_author = null report_contributors = null From 23a827016f2ac7ec71db78fab10f8b76c1aab7ce Mon Sep 17 00:00:00 2001 From: WackerO Date: Tue, 23 Apr 2024 10:26:46 +0200 Subject: [PATCH 2/7] changed filter_difftable to python --- modules/local/filter_difftable.nf | 60 ++++++++++++++----------------- 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/modules/local/filter_difftable.nf b/modules/local/filter_difftable.nf index c8d6c5f9..52b19978 100644 --- a/modules/local/filter_difftable.nf +++ b/modules/local/filter_difftable.nf @@ -2,14 +2,14 @@ process FILTER_DIFFTABLE { label 'process_single' - conda "conda-forge::gawk=5.1.0" + conda "pandas=1.5.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : - 'biocontainers/gawk:5.1.0' }" + 'https://depot.galaxyproject.org/singularity/pandas:1.5.2' : + 'biocontainers/pandas:1.5.2' }" input: tuple val(meta), path(input_file) - tuple val(logFC_column), val(logFC_threshold) + tuple val(logFC_column), val(FC_threshold) tuple val(padj_column), val(padj_threshold) output: @@ -20,42 +20,34 @@ process FILTER_DIFFTABLE { task.ext.when == null || task.ext.when script: - def VERSION = '9.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ - output_file=\$(echo $input_file | sed 's/\\(.*\\)\\..*/\\1/')_filtered.tsv + #!/usr/bin/env python - # Function to find column number - find_column_number() { - awk -v column="\$2" '{for(i=1;i<=NF;i++) if (\$i == column) {print i; exit}}' <<< "\$(head -n 1 "\$1")" - } + from math import log2 + from os import path + import pandas as pd + import platform + from sys import exit - # Extract column numbers - logFC_col=\$(find_column_number "$input_file" "log2FoldChange") - padj_col=\$(find_column_number "$input_file" "padj") - - # Prepare the output file - head -n 1 "$input_file" > "\${output_file}.tmp" - - # The following snippet performs the following checks on each row (add +0.0 to the numbers so that they are definitely treated as numerics): - # # 1. Check that the current logFC/padj is not NA # 2. Check that the current logFC is >= threshold (abs does not work, so use a workaround) # 3. Check that the current padj is <= threshold - # # If this is true, the row is written to the new file, otherwise not - - awk -F'\\t' -v logFC_col="\$logFC_col" -v padj_col="\$padj_col" -v logFC_thresh="$logFC_threshold" -v padj_thresh="$padj_threshold" ' - NR > 1 && \$logFC_col != "NA" && \$padj_col != "NA" && - ((\$logFC_col+0.0 >= logFC_thresh+0.0) || (-\$logFC_col+0.0 >= logFC_thresh+0.0)) && - \$padj_col+0.0 <= padj_thresh+0.0 { print } - ' "$input_file" >> "\${output_file}.tmp" - - # Rename temporary file to final output file - mv "\${output_file}.tmp" "\$output_file" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bash: \$(echo \$(bash --version | grep -Eo 'version [[:alnum:].]+' | sed 's/version //')) - END_VERSIONS + if not any("$input_file".endswith(ext) for ext in [".csv", ".tsv", ".txt"]): + exit("Please provide a .csv, .tsv or .txt file!") + + table = pd.read_csv("$input_file", sep=("," if "$input_file".endswith(".csv") else "\t"), header=0) + logFC_threshold = log2(float("$FC_threshold")) + table = table[~table["$logFC_column"].isna() & + ~table["$padj_column"].isna() & + (pd.to_numeric(table["$logFC_column"], errors='coerce').abs() >= float(logFC_threshold)) & + (pd.to_numeric(table["$padj_column"], errors='coerce') <= float("$padj_threshold"))] + + table.to_csv(path.splitext(path.basename("$input_file"))[0]+"_filtered.tsv", sep="\t", index=False) + + with open('versions.yml', 'a') as version_file: + version_file.write('"${task.process}":' + "\\n") + version_file.write(" python: " + (platform.python_version()) + "\\n") + version_file.write(" pandas: " + str(pd.__version__) + "\\n") """ } From a9a7847bb0de7985edf3cb51cc6aa501a3789552 Mon Sep 17 00:00:00 2001 From: WackerO Date: Tue, 23 Apr 2024 10:28:34 +0200 Subject: [PATCH 3/7] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96e54174..51843fa7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [[#260](https://github.com/nf-core/differentialabundance/pull/260)] - Change FILTER_DIFFTABLE to python because AWK does not correctly filter reliablys ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) - [[#232](https://github.com/nf-core/differentialabundance/pull/232)] - Mention missing dots in volcano plot, change rounding, turn off rounding by default ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) ## v1.4.0 - 2023-11-27 From 456cb35b911c26a1514a3d250afcedd91c81d5fe Mon Sep 17 00:00:00 2001 From: WackerO Date: Tue, 23 Apr 2024 10:36:07 +0200 Subject: [PATCH 4/7] fixed PR number in changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51843fa7..668ad774 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,7 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` -- [[#260](https://github.com/nf-core/differentialabundance/pull/260)] - Change FILTER_DIFFTABLE to python because AWK does not correctly filter reliablys ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#264](https://github.com/nf-core/differentialabundance/pull/264)] - Change FILTER_DIFFTABLE to python because AWK does not correctly filter reliablys ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) - [[#232](https://github.com/nf-core/differentialabundance/pull/232)] - Mention missing dots in volcano plot, change rounding, turn off rounding by default ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) ## v1.4.0 - 2023-11-27 From b379fef75d01a0af30e3ee58d927da575e360347 Mon Sep 17 00:00:00 2001 From: WackerO Date: Tue, 23 Apr 2024 10:41:04 +0200 Subject: [PATCH 5/7] change nf-core.yml to fix linting --- .nf-core.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc81..d2cda970 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,8 @@ repository_type: pipeline +lint: + nextflow_config: + - config_defaults: + - params.logo_file + - params.css_file + - params.citations_file + - params.report_file From be656d939c4d7b1ed54f13de9c4099fd47cfd35c Mon Sep 17 00:00:00 2001 From: WackerO Date: Wed, 24 Apr 2024 09:13:41 +0200 Subject: [PATCH 6/7] removed writing python version --- modules/local/filter_difftable.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/local/filter_difftable.nf b/modules/local/filter_difftable.nf index 52b19978..da9612e3 100644 --- a/modules/local/filter_difftable.nf +++ b/modules/local/filter_difftable.nf @@ -47,7 +47,6 @@ process FILTER_DIFFTABLE { with open('versions.yml', 'a') as version_file: version_file.write('"${task.process}":' + "\\n") - version_file.write(" python: " + (platform.python_version()) + "\\n") version_file.write(" pandas: " + str(pd.__version__) + "\\n") """ } From d6f50cb15025b3a643eda97c87b07136ad7cfac2 Mon Sep 17 00:00:00 2001 From: WackerO Date: Wed, 24 Apr 2024 11:46:41 +0200 Subject: [PATCH 7/7] fix typo --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 668ad774..c7629657 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,7 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` -- [[#264](https://github.com/nf-core/differentialabundance/pull/264)] - Change FILTER_DIFFTABLE to python because AWK does not correctly filter reliablys ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#264](https://github.com/nf-core/differentialabundance/pull/264)] - Change FILTER_DIFFTABLE to python because AWK does not correctly filter reliably ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) - [[#232](https://github.com/nf-core/differentialabundance/pull/232)] - Mention missing dots in volcano plot, change rounding, turn off rounding by default ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords)) ## v1.4.0 - 2023-11-27