add parameter for propd_weighted_degree

nf-core · Oct 17, 2024 · 9819e84 · 9819e84
1 parent d7ab1ca
commit 9819e84
Show file tree

Hide file tree

Showing 4 changed files with 28 additions and 8 deletions.
diff --git a/conf/modules.config b/conf/modules.config
@@ -530,7 +530,8 @@ process {
             "--moderated ${params.propd_moderated}",
             "--fdr ${params.propd_fdr}",
             "--permutation ${params.propd_permutation}",
-            "--number_of_cutoffs ${params.propd_ncutoffs}"
+            "--number_of_cutoffs ${params.propd_ncutoffs}",
+            "--weighted_degree ${params.propd_weighted_degree}"
         ].join(' ').trim() }
         publishDir = [
             path: {

diff --git a/modules/local/propr/propd/templates/propd.R b/modules/local/propr/propd/templates/propd.R
@@ -90,6 +90,7 @@ get_connectivity <- function(pd, adj, de, cutoff, features_id_col='gene_id'){
     # add weighted degree
     # each connection is weighted by the theta value
     # so lower theta values (higher between group variance than within group variance) will have a higher weight
+    # NOTE: this is a placeholder for the proper weighted degree; the way it is computed may change
     mat <- getMatrix(pd)
     diag(mat) <- NA
     connectivity[,3] <- colSums((1 - mat) * adj, na.rm=TRUE)
@@ -100,7 +101,7 @@ get_connectivity <- function(pd, adj, de, cutoff, features_id_col='gene_id'){
     connectivity[,4] <- de
 
     # add average theta of the connections
-    connectivity[,5] <- connectivity[,3] / connectivity[,2]
+    connectivity[,5] <- colSums(mat * adj, na.rm=TRUE) / colSums(adj)
 
     # classification
     # green for DE genes, and red for non-DE genes
@@ -211,7 +212,7 @@ opt <- list(
     number_of_cutoffs = 100,                  # number of cutoffs for permutation test
 
     # parameters for getting the hub genes
-    weighted_degree   = FALSE,                # use weighted degree for hub genes or not
+    weighted_degree   = FALSE,                 # use weighted degree for hub genes or not
 
     # other parameters
     seed              = NA,                   # seed for reproducibility
@@ -232,6 +233,7 @@ opt_types <- list(
     fdr               = 'numeric',
     permutation       = 'numeric',
     number_of_cutoffs = 'numeric',
+    weighted_degree   = 'logical',
     seed              = 'numeric',
     ncores            = 'numeric'
 )
@@ -276,7 +278,7 @@ for (file_input in c('count','samplesheet')){
 
 # check parameters are valid
 
-if (opt$permutation < 0) {
+if (opt\$permutation < 0) {
     stop('permutation should be a positive integer')
 }
 
@@ -383,6 +385,8 @@ if (opt\$permutation == 0) {
     )
     if (theta_cutoff) {
 
+        warning('Significant theta value found: ', theta_cutoff)
+
         # get adjacency matrix
         # this matrix will have 1s for significant pairs and 0s for the rest
         # diagonals are set to 0
@@ -413,8 +417,12 @@ if (opt\$permutation == 0) {
         )
         results <- results[,c("Partner", "Pair", "theta")]
         results\$class <- "red"
-        results\$class[which(results\$Pair %in% hub_genes[opt\$features_id_col] | results\$Partner %in% hub_genes[opt\$features_id_col])] <- "yellow"
-        results\$class[which(results\$Pair %in% hub_genes[opt\$features_id_col] & results\$Partner %in% hub_genes[opt\$features_id_col])] <- "green"
+        results\$class[which(de[results\$Pair] < theta_cutoff | de[results\$Partner] < theta_cutoff)] <- "yellow"
+        results\$class[which(de[results\$Pair] < theta_cutoff & de[results\$Partner] < theta_cutoff)] <- "green"
+
+        # sort significant pairs
+
+        results <- results[order(results\$theta),]
     }
 
 } else {
@@ -438,6 +446,8 @@ if (opt\$permutation == 0) {
     )
     if (theta_cutoff) {
 
+        warning('Significant theta value found: ', theta_cutoff)
+
         # get adjacency matrix
 
         adj <- getAdjacencyFDR(
@@ -466,9 +476,12 @@ if (opt\$permutation == 0) {
         )
         results <- results[,c("Partner", "Pair", "theta")]
         results\$class <- "red"
-        results\$class[which(results\$Pair %in% hub_genes\$gene | results\$Partner %in% hub_genes\$gene)] <- "yellow"
-        results\$class[which(results\$Pair %in% hub_genes\$gene & results\$Partner %in% hub_genes\$gene)] <- "green"
+        results\$class[which(de[results\$Pair] < theta_cutoff | de[results\$Partner] < theta_cutoff)] <- "yellow"
+        results\$class[which(de[results\$Pair] < theta_cutoff & de[results\$Partner] < theta_cutoff)] <- "green"
+
+        # sort significant pairs
 
+        results <- results[order(results\$theta),]
     }
 }
 

diff --git a/nextflow.config b/nextflow.config
@@ -232,6 +232,7 @@ params {
     propd_fdr          = 0.05
     propd_permutation  = 0
     propd_ncutoffs     = 100
+    propd_weighted_degree = false
 
     // propr options
     propr_metric       = 'rho'

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -788,6 +788,11 @@
                     "type": "integer",
                     "default": 100,
                     "description": "Because it is expensive to calculate an associated p-value for all the pairs through all the permutation tests, this number is used to define on how many values to calculate the FDR. The higher this value is, the higher the granularity."
+                },
+                "propd_weighted_degree": {
+                    "type": "boolean",
+                    "default": false,
+                    "description": "If true, use weighted degree to filter the hub genes. Otherwise, use the degree."
                 }
             }
         },