Merge branch 'docs'

theislab · Jan 28, 2024 · ca5deaf · ca5deaf
2 parents c4736af + 1ed105e
commit ca5deaf
Show file tree

Hide file tree

Showing 54 changed files with 1,945 additions and 1,735 deletions.
diff --git a/.github/workflows/test_action.yml b/.github/workflows/test_action.yml
@@ -15,6 +15,11 @@ jobs:
           channels: conda-forge, bioconda
       - name: Install Nextflow
         uses: nf-core/setup-nextflow@v1
+      - name: Set up Python to install gdown
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.9"
+          cache: "pip"
       - name: Download test dataset
         run: bash ${GITHUB_WORKSPACE}/test_data/download_data.sh
       - name: Run pipeline with test data

diff --git a/.gitignore b/.gitignore
@@ -7,4 +7,3 @@ testing/
 testing*
 *.pyc
 docs/build/
-nextflow_internal.config
diff --git a/README.md b/README.md
@@ -5,7 +5,7 @@
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
 [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
 
-![Caption](docs/source/_static/images/pipeline_v2.png)
+![Caption](docs/source/_static/images/pipeline.png)
 
 Read the [documentation](https://hadge.readthedocs.io/en/latest/) and the associated [manuscript](https://www.biorxiv.org/content/10.1101/2023.07.23.550061v1)
 

diff --git a/bin/HTODemux-visualisation.R b/bin/HTODemux-visualisation.R
@@ -1,40 +1,40 @@
 #!/usr/bin/env Rscript
-#Libraries
+# Libraries
 library(Seurat)
 library(argparse)
 library(ggplot2)
 
 # Create a parser
 parser <- ArgumentParser("Parameters for HTODemux Visualisation")
-parser$add_argument("--hashtagPath",help="folder where rds object was saved from the first part of HTODemux")
-parser$add_argument("--assay",help="Name of the Hashtag assay HTO by default", default = "HTO")
-#Output graphs - Ridge Plot
+parser$add_argument("--hashtagPath", help = "folder where rds object was saved from the first part of HTODemux")
+parser$add_argument("--assay", help = "Name of the Hashtag assay HTO by default", default = "HTO")
+# Output graphs - Ridge Plot
 parser$add_argument("--ridgePlot", help = "Generates a ridge plot from the results, True to generate", default = "True")
 parser$add_argument("--ridgeNCol", help = "Number of columns for ridgePlot", default = 3, type = "integer")
 
-#Output graphs - Scatter Feature
-parser$add_argument("--featureScatter",help = "Generates a ridge plot from the results, True to generate", default = "True")
+# Output graphs - Scatter Feature
+parser$add_argument("--featureScatter", help = "Generates a ridge plot from the results, True to generate", default = "True")
 parser$add_argument("--scatterFeat1", help = "Feature 1 for Feature Scatter Plot", default = "hto_HTO-A")
 parser$add_argument("--scatterFeat2", help = "Feature 2 for Feature Scatter Plot", default = "hto_HTO-B")
 
-#Output graphs - Violin Plot
+# Output graphs - Violin Plot
 parser$add_argument("--vlnPlot", help = "Generates a violin plot from the results, True to generate", default = "True")
 parser$add_argument("--vlnFeatures", help = "Features to plot (gene expression, metrics, PC scores, anything that can be retreived by FetchData)", default = "nCount_RNA")
 parser$add_argument("--vlnLog", help = "plot the feature axis on log scale", action = "store_true")
 
-#Output graphs - tSNE
+# Output graphs - tSNE
 parser$add_argument("--tSNE", help = "Generate a two dimensional tSNE embedding for HTOs", default = "True")
 parser$add_argument("--tSNEIdents", help = "What should we remove from the object (we have Singlet,Doublet and Negative)", default = "Negative")
 parser$add_argument("--tSNEInvert", action = "store_true")
 parser$add_argument("--tSNEVerbose", action = "store_true")
 parser$add_argument("--tSNEApprox", action = "store_true")
-parser$add_argument("--tSNEDimMax", help = "max number of donors ",type = "integer", default = 1)
-parser$add_argument("--tSNEPerplexity", help = "value for perplexity", type = "integer",  default = 100)
+parser$add_argument("--tSNEDimMax", help = "max number of donors ", type = "integer", default = 1)
+parser$add_argument("--tSNEPerplexity", help = "value for perplexity", type = "integer", default = 100)
 
-#Output graphs - Heatmap
+# Output graphs - Heatmap
 parser$add_argument("--heatMap", help = "Generate a Heatmap", default = "False")
-parser$add_argument("--heatMapNcells", help ="value for number of cells", type = "integer",  default = 500)
-parser$add_argument("--outputdir", help='Output directory')
+parser$add_argument("--heatMapNcells", help = "value for number of cells", type = "integer", default = 500)
+parser$add_argument("--outputdir", help = "Output directory")
 
 args <- parser$parse_args()
 
@@ -45,53 +45,50 @@ params <- data.frame(Argument, Value)
 
 
 print(paste0("Loading RDS object in ", args$hashtagPath))
-if (!endsWith(args$hashtagPath, ".rds")){
-    hash_file <- list.files(args$hashtagPath, pattern = "\\.rds$", full.names = TRUE)[1]
-}else{
-    hash_file <- args$hashtagPath
+if (!endsWith(args$hashtagPath, ".rds")) {
+  hash_file <- list.files(args$hashtagPath, pattern = "\\.rds$", full.names = TRUE)[1]
+} else {
+  hash_file <- args$hashtagPath
 }
 print(hash_file)
-hashtag <-readRDS(hash_file)
+hashtag <- readRDS(hash_file)
 
 
 # Ridge Plot
 # Group cells based on the max HTO signal
 if (args$ridgePlot == "True") {
   Idents(hashtag) <- paste0(args$assay, "_maxID")
   RidgePlot(hashtag, assay = args$assay, features = rownames(hashtag[[args$assay]]), ncol = args$ridgeNCol)
-  ggsave(paste0(args$outputdir, '/ridge.jpeg'), device = 'jpeg', dpi = 500) # height = 10, width = 10
+  ggsave(paste0(args$outputdir, "/ridge.jpeg"), device = "jpeg", dpi = 500) # height = 10, width = 10
 }
 
 if (args$featureScatter == "True") {
   FeatureScatter(hashtag, feature1 = args$scatterFeat1, feature2 = args$scatterFeat2)
-  ggsave(paste0(args$outputdir, '/featureScatter.jpeg'), device = 'jpeg',dpi = 500)
+  ggsave(paste0(args$outputdir, "/featureScatter.jpeg"), device = "jpeg", dpi = 500)
 }
 
 if (args$vlnPlot == "True") {
   Idents(hashtag) <- paste0(args$assay, "_classification.global")
   VlnPlot(hashtag, features = args$vlnFeatures, pt.size = 0.1, log = args$vlnLog)
-  ggsave(paste0(args$outputdir, '/violinPlot.jpeg'), device = 'jpeg', dpi = 500)
+  ggsave(paste0(args$outputdir, "/violinPlot.jpeg"), device = "jpeg", dpi = 500)
 }
 
 if (args$tSNE == "True") {
   hashtag.subset <- subset(hashtag, idents = args$tSNEIdents, invert = args$tSNEInvert)
   DefaultAssay(hashtag.subset) <- args$assay
-  hashtag.subset <- ScaleData(hashtag.subset, features = rownames(hashtag.subset),
-                                   verbose = args$tSNEVerbose)
+  hashtag.subset <- ScaleData(hashtag.subset,
+    features = rownames(hashtag.subset),
+    verbose = args$tSNEVerbose
+  )
   hashtag.subset <- RunPCA(hashtag.subset, features = rownames(hashtag.subset), approx = args$tSNEApprox)
   hashtag.subset <- RunTSNE(hashtag.subset, dims = 1:args$tSNEDimMax, perplexity = args$tSNEPerplexity, check_duplicates = FALSE)
   DimPlot(hashtag.subset)
-  ggsave(paste0(args$outputdir, '/tSNE.jpeg'), device = 'jpeg', dpi = 500)
+  ggsave(paste0(args$outputdir, "/tSNE.jpeg"), device = "jpeg", dpi = 500)
 }
 
 if (args$heatMap == "True") {
   HTOHeatmap(hashtag, assay = args$assay, ncells = args$heatMapNcells)
-  ggsave(paste0(args$outputdir, '/heatMap.jpeg'), device = 'jpeg', dpi = 500)
+  ggsave(paste0(args$outputdir, "/heatMap.jpeg"), device = "jpeg", dpi = 500)
 }
 
 write.csv(params, paste0(args$outputdir, "/visual_params.csv"))
-
-
-
-
-
diff --git a/bin/HTODemux.R b/bin/HTODemux.R
@@ -1,5 +1,5 @@
 #!/usr/bin/env Rscript
-#Libraries
+# Libraries
 library(Seurat)
 library(argparse)
 library(ggplot2)
@@ -15,54 +15,52 @@ parser$add_argument("--seed", help = "Sets the random seed.", type = "integer",
 parser$add_argument("--init", help = "Initial number of clusters for hashtags.", default = NULL, type = "integer")
 parser$add_argument("--assay", help = "Assay name", default = "HTO")
 parser$add_argument("--objectOutHTOdemux", help = "Prefix name for the object containing the output of HTODemux object", type = "character", default = "htodemux")
-parser$add_argument("--assignmentOutHTOdemux", help="Prefeix name for the file containing the output of HTODemux assignment", type = "character", default = "htodemux")
-parser$add_argument("--outputdir", help='Output directory')
+parser$add_argument("--assignmentOutHTOdemux", help = "Prefeix name for the file containing the output of HTODemux assignment", type = "character", default = "htodemux")
+parser$add_argument("--outputdir", help = "Output directory")
 
 
 args <- parser$parse_args()
-if (!endsWith(args$seuratObject, ".rds")){
-    seuratObj <- list.files(args$seuratObject, pattern = "\\.rds$", full.names = TRUE)[1]
-}else{
-    seuratObj <- args$seuratObject
+if (!endsWith(args$seuratObject, ".rds")) {
+  seuratObj <- list.files(args$seuratObject, pattern = "\\.rds$", full.names = TRUE)[1]
+} else {
+  seuratObj <- args$seuratObject
 }
 
 init <- args$init
-if(is.null(init)){
-    init <- "NULL"
+if (is.null(init)) {
+  init <- "NULL"
 }
 Argument <- c("seuratObject", "quantile", "kfunc", "nstarts", "nsamples", "seed", "init", "assay")
 Value <- c(seuratObj, args$quantile, args$kfunc, args$nstarts, args$nsamples, args$seed, init, args$assay)
 
 params <- data.frame(Argument, Value)
 # Loading Seurat object
-hashtag <-readRDS(seuratObj)
+hashtag <- readRDS(seuratObj)
 
 # Demultiplex cells based on HTO enrichment
-if(args$kfunc == "clara"){
-    hashtag <- HTODemux(hashtag, assay = args$assay, positive.quantile = args$quantile, init = args$init, nsamples = args$nsamples, kfunc = "clara", seed = args$seed)
-}else{
-    hashtag <- HTODemux(hashtag, assay = args$assay, positive.quantile = args$quantile, init = args$init, nstarts = args$nstarts, kfunc = "kmeans", seed = args$seed)
+if (args$kfunc == "clara") {
+  hashtag <- HTODemux(hashtag, assay = args$assay, positive.quantile = args$quantile, init = args$init, nsamples = args$nsamples, kfunc = "clara", seed = args$seed)
+} else {
+  hashtag <- HTODemux(hashtag, assay = args$assay, positive.quantile = args$quantile, init = args$init, nstarts = args$nstarts, kfunc = "kmeans", seed = args$seed)
 }
 
 
 # Global classification results
-#table(hashtag[[paste0(args$assay,"_classification.global")]])
+# table(hashtag[[paste0(args$assay,"_classification.global")]])
 
-#table(hashtag[[paste0(args$assay,"_classification")]])
+# table(hashtag[[paste0(args$assay,"_classification")]])
 
 
 # Saving results
 
 donors <- rownames(hashtag[[args$assay]])
-assignment <- hashtag[[paste0(args$assay,"_classification")]]
-assignment[[paste0(args$assay,"_classification")]][!assignment[[paste0(args$assay,"_classification")]] %in% c(donors, 'Negative')] <- "Doublet"
+assignment <- hashtag[[paste0(args$assay, "_classification")]]
+assignment[[paste0(args$assay, "_classification")]][!assignment[[paste0(args$assay, "_classification")]] %in% c(donors, "Negative")] <- "Doublet"
 
 
 
 write.csv(params, paste0(args$outputdir, "/params.csv"))
 write.csv(assignment, paste0(args$outputdir, "/", args$assignmentOutHTOdemux, "_assignment_htodemux.csv"))
-#write.csv(hashtag[[paste0(args$assay,"_classification")]], paste0(args$outputdir, "/", args$assignmentOutHTOdemux, "_assignment_htodemux.csv"))
-write.csv(hashtag[[paste0(args$assay,"_classification.global")]], paste0(args$outputdir, "/", args$assignmentOutHTOdemux, "_classification_htodemux.csv"))
-saveRDS(hashtag, file=paste0(args$outputdir, "/", args$objectOutHTOdemux,".rds"))
-
-
+# write.csv(hashtag[[paste0(args$assay,"_classification")]], paste0(args$outputdir, "/", args$assignmentOutHTOdemux, "_assignment_htodemux.csv"))
+write.csv(hashtag[[paste0(args$assay, "_classification.global")]], paste0(args$outputdir, "/", args$assignmentOutHTOdemux, "_classification_htodemux.csv"))
+saveRDS(hashtag, file = paste0(args$outputdir, "/", args$objectOutHTOdemux, ".rds"))
diff --git a/bin/MultiSeq.R b/bin/MultiSeq.R
@@ -1,5 +1,5 @@
 #!/usr/bin/env Rscript
-#Libraries
+# Libraries
 library(Seurat)
 library(argparse)
 library(ggplot2)
@@ -9,34 +9,33 @@ parser <- ArgumentParser("Parameters for MultiSeqDemux")
 parser$add_argument("--seuratObjectPath", help = "Seurat object or the folder containing the seurat object")
 parser$add_argument("--quantile", help = "The quantile to use for classification", type = "double", default = 0.7)
 parser$add_argument("--autoThresh", help = "Whether to perform automated threshold finding to define the best quantile", action = "store_true")
-parser$add_argument("--maxiter", help = "Maximum number of iterations if autoThresh = TRUE ", default = 5, type="integer")
+parser$add_argument("--maxiter", help = "Maximum number of iterations if autoThresh = TRUE ", default = 5, type = "integer")
 parser$add_argument("--qrangeFrom", help = "A range of possible quantile values to try if autoThresh is TRUE", type = "double", default = 0.1)
 parser$add_argument("--qrangeTo", help = "A range of possible quantile values to try if autoThresh is TRUE", type = "double", default = 0.9)
 parser$add_argument("--qrangeBy", help = "A range of possible quantile values to try if autoThresh is TRUE", type = "double", default = 0.05)
 parser$add_argument("--verbose", help = "Prints the output", action = "store_true")
-parser$add_argument("--assay", help = "Name of the multiplexing assay", default="HTO")
-parser$add_argument("--assignmentOutMulti",help = "Prefix name for the file containing the output of MULTI-Seq Demux object", default = "mutliseq")
+parser$add_argument("--assay", help = "Name of the multiplexing assay", default = "HTO")
+parser$add_argument("--assignmentOutMulti", help = "Prefix name for the file containing the output of MULTI-Seq Demux object", default = "mutliseq")
 parser$add_argument("--objectOutMulti", help = "Prefix name for the object containing the output of MULTI-Seq Demux object", default = "multiseq")
-parser$add_argument("--outputdir", help='Output directory')
+parser$add_argument("--outputdir", help = "Output directory")
 
 args <- parser$parse_args()
-if (!endsWith(args$seuratObjectPath, ".rds")){
-    seuratObj <- list.files(args$seuratObjectPath, pattern = "\\.rds$", full.names = TRUE)[1]
-}else{
-    seuratObj <- args$seuratObjectPath
+if (!endsWith(args$seuratObjectPath, ".rds")) {
+  seuratObj <- list.files(args$seuratObjectPath, pattern = "\\.rds$", full.names = TRUE)[1]
+} else {
+  seuratObj <- args$seuratObjectPath
 }
 
 Argument <- c("seuratObjectPath", "quantile", "autoThresh", "maxiter", "qrangeFrom", "qrangeTo", "qrangeBy", "verbose", "assay")
 Value <- c(seuratObj, args$quantile, args$autoThresh, args$maxiter, args$qrangeFrom, args$qrangeTo, args$qrangeBy, args$verbose, args$assay)
 
 params <- data.frame(Argument, Value)
 
-hashtag <-readRDS(seuratObj)
+hashtag <- readRDS(seuratObj)
 if (args$autoThresh == TRUE) {
-    hashtag <- MULTIseqDemux(hashtag, assay = args$assay,  quantile = args$quantile, autoThresh = TRUE, maxiter = args$maxiter, qrange=seq(from = args$qrangeFrom, to = args$qrangeTo, by = args$qrangeBy), verbose = args$verbose)
-}else{
-    hashtag <- MULTIseqDemux(hashtag, assay = args$assay, quantile = args$quantile, verbose = args$verbose)
-
+  hashtag <- MULTIseqDemux(hashtag, assay = args$assay, quantile = args$quantile, autoThresh = TRUE, maxiter = args$maxiter, qrange = seq(from = args$qrangeFrom, to = args$qrangeTo, by = args$qrangeBy), verbose = args$verbose)
+} else {
+  hashtag <- MULTIseqDemux(hashtag, assay = args$assay, quantile = args$quantile, verbose = args$verbose)
 }
 
 table(hashtag$MULTI_ID)