Improve docs #35

Merged · 9 commits · Jan 28, 2024
5 changes: 5 additions & 0 deletions .github/workflows/test_action.yml
@@ -15,6 +15,11 @@ jobs:
channels: conda-forge, bioconda
- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
- name: Set up Python to install gdown
uses: actions/setup-python@v5
with:
python-version: "3.9"
cache: "pip"
- name: Download test dataset
run: bash ${GITHUB_WORKSPACE}/test_data/download_data.sh
- name: Run pipeline with test data
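The new workflow step sets up Python 3.9 with pip caching before the test dataset is downloaded, presumably so that `gdown` can be pip-installed and used by `test_data/download_data.sh` (that script is not part of this diff). A minimal sketch of such a download step, assuming a Google Drive-hosted archive; the file ID and archive name below are placeholders:

```bash
#!/usr/bin/env bash
# Sketch only: the real test_data/download_data.sh is not shown in this PR.
# <FILE_ID> and the archive name are placeholders.
set -euo pipefail

pip install gdown                # reuses the pip cache prepared by actions/setup-python
gdown "https://drive.google.com/uc?id=<FILE_ID>" -O test_data/test_dataset.tar.gz
tar -xzf test_data/test_dataset.tar.gz -C test_data/
```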
5 changes: 4 additions & 1 deletion .gitignore
@@ -6,4 +6,7 @@ result*/
testing/
testing*
*.pyc
docs/build/
docs/build/
test_data/
res_gx12/
res_gx38/
2 changes: 1 addition & 1 deletion README.md
@@ -5,7 +5,7 @@
[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)

![Caption](docs/source/_static/images/pipeline_v2.png)
![Caption](docs/source/_static/images/pipeline.png)

Read the [documentation](https://hadge.readthedocs.io/en/latest/) and the associated [manuscript](https://www.biorxiv.org/content/10.1101/2023.07.23.550061v1)

57 changes: 27 additions & 30 deletions bin/HTODemux-visualisation.R
@@ -1,40 +1,40 @@
#!/usr/bin/env Rscript
#Libraries
# Libraries
library(Seurat)
library(argparse)
library(ggplot2)

# Create a parser
parser <- ArgumentParser("Parameters for HTODemux Visualisation")
parser$add_argument("--hashtagPath",help="folder where rds object was saved from the first part of HTODemux")
parser$add_argument("--assay",help="Name of the Hashtag assay HTO by default", default = "HTO")
#Output graphs - Ridge Plot
parser$add_argument("--hashtagPath", help = "folder where rds object was saved from the first part of HTODemux")
parser$add_argument("--assay", help = "Name of the Hashtag assay HTO by default", default = "HTO")
# Output graphs - Ridge Plot
parser$add_argument("--ridgePlot", help = "Generates a ridge plot from the results, True to generate", default = "True")
parser$add_argument("--ridgeNCol", help = "Number of columns for ridgePlot", default = 3, type = "integer")

#Output graphs - Scatter Feature
parser$add_argument("--featureScatter",help = "Generates a ridge plot from the results, True to generate", default = "True")
# Output graphs - Scatter Feature
parser$add_argument("--featureScatter", help = "Generates a ridge plot from the results, True to generate", default = "True")
parser$add_argument("--scatterFeat1", help = "Feature 1 for Feature Scatter Plot", default = "hto_HTO-A")
parser$add_argument("--scatterFeat2", help = "Feature 2 for Feature Scatter Plot", default = "hto_HTO-B")

#Output graphs - Violin Plot
# Output graphs - Violin Plot
parser$add_argument("--vlnPlot", help = "Generates a violin plot from the results, True to generate", default = "True")
parser$add_argument("--vlnFeatures", help = "Features to plot (gene expression, metrics, PC scores, anything that can be retreived by FetchData)", default = "nCount_RNA")
parser$add_argument("--vlnLog", help = "plot the feature axis on log scale", action = "store_true")

#Output graphs - tSNE
# Output graphs - tSNE
parser$add_argument("--tSNE", help = "Generate a two dimensional tSNE embedding for HTOs", default = "True")
parser$add_argument("--tSNEIdents", help = "What should we remove from the object (we have Singlet,Doublet and Negative)", default = "Negative")
parser$add_argument("--tSNEInvert", action = "store_true")
parser$add_argument("--tSNEVerbose", action = "store_true")
parser$add_argument("--tSNEApprox", action = "store_true")
parser$add_argument("--tSNEDimMax", help = "max number of donors ",type = "integer", default = 1)
parser$add_argument("--tSNEPerplexity", help = "value for perplexity", type = "integer", default = 100)
parser$add_argument("--tSNEDimMax", help = "max number of donors ", type = "integer", default = 1)
parser$add_argument("--tSNEPerplexity", help = "value for perplexity", type = "integer", default = 100)

#Output graphs - Heatmap
# Output graphs - Heatmap
parser$add_argument("--heatMap", help = "Generate a Heatmap", default = "False")
parser$add_argument("--heatMapNcells", help ="value for number of cells", type = "integer", default = 500)
parser$add_argument("--outputdir", help='Output directory')
parser$add_argument("--heatMapNcells", help = "value for number of cells", type = "integer", default = 500)
parser$add_argument("--outputdir", help = "Output directory")

args <- parser$parse_args()

@@ -45,53 +45,50 @@ params <- data.frame(Argument, Value)


print(paste0("Loading RDS object in ", args$hashtagPath))
if (!endsWith(args$hashtagPath, ".rds")){
hash_file <- list.files(args$hashtagPath, pattern = "\\.rds$", full.names = TRUE)[1]
}else{
hash_file <- args$hashtagPath
if (!endsWith(args$hashtagPath, ".rds")) {
hash_file <- list.files(args$hashtagPath, pattern = "\\.rds$", full.names = TRUE)[1]
} else {
hash_file <- args$hashtagPath
}
print(hash_file)
hashtag <-readRDS(hash_file)
hashtag <- readRDS(hash_file)


# Ridge Plot
# Group cells based on the max HTO signal
if (args$ridgePlot == "True") {
Idents(hashtag) <- paste0(args$assay, "_maxID")
RidgePlot(hashtag, assay = args$assay, features = rownames(hashtag[[args$assay]]), ncol = args$ridgeNCol)
ggsave(paste0(args$outputdir, '/ridge.jpeg'), device = 'jpeg', dpi = 500) # height = 10, width = 10
ggsave(paste0(args$outputdir, "/ridge.jpeg"), device = "jpeg", dpi = 500) # height = 10, width = 10
}

if (args$featureScatter == "True") {
FeatureScatter(hashtag, feature1 = args$scatterFeat1, feature2 = args$scatterFeat2)
ggsave(paste0(args$outputdir, '/featureScatter.jpeg'), device = 'jpeg',dpi = 500)
ggsave(paste0(args$outputdir, "/featureScatter.jpeg"), device = "jpeg", dpi = 500)
}

if (args$vlnPlot == "True") {
Idents(hashtag) <- paste0(args$assay, "_classification.global")
VlnPlot(hashtag, features = args$vlnFeatures, pt.size = 0.1, log = args$vlnLog)
ggsave(paste0(args$outputdir, '/violinPlot.jpeg'), device = 'jpeg', dpi = 500)
ggsave(paste0(args$outputdir, "/violinPlot.jpeg"), device = "jpeg", dpi = 500)
}

if (args$tSNE == "True") {
hashtag.subset <- subset(hashtag, idents = args$tSNEIdents, invert = args$tSNEInvert)
DefaultAssay(hashtag.subset) <- args$assay
hashtag.subset <- ScaleData(hashtag.subset, features = rownames(hashtag.subset),
verbose = args$tSNEVerbose)
hashtag.subset <- ScaleData(hashtag.subset,
features = rownames(hashtag.subset),
verbose = args$tSNEVerbose
)
hashtag.subset <- RunPCA(hashtag.subset, features = rownames(hashtag.subset), approx = args$tSNEApprox)
hashtag.subset <- RunTSNE(hashtag.subset, dims = 1:args$tSNEDimMax, perplexity = args$tSNEPerplexity, check_duplicates = FALSE)
DimPlot(hashtag.subset)
ggsave(paste0(args$outputdir, '/tSNE.jpeg'), device = 'jpeg', dpi = 500)
ggsave(paste0(args$outputdir, "/tSNE.jpeg"), device = "jpeg", dpi = 500)
}

if (args$heatMap == "True") {
HTOHeatmap(hashtag, assay = args$assay, ncells = args$heatMapNcells)
ggsave(paste0(args$outputdir, '/heatMap.jpeg'), device = 'jpeg', dpi = 500)
ggsave(paste0(args$outputdir, "/heatMap.jpeg"), device = "jpeg", dpi = 500)
}

write.csv(params, paste0(args$outputdir, "/visual_params.csv"))
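The changes to this script are formatting only (spacing, quoting, brace style), so its command-line interface is unchanged. For reference, a hypothetical invocation using the arguments declared above; the input RDS and output directory are placeholders:

```bash
# Illustrative call only; the input RDS and output directory are placeholders.
Rscript bin/HTODemux-visualisation.R \
  --hashtagPath results/htodemux/htodemux.rds \
  --assay HTO \
  --ridgePlot True --ridgeNCol 3 \
  --featureScatter True --scatterFeat1 hto_HTO-A --scatterFeat2 hto_HTO-B \
  --vlnPlot True --vlnFeatures nCount_RNA \
  --tSNE True --tSNEIdents Negative --tSNEPerplexity 100 \
  --outputdir results/htodemux_visualisation
```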





44 changes: 21 additions & 23 deletions bin/HTODemux.R
@@ -1,5 +1,5 @@
#!/usr/bin/env Rscript
#Libraries
# Libraries
library(Seurat)
library(argparse)
library(ggplot2)
@@ -15,54 +15,52 @@ parser$add_argument("--seed", help = "Sets the random seed.", type = "integer",
parser$add_argument("--init", help = "Initial number of clusters for hashtags.", default = NULL, type = "integer")
parser$add_argument("--assay", help = "Assay name", default = "HTO")
parser$add_argument("--objectOutHTOdemux", help = "Prefix name for the object containing the output of HTODemux object", type = "character", default = "htodemux")
parser$add_argument("--assignmentOutHTOdemux", help="Prefeix name for the file containing the output of HTODemux assignment", type = "character", default = "htodemux")
parser$add_argument("--outputdir", help='Output directory')
parser$add_argument("--assignmentOutHTOdemux", help = "Prefeix name for the file containing the output of HTODemux assignment", type = "character", default = "htodemux")
parser$add_argument("--outputdir", help = "Output directory")


args <- parser$parse_args()
if (!endsWith(args$seuratObject, ".rds")){
seuratObj <- list.files(args$seuratObject, pattern = "\\.rds$", full.names = TRUE)[1]
}else{
seuratObj <- args$seuratObject
if (!endsWith(args$seuratObject, ".rds")) {
seuratObj <- list.files(args$seuratObject, pattern = "\\.rds$", full.names = TRUE)[1]
} else {
seuratObj <- args$seuratObject
}

init <- args$init
if(is.null(init)){
init <- "NULL"
if (is.null(init)) {
init <- "NULL"
}
Argument <- c("seuratObject", "quantile", "kfunc", "nstarts", "nsamples", "seed", "init", "assay")
Value <- c(seuratObj, args$quantile, args$kfunc, args$nstarts, args$nsamples, args$seed, init, args$assay)

params <- data.frame(Argument, Value)
# Loading Seurat object
hashtag <-readRDS(seuratObj)
hashtag <- readRDS(seuratObj)

# Demultiplex cells based on HTO enrichment
if(args$kfunc == "clara"){
hashtag <- HTODemux(hashtag, assay = args$assay, positive.quantile = args$quantile, init = args$init, nsamples = args$nsamples, kfunc = "clara", seed = args$seed)
}else{
hashtag <- HTODemux(hashtag, assay = args$assay, positive.quantile = args$quantile, init = args$init, nstarts = args$nstarts, kfunc = "kmeans", seed = args$seed)
if (args$kfunc == "clara") {
hashtag <- HTODemux(hashtag, assay = args$assay, positive.quantile = args$quantile, init = args$init, nsamples = args$nsamples, kfunc = "clara", seed = args$seed)
} else {
hashtag <- HTODemux(hashtag, assay = args$assay, positive.quantile = args$quantile, init = args$init, nstarts = args$nstarts, kfunc = "kmeans", seed = args$seed)
}


# Global classification results
#table(hashtag[[paste0(args$assay,"_classification.global")]])
# table(hashtag[[paste0(args$assay,"_classification.global")]])

#table(hashtag[[paste0(args$assay,"_classification")]])
# table(hashtag[[paste0(args$assay,"_classification")]])


# Saving results

donors <- rownames(hashtag[[args$assay]])
assignment <- hashtag[[paste0(args$assay,"_classification")]]
assignment[[paste0(args$assay,"_classification")]][!assignment[[paste0(args$assay,"_classification")]] %in% c(donors, 'Negative')] <- "Doublet"
assignment <- hashtag[[paste0(args$assay, "_classification")]]
assignment[[paste0(args$assay, "_classification")]][!assignment[[paste0(args$assay, "_classification")]] %in% c(donors, "Negative")] <- "Doublet"



write.csv(params, paste0(args$outputdir, "/params.csv"))
write.csv(assignment, paste0(args$outputdir, "/", args$assignmentOutHTOdemux, "_assignment_htodemux.csv"))
#write.csv(hashtag[[paste0(args$assay,"_classification")]], paste0(args$outputdir, "/", args$assignmentOutHTOdemux, "_assignment_htodemux.csv"))
write.csv(hashtag[[paste0(args$assay,"_classification.global")]], paste0(args$outputdir, "/", args$assignmentOutHTOdemux, "_classification_htodemux.csv"))
saveRDS(hashtag, file=paste0(args$outputdir, "/", args$objectOutHTOdemux,".rds"))


# write.csv(hashtag[[paste0(args$assay,"_classification")]], paste0(args$outputdir, "/", args$assignmentOutHTOdemux, "_assignment_htodemux.csv"))
write.csv(hashtag[[paste0(args$assay, "_classification.global")]], paste0(args$outputdir, "/", args$assignmentOutHTOdemux, "_classification_htodemux.csv"))
saveRDS(hashtag, file = paste0(args$outputdir, "/", args$objectOutHTOdemux, ".rds"))
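As above, only formatting changes here. A hypothetical call to the HTODemux wrapper, with a placeholder input path and illustrative parameter values (not defaults taken from this PR):

```bash
# Illustrative call only; the input path is a placeholder and parameter values are examples.
Rscript bin/HTODemux.R \
  --seuratObject preprocessing/sample1.rds \
  --quantile 0.99 \
  --kfunc clara \
  --assay HTO \
  --objectOutHTOdemux htodemux \
  --assignmentOutHTOdemux htodemux \
  --outputdir results/htodemux
```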
27 changes: 13 additions & 14 deletions bin/MultiSeq.R
@@ -1,5 +1,5 @@
#!/usr/bin/env Rscript
#Libraries
# Libraries
library(Seurat)
library(argparse)
library(ggplot2)
@@ -9,34 +9,33 @@ parser <- ArgumentParser("Parameters for MultiSeqDemux")
parser$add_argument("--seuratObjectPath", help = "Seurat object or the folder containing the seurat object")
parser$add_argument("--quantile", help = "The quantile to use for classification", type = "double", default = 0.7)
parser$add_argument("--autoThresh", help = "Whether to perform automated threshold finding to define the best quantile", action = "store_true")
parser$add_argument("--maxiter", help = "Maximum number of iterations if autoThresh = TRUE ", default = 5, type="integer")
parser$add_argument("--maxiter", help = "Maximum number of iterations if autoThresh = TRUE ", default = 5, type = "integer")
parser$add_argument("--qrangeFrom", help = "A range of possible quantile values to try if autoThresh is TRUE", type = "double", default = 0.1)
parser$add_argument("--qrangeTo", help = "A range of possible quantile values to try if autoThresh is TRUE", type = "double", default = 0.9)
parser$add_argument("--qrangeBy", help = "A range of possible quantile values to try if autoThresh is TRUE", type = "double", default = 0.05)
parser$add_argument("--verbose", help = "Prints the output", action = "store_true")
parser$add_argument("--assay", help = "Name of the multiplexing assay", default="HTO")
parser$add_argument("--assignmentOutMulti",help = "Prefix name for the file containing the output of MULTI-Seq Demux object", default = "mutliseq")
parser$add_argument("--assay", help = "Name of the multiplexing assay", default = "HTO")
parser$add_argument("--assignmentOutMulti", help = "Prefix name for the file containing the output of MULTI-Seq Demux object", default = "mutliseq")
parser$add_argument("--objectOutMulti", help = "Prefix name for the object containing the output of MULTI-Seq Demux object", default = "multiseq")
parser$add_argument("--outputdir", help='Output directory')
parser$add_argument("--outputdir", help = "Output directory")

args <- parser$parse_args()
if (!endsWith(args$seuratObjectPath, ".rds")){
seuratObj <- list.files(args$seuratObjectPath, pattern = "\\.rds$", full.names = TRUE)[1]
}else{
seuratObj <- args$seuratObjectPath
if (!endsWith(args$seuratObjectPath, ".rds")) {
seuratObj <- list.files(args$seuratObjectPath, pattern = "\\.rds$", full.names = TRUE)[1]
} else {
seuratObj <- args$seuratObjectPath
}

Argument <- c("seuratObjectPath", "quantile", "autoThresh", "maxiter", "qrangeFrom", "qrangeTo", "qrangeBy", "verbose", "assay")
Value <- c(seuratObj, args$quantile, args$autoThresh, args$maxiter, args$qrangeFrom, args$qrangeTo, args$qrangeBy, args$verbose, args$assay)

params <- data.frame(Argument, Value)

hashtag <-readRDS(seuratObj)
hashtag <- readRDS(seuratObj)
if (args$autoThresh == TRUE) {
hashtag <- MULTIseqDemux(hashtag, assay = args$assay, quantile = args$quantile, autoThresh = TRUE, maxiter = args$maxiter, qrange=seq(from = args$qrangeFrom, to = args$qrangeTo, by = args$qrangeBy), verbose = args$verbose)
}else{
hashtag <- MULTIseqDemux(hashtag, assay = args$assay, quantile = args$quantile, verbose = args$verbose)

hashtag <- MULTIseqDemux(hashtag, assay = args$assay, quantile = args$quantile, autoThresh = TRUE, maxiter = args$maxiter, qrange = seq(from = args$qrangeFrom, to = args$qrangeTo, by = args$qrangeBy), verbose = args$verbose)
} else {
hashtag <- MULTIseqDemux(hashtag, assay = args$assay, quantile = args$quantile, verbose = args$verbose)
}

table(hashtag$MULTI_ID)
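Likewise for the MULTI-seq wrapper; a hypothetical invocation with the arguments declared above (paths and prefixes are placeholders, and `--autoThresh` triggers the quantile sweep from `--qrangeFrom` to `--qrangeTo`):

```bash
# Illustrative call only; the input path and output prefixes are placeholders.
Rscript bin/MultiSeq.R \
  --seuratObjectPath preprocessing/sample1.rds \
  --autoThresh \
  --maxiter 5 \
  --qrangeFrom 0.1 --qrangeTo 0.9 --qrangeBy 0.05 \
  --assay HTO \
  --assignmentOutMulti multiseq \
  --objectOutMulti multiseq \
  --outputdir results/multiseq
```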