Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Balanced Cell Group Comparisons #19

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ LazyData: false
Depends: R (>= 4.0)
Imports:
S4Vectors,
limma,
scrapper,
ggraph,
igraph,
methods,
Expand All @@ -30,7 +30,7 @@ Imports:
graphics,
statmod,
Cepo
RoxygenNote: 7.1.1
RoxygenNote: 7.3.2
Suggests:
knitr,
rmarkdown,
Expand Down
4 changes: 1 addition & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ exportClasses(scClassifyTrainModel)
exportClasses(scClassifyTrainModelList)
import(ggplot2)
import(igraph)
import(limma)
import(statmod)
importClassesFrom(S4Vectors,DataFrame)
importClassesFrom(S4Vectors,SimpleList)
Expand All @@ -45,8 +44,6 @@ importFrom(hopach,hdist)
importFrom(hopach,improveordering)
importFrom(hopach,is.hdist)
importFrom(hopach,vectmatrix)
importFrom(limma,eBayes)
importFrom(limma,lmFit)
importFrom(methods,as)
importFrom(methods,is)
importFrom(methods,new)
Expand All @@ -57,6 +54,7 @@ importFrom(mixtools,normalmixEM)
importFrom(proxy,as.dist)
importFrom(proxy,dist)
importFrom(proxyC,simil)
importFrom(scrapper,scoreMarkers)
importFrom(stats,coef)
importFrom(stats,cutree)
importFrom(stats,dnorm)
Expand Down
72 changes: 11 additions & 61 deletions R/featureSelection.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
#' @importFrom methods new
#' @importFrom Cepo Cepo topGenes
#' @import limma

#' @importFrom scrapper scoreMarkers

featureSelection <- function(exprsMat,
trainClass,
feature = c("limma", "DV", "DD", "chisq", "BI",
sampleID = NULL,
feature = c("DM", "DV", "DD", "chisq", "BI",
"Cepo"),
topN = 50,
pSig = 0.001
){

feature <- match.arg(feature, c("limma", "DV", "DD", "chisq", "BI", "Cepo"),
feature <- match.arg(feature, c("DM", "DV", "DD", "chisq", "BI", "Cepo"),
several.ok = FALSE)


Expand Down Expand Up @@ -40,69 +40,19 @@ featureSelection <- function(exprsMat,
tt <- Cepo::Cepo(as.matrix(exprsMat), trainClass, exprsPct = 0.05)
res <- Reduce(union, Cepo::topGenes(tt, n = topN))
} else{
tt <- doLimma(exprsMat, trainClass)
res <- Reduce(union, lapply(tt, function(t)
rownames(t[t$logFC > 0 & (t$meanPct.2 - t$meanPct.1) > 0.05 &
t$adj.P.Val < pSig,])[seq_len(topN)]))
effectSizes <- scrapper::scoreMarkers(exprsMat, trainClass, sampleID, block.weight.policy = "equal")
res <- Reduce(union, mapply(function(typeD, propDetect)
{
bestNames <- rownames(exprsMat)[order(typeD$mean, decreasing = TRUE)]
bestNames <- bestNames[propDetect$mean > 0.05]
bestNames[seq_len(topN)]
}, effectSizes[["cohens.d"]], effectSizes[["delta.detected"]], SIMPLIFY = FALSE))

}

return(res)
}


#' @importFrom limma eBayes lmFit
#' @importFrom methods new

doLimma <- function(exprsMat, cellTypes, exprs_pct = 0.05){

cellTypes <- droplevels(as.factor(cellTypes))
tt <- list()
for (i in seq_len(nlevels(cellTypes))) {
tmp_celltype <- (ifelse(cellTypes == levels(cellTypes)[i], 1, 0))
design <- stats::model.matrix(~tmp_celltype)


meanExprs <- do.call(cbind, lapply(c(0,1), function(i){
Matrix::rowMeans(exprsMat[, tmp_celltype == i, drop = FALSE])
}))

meanPct <- do.call(cbind, lapply(c(0,1), function(i){
Matrix::rowSums(exprsMat[, tmp_celltype == i,
drop = FALSE] > 0)/sum(tmp_celltype == i)
}))

keep <- meanPct[,2] > exprs_pct

y <- methods::new("EList")
y$E <- exprsMat[keep, ]
fit <- limma::lmFit(y, design = design)
fit <- limma::eBayes(fit, trend = TRUE, robust = TRUE)
tt[[i]] <- limma::topTable(fit, n = Inf, adjust.method = "BH", coef = 2)



if (!is.null(tt[[i]]$ID)) {
tt[[i]] <- tt[[i]][!duplicated(tt[[i]]$ID),]
rownames(tt[[i]]) <- tt[[i]]$ID
}

tt[[i]]$meanExprs.1 <- meanExprs[rownames(tt[[i]]), 1]
tt[[i]]$meanExprs.2 <- meanExprs[rownames(tt[[i]]), 2]
tt[[i]]$meanPct.1 <- meanPct[rownames(tt[[i]]), 1]
tt[[i]]$meanPct.2 <- meanPct[rownames(tt[[i]]), 2]
}



return(tt)


}




doDV <- function(exprsMat, cellTypes){


Expand Down
18 changes: 9 additions & 9 deletions R/predict_scClassify.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
#' @param cor_threshold_high A numeric indicates the highest
#' correlation threshold
#' @param features A vector indicates the gene selection method,
#' set as "limma" by default.
#' This should be one or more of "limma", "DV", "DD", "chisq", "BI".
#' set as "DM" (Difference in Means) by default.
#' This should be one or more of "DM", "DV", "DD", "chisq", "BI".
#' @param algorithm A vector indicates the KNN method that are used,
#' set as "WKNN" by default.
#' This should be one or more of "WKNN", "KNN", "DWKNN".
Expand Down Expand Up @@ -46,7 +46,7 @@
#' trainRes = trainClassExample_xin,
#' cellTypes_test = wang_cellTypes,
#' algorithm = "WKNN",
#' features = c("limma"),
#' features = "DM",
#' similarity = c("pearson"),
#' prob_threshold = 0.7,
#' verbose = TRUE)
Expand All @@ -67,7 +67,7 @@ predict_scClassify <- function(exprsMat_test,
prob_threshold = 0.7,
cor_threshold_static = 0.5,
cor_threshold_high = 0.7,
features = "limma",
features = "DM",
algorithm = "WKNN",
similarity = "pearson",
cutoff_method = c("dynamic", "static"),
Expand Down Expand Up @@ -238,8 +238,8 @@ predict_scClassify <- function(exprsMat_test,
#' @param prob_threshold A numeric indicates the probability threshold for KNN/WKNN/DWKNN.
#' @param cor_threshold_static A numeric indicates the static correlation threshold.
#' @param cor_threshold_high A numeric indicates the highest correlation threshold
#' @param features A vector indicates the method to select features, set as "limma" by default.
#' This should be one or more of "limma", "DV", "DD", "chisq", "BI".
#' @param features A vector indicates the method to select features, set as "DM" (Difference in Means) by default.
#' This should be one or more of "DM", "DV", "DD", "chisq", "BI".
#' @param algorithm A vector indicates the KNN method that are used, set as "WKNN" by default.
#' This should be one or more of "WKNN", "KNN", "DWKNN".
#' @param similarity A vector indicates the similarity measure that are used,
Expand Down Expand Up @@ -269,7 +269,7 @@ predict_scClassify <- function(exprsMat_test,
#' trainRes = trainClassExampleJoint,
#' cellTypes_test = wang_cellTypes,
#' algorithm = "WKNN",
#' features = c("limma"),
#' features = "DM",
#' similarity = c("pearson"),
#' prob_threshold = 0.7,
#' verbose = FALSE)
Expand All @@ -287,7 +287,7 @@ predict_scClassifyJoint <- function(exprsMat_test,
prob_threshold = 0.7,
cor_threshold_static = 0.5,
cor_threshold_high = 0.7,
features = "limma",
features = "DM",
algorithm = "WKNN",
similarity = "pearson",
cutoff_method = c("dynamic", "static"),
Expand Down Expand Up @@ -363,7 +363,7 @@ predict_scClassifySingle <- function(exprsMat_test,
prob_threshold = 0.7,
cor_threshold_static = 0.5,
cor_threshold_high = 0.7,
features = "limma",
features = "DM",
algorithm = c("WKNN", "KNN", "DWKNN"),
similarity = c("pearson", "spearman",
"cosine", "jaccard",
Expand Down
2 changes: 1 addition & 1 deletion R/sampleSizeCal.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ runSubSampling <- function(exprsMat,
test = cellTypes_test),
...)

acc_cls <- trainRes$testRes$test$pearson_WKNN_limma$classifyRes
acc_cls <- trainRes$testRes$test$pearson_WKNN_DM$classifyRes
return(acc_cls)
}

Expand Down
13 changes: 8 additions & 5 deletions R/scClassify.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
#'
#' @param exprsMat_train A matrix of log-transformed expression matrix of reference dataset
#' @param cellTypes_train A vector of cell types of reference dataset
#' @param sampleID_train Default: \code{NULL}. A vector of sample IDs, if the cells come from more than one individual.
#' @param exprsMat_test A list or a matrix indicates the expression matrices of the query datasets
#' @param cellTypes_test A list or a vector indicates cell types of the query datasets (Optional).
#' @param tree A vector indicates the method to build hierarchical tree, set as "HOPACH" by default.
#' This should be one of "HOPACH" and "HC" (using hclust).
#' @param selectFeatures A vector indicates the gene selection method, set as "limma" by default.
#' This should be one or more of "limma", "DV", "DD", "chisq", "BI" and "Cepo".
#' @param selectFeatures A vector indicates the gene selection method, set as "DM" (Difference in Means) by default.
#' This should be one or more of "DM", "DV", "DD", "chisq", "BI" and "Cepo".
#' @param algorithm A vector indicates the KNN method that are used, set as
#' "WKNN" by default. Thisshould be one or more of "WKNN", "KNN", "DWKNN".
#' @param similarity A vector indicates the similarity measure that are used,
Expand Down Expand Up @@ -56,7 +57,7 @@
#' cellTypes_test = list(wang = wang_cellTypes),
#' tree = "HOPACH",
#' algorithm = "WKNN",
#' selectFeatures = c("limma"),
#' selectFeatures = "DM",
#' similarity = c("pearson"),
#' returnList = FALSE,
#' verbose = FALSE)
Expand All @@ -69,11 +70,12 @@

scClassify <- function(exprsMat_train = NULL,
cellTypes_train = NULL,
sampleID_train = NULL,
exprsMat_test = NULL,
cellTypes_test = NULL,
tree = "HOPACH",
algorithm = "WKNN",
selectFeatures = "limma",
selectFeatures = "DM",
similarity = "pearson",
cutoff_method = c("dynamic", "static"),
weighted_ensemble = FALSE,
Expand Down Expand Up @@ -147,7 +149,7 @@ scClassify <- function(exprsMat_train = NULL,

tree <- match.arg(tree, c("HOPACH", "HC"), several.ok = FALSE)
selectFeatures <- match.arg(selectFeatures,
c("limma", "DV", "DD", "chisq", "BI", "Cepo"),
c("DM", "DV", "DD", "chisq", "BI", "Cepo"),
several.ok = TRUE)

algorithm <- match.arg(algorithm,
Expand Down Expand Up @@ -231,6 +233,7 @@ scClassify <- function(exprsMat_train = NULL,
### train_scClassify
trainRes <- train_scClassify(exprsMat_train,
cellTypes_train,
sampleID_train,
tree = tree,
selectFeatures = selectFeatures,
topN = topN,
Expand Down
33 changes: 20 additions & 13 deletions R/train_scClassify.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
#'
#' @param exprsMat_train A matrix of log-transformed expression matrix of reference dataset
#' @param cellTypes_train A vector of cell types of reference dataset
#' @param sampleID_train Default: \code{NULL}. A vector of sample IDs, if the cells come from more than one individual.
#' @param tree A vector indicates the method to build hierarchical tree,
#' set as "HOPACH" by default.
#' This should be one of "HOPACH" and "HC" (using stats::hclust).
#' @param selectFeatures A vector indicates the gene selection method,
#' set as "limma" by default.
#' This should be one or more of "limma", "DV", "DD", "chisq", "BI", "Cepo".
#' set as "DM" (Difference in Means) by default.
#' This should be one or more of "DM", "DV", "DD", "chisq", "BI", "Cepo".
#' @param topN An integer indicates the top number of features that are selected
#' @param hopach_kmax An integer between 1 and 9 specifying the maximum number of
#' children at each node in the HOPACH tree.
Expand All @@ -32,7 +33,7 @@
#' exprsMat_xin_subset <- scClassify_example$exprsMat_xin_subset
#' trainClass <- train_scClassify(exprsMat_train = exprsMat_xin_subset,
#' cellTypes_train = xin_cellTypes,
#' selectFeatures = c("limma", "BI"),
#' selectFeatures = c("DM", "BI"),
#' returnList = FALSE
#' )
#'
Expand All @@ -46,8 +47,9 @@

train_scClassify <- function(exprsMat_train,
cellTypes_train,
sampleID_train = NULL,
tree = "HOPACH",
selectFeatures = "limma",
selectFeatures = "DM",
topN = 50,
hopach_kmax = 5,
pSig = 0.05,
Expand All @@ -69,7 +71,7 @@ train_scClassify <- function(exprsMat_train,

# Matching the argument of feature selection method
selectFeatures <- match.arg(selectFeatures,
c("limma", "DV", "DD", "chisq", "BI", "Cepo"),
c("DM", "DV", "DD", "chisq", "BI", "Cepo"),
several.ok = TRUE)


Expand Down Expand Up @@ -133,6 +135,7 @@ train_scClassify <- function(exprsMat_train,
for (train_list_idx in seq_len(length(exprsMat_train))) {
trainRes[[train_list_idx]] <- train_scClassifySingle(exprsMat_train[[train_list_idx]],
cellTypes_train[[train_list_idx]],
sampleID_train[[train_list_idx]],
tree = tree,
selectFeatures = selectFeatures,
topN = topN,
Expand All @@ -149,6 +152,7 @@ train_scClassify <- function(exprsMat_train,

trainRes <- train_scClassifySingle(exprsMat_train,
cellTypes_train,
sampleID_train,
tree = tree,
selectFeatures = selectFeatures,
topN = topN,
Expand Down Expand Up @@ -211,8 +215,9 @@ train_scClassify <- function(exprsMat_train,

train_scClassifySingle <- function(exprsMat_train,
cellTypes_train,
sampleID_train = NULL,
tree = "HOPACH",
selectFeatures = "limma",
selectFeatures = "DM",
topN = 50,
hopach_kmax = 5,
pSig = 0.05,
Expand Down Expand Up @@ -247,7 +252,7 @@ train_scClassifySingle <- function(exprsMat_train,

# Matching the argument of feature selection method
selectFeatures <- match.arg(selectFeatures,
c("limma", "DV", "DD", "chisq", "BI", "Cepo"),
c("DM", "DV", "DD", "chisq", "BI", "Cepo"),
several.ok = TRUE)


Expand All @@ -257,10 +262,12 @@ train_scClassifySingle <- function(exprsMat_train,


# Select the features to construct tree
tt <- doLimma(exprsMat_train, cellTypes_train)
de <- Reduce(union, lapply(tt, function(t)
rownames(t)[seq_len(max(min(50, sum(t$adj.P.Val < 0.001)), 30))]))
de <- na.omit(de)
effectSizes <- scrapper::scoreMarkers(exprsMat_train, cellTypes_train, sampleID_train, block.weight.policy = "equal")
de <- Reduce(union, mapply(function(typeD, propDetect)
{
bestNames <- rownames(exprsMat_train)[order(typeD$mean, decreasing = TRUE)]
bestNames[max(30, nrow(exprsMat_train))]
}, effectSizes[["cohens.d"]], effectSizes[["delta.detected"]], SIMPLIFY = FALSE))
if (verbose) {
print(paste("Number of genes selected to construct HOPACH tree",
length(de)))
Expand Down Expand Up @@ -420,11 +427,11 @@ currentClass <- function(cellTypes, cutree_res){
hierarchyKNNcor <- function(exprsMat,
cellTypes,
cutree_list,
feature = c("limma", "DV", "DD", "chisq", "BI"),
feature = c("DM", "DV", "DD", "chisq", "BI"),
topN = 50,
pSig = 0.001,
verbose= TRUE){
feature <- match.arg(feature, c("limma", "DV", "DD", "chisq", "BI", "Cepo"))
feature <- match.arg(feature, c("DM", "DV", "DD", "chisq", "BI", "Cepo"))
numHierchy <- length(cutree_list)
levelModel <- list()
levelHVG <- list()
Expand Down
Loading