Skip to content

Commit

Permalink
Release 1.0.1 (#542)
Browse files Browse the repository at this point in the history
* bugfix remove sys.time for weird error in R 4.0

* bug fix new uwot save and loading features

* deprecated

* version number

* handle error where spurious fragments seem to be 1 tile above max

* update bugfixes

* let quantCut in plotTrajectory be null

* grepExclude in plotTrajectoryHeatmap

* bugfix integrative analysis coaccessibility and peak2genelinks

* change file locking location groupcoverages

* throw error if no peakset for adding annotations

* make it so predictionScore is not needed for peak2gene links

* update addPeakSet

* updated description

* bugfix named list

* update error handling plotEnrichHeatmap

* update error messages tilematrix

* handle error with bsgenomes

* width is end - start + 1

* bugfix checkCairo in iterativeLSI

* update archr verbosity and logging for markerfeatures

* bugfix + documentation

* update subsetting to metadata

* handling case n = 1 for deviations matrix

* bugfix ordering of scTrack

* handle error where no barcodes passing on a small chromosome/scaffold

* bugfix validBSgenome

* fix missing paren

* fix missing paren

* add feature for supplying custom gene list for rna integration

* add sanity check to partial matrix

* update cellsInArrow in case user overrides sample in ArchRProj

* Add null option for genesUse to validInput

* bugfix NA in combined vars

* add checks for genes symbol to be not a list

Co-authored-by: Ryan Corces <[email protected]>
  • Loading branch information
jgranja24 and rcorces authored Feb 22, 2021
1 parent 46b519f commit ecfe12a
Show file tree
Hide file tree
Showing 33 changed files with 379 additions and 85 deletions.
Binary file modified .DS_Store
Binary file not shown.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: ArchR
Type: Package
Date: 2020-10-01
Date: 2020-11-23
Title: Analyzing single-cell regulatory chromatin in R.
Version: 1.0.0
Version: 1.0.1
Authors@R: c(
person("Jeffrey", "Granja", email = "[email protected]", role = c("aut","cre")),
person("Ryan", "Corces", role = "aut"))
Expand All @@ -11,7 +11,7 @@ Roxygen: list(markdown = TRUE)
License: GPL (>= 2)
LinkingTo: Rcpp
LazyData: TRUE
RoxygenNote: 7.0.2
RoxygenNote: 7.1.1
Encoding: UTF-8
Imports:
Rcpp (>= 0.12.16),
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export(addArchRDebugging)
export(addArchRGenome)
export(addArchRLogging)
export(addArchRThreads)
export(addArchRVerbose)
export(addBgdPeaks)
export(addCellColData)
export(addClusters)
Expand Down Expand Up @@ -68,6 +69,7 @@ export(getArchRDebugging)
export(getArchRGenome)
export(getArchRLogging)
export(getArchRThreads)
export(getArchRVerbose)
export(getArrowFiles)
export(getAvailableMatrices)
export(getBgdPeaks)
Expand Down
4 changes: 2 additions & 2 deletions R/AllClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ ArchRProject <- function(

message("Getting SampleNames...")
sampleNames <- unlist(.safelapply(seq_along(ArrowFiles), function(x){
message(x, " ", appendLF = FALSE)
if(getArchRVerbose()) message(x, " ", appendLF = FALSE)
.sampleName(ArrowFiles[x])
}, threads = threads))
message("")
Expand Down Expand Up @@ -159,7 +159,7 @@ ArchRProject <- function(
#Cell Information
message("Getting Cell Metadata...")
metadataList <- .safelapply(seq_along(ArrowFiles), function(x){
message(x, " ", appendLF = FALSE)
if(getArchRVerbose()) message(x, " ", appendLF = FALSE)
.getMetadata(ArrowFiles[x])
}, threads = threads)
message("")
Expand Down
9 changes: 8 additions & 1 deletion R/AnnotationGenome.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,14 @@ createGenomeAnnotation <- function(

##################
message("Getting blacklist..")
blacklist <- .getBlacklist(genome = bsg@provider_version)

genomeName <- tryCatch({
bsg@provider_version
}, error = function(e){
strsplit(bsg@pkgname,"\\.")[[1]][4]
})

blacklist <- .getBlacklist(genome = genomeName)

}else{

Expand Down
27 changes: 24 additions & 3 deletions R/AnnotationPeaks.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ getMatches <- function(ArchRProj = NULL, name = NULL, annoName = NULL){
#' binary value is stored indicating whether each region is observed within the peak region.
#'
#' @param ArchRProj An `ArchRProject` object.
#' @param regions A `list` of `GRanges` that are to be overlapped with the `peakSet` in the `ArchRProject`.
#' @param regions A named `list` of `GRanges` that are to be overlapped with the `peakSet` in the `ArchRProject`.
#' @param name The name of `peakAnnotation` object to be stored as in `ArchRProject`.
#' @param force A boolean value indicating whether to force the `peakAnnotation` object indicated by `name` to be overwritten
#' if it already exists in the given `ArchRProject`.
Expand Down Expand Up @@ -136,6 +136,10 @@ addPeakAnnotations <- function(

}else{

if(is.null(names(regions))){
names(regions) <- paste0("Region_", seq_along(regions))
}

regionPositions <- lapply(seq_along(regions), function(x){

.logThis(regions[[x]], paste0("regions[[x]]-", x), logFile = logFile)
Expand Down Expand Up @@ -184,6 +188,9 @@ addPeakAnnotations <- function(
# Peak Overlap Matrix
#############################################################
peakSet <- getPeakSet(ArchRProj)
if(is.null(peakSet)){
.logStop("peakSet is NULL. You need a peakset to run addMotifAnnotations! See addReproduciblePeakSet!", logFile = logFile)
}
allPositions <- unlist(regionPositions)

.logDiffTime("Creating Peak Overlap Matrix", t1 = tstart, verbose = TRUE, logFile = logFile)
Expand Down Expand Up @@ -431,6 +438,9 @@ addMotifAnnotations <- function(
#############################################################
.logDiffTime("Finding Motif Positions with motifmatchr!", t1 = tstart, verbose = TRUE, logFile = logFile)
peakSet <- ArchRProj@peakSet
if(is.null(peakSet)){
.logStop("peakSet is NULL. You need a peakset to run addMotifAnnotations! See addReproduciblePeakSet!", logFile = logFile)
}
motifPositions <- motifmatchr::matchMotifs(
pwms = motifs,
subject = peakSet,
Expand Down Expand Up @@ -606,7 +616,11 @@ addArchRAnnotations <- function(
}
}

genome <- tolower(validBSgenome(getGenome(ArchRProj))@provider_version)
genome <- tolower(tryCatch({
validBSgenome(getGenome(ArchRProj))$provider_version
}, error = function(e){
strsplit(validBSgenome(getGenome(ArchRProj))@pkgname,"\\.")[[1]][4]
}))

annoPath <- file.path(find.package("ArchR", NULL, quiet = TRUE), "data", "Annotations")
dir.create(annoPath, showWarnings = FALSE)
Expand Down Expand Up @@ -690,6 +704,9 @@ addArchRAnnotations <- function(
# Peak Overlap Matrix
#############################################################
peakSet <- getPeakSet(ArchRProj)
if(is.null(peakSet)){
.logStop("peakSet is NULL. You need a peakset to run addMotifAnnotations! See addReproduciblePeakSet!", logFile = logFile)
}
chr <- paste0(unique(seqnames(peakSet)))

.logMessage("Annotating Chromosomes", verbose = TRUE, logFile = logFile)
Expand Down Expand Up @@ -1061,6 +1078,10 @@ plotEnrichHeatmap <- function(
mat <- mat[keep, ,drop = FALSE]
.logThis(mat, "mat-mlog10Padj-Filter", logFile = logFile)

if(nrow(mat)==0){
stop("No enrichments found for your cutoff!")
}

passMat <- lapply(seq_len(nrow(mat)), function(x){
(mat[x, ] >= 0.9*max(mat[x, ])) * 1
}) %>% Reduce("rbind", .) %>% data.frame
Expand All @@ -1070,7 +1091,7 @@ plotEnrichHeatmap <- function(
mat[mat > pMax] <- pMax

if(nrow(mat)==0){
stop("No enrichments found!")
stop("No enrichments found for your cutoff!")
}

mat <- .rowScale(as.matrix(mat), min = 0)
Expand Down
6 changes: 6 additions & 0 deletions R/ArchRBrowser.R
Original file line number Diff line number Diff line change
Expand Up @@ -1749,6 +1749,12 @@ plotBrowserTrack <- function(

title <- paste0(as.character(seqnames(region)),":", start(region)-1, "-", end(region), " ", title)

#Re-Order
groupDF$group2 <- factor(
paste0(groupDF$group2),
levels = gtools::mixedsort(unique(paste0(groupDF$group2)))
)

p <- ggplot(groupDF, aes(x=bp, y=y, width = tileSize, fill = group2, color = group2)) +
geom_tile(size = scTileSize) +
facet_grid(group2 ~ ., scales="free_y") +
Expand Down
22 changes: 20 additions & 2 deletions R/ArrowRead.R
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,11 @@ getMatrixFromArrow <- function(

matFiles <- lapply(mat, function(x) x[[2]]) %>% Reduce("c", .)
mat <- lapply(mat, function(x) x[[1]]) %>% Reduce("cbind", .)
if(!all(cellNames %in% colnames(mat))){
.logThis(sampledCellNames, "cellNames supplied", logFile = logFile)
.logThis(colnames(mat), "cellNames from matrix", logFile = logFile)
stop("Error not all cellNames found in partialMatrix")
}
mat <- mat[,sampledCellNames, drop = FALSE]
mat <- .checkSparseMatrix(mat, length(sampledCellNames))

Expand All @@ -793,6 +798,11 @@ getMatrixFromArrow <- function(
}else{

mat <- Reduce("cbind", mat)
if(!all(cellNames %in% colnames(mat))){
.logThis(cellNames, "cellNames supplied", logFile = logFile)
.logThis(colnames(mat), "cellNames from matrix", logFile = logFile)
stop("Error not all cellNames found in partialMatrix")
}
mat <- mat[,cellNames, drop = FALSE]
mat <- .checkSparseMatrix(mat, length(cellNames))

Expand Down Expand Up @@ -902,6 +912,16 @@ getMatrixFromArrow <- function(
stop("Means Variances and Ns lengths not identical")
}

#Check if samples have NAs due to N = 1 sample or some other weird thing.
#Set it to min non NA variance
dfVars <- lapply(seq_len(nrow(dfVars)), function(x){
vx <- dfVars[x, ]
if(any(is.na(vx))){
vx[is.na(vx)] <- min(vx[!is.na(vx)])
}
vx
}) %>% Reduce("rbind", .)

combinedMeans <- rowSums(t(t(dfMeans) * ns)) / sum(ns)
summedVars <- rowSums(t(t(dfVars) * (ns - 1)) + t(t(dfMeans^2) * ns))
combinedVars <- (summedVars - sum(ns)*combinedMeans^2)/(sum(ns)-1)
Expand All @@ -925,8 +945,6 @@ getMatrixFromArrow <- function(
length(.availableCells(ArrowFiles[y], useMatrix))
}) %>% unlist



#Compute RowVars
summaryDF <- .safelapply(seq_along(featureDF), function(x){

Expand Down
12 changes: 11 additions & 1 deletion R/ArrowUtils.R
Original file line number Diff line number Diff line change
Expand Up @@ -379,10 +379,20 @@
o <- h5createGroup(outArrow, groupName)

mData <- ArrowInfo[[groupName]]
cellNames <- .h5read(inArrow, "Metadata/CellNames")
idx <- which(cellNames %in% stringr::str_split(cellsKeep, pattern="#", simplify=TRUE)[,2])

if(length(idx)==0){
stop("No cells matching in arrow file!")
}

for(i in seq_len(nrow(mData))){
h5name <- paste0(groupName, "/", mData$name[i])
h5write(.h5read(inArrow, h5name), file = outArrow, name = h5name)
mDatai <- .h5read(inArrow, h5name)
if(length(mDatai)==length(cellNames)){
mDatai <- mDatai[idx]
}
h5write(mDatai, file = outArrow, name = h5name)
}

#2. scATAC-Fragments
Expand Down
76 changes: 51 additions & 25 deletions R/CreateArrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ createArrowFiles <- function(

.logDiffTime("Continuing through after error ggplot for Fragment Size Distribution", t1 = tstart, logFile = logFile)
#print(x)
message("\n")
if(getArchRVerbose()) message("\n")

})
gc()
Expand Down Expand Up @@ -537,7 +537,7 @@ createArrowFiles <- function(

.logDiffTime("Continuing through after error ggplot for TSS by Frags", t1 = tstart, logFile = logFile)
#message(x)
message("\n")
if(getArchRVerbose()) message("\n")

})

Expand Down Expand Up @@ -1119,7 +1119,7 @@ createArrowFiles <- function(
TRUE
}, error = function(x){
tryCatch({
message("Attempting to index ", file," as tabix..")
if(getArchRVerbose()) message("Attempting to index ", file," as tabix..")
indexTabix(file, format = "bed")
TRUE
}, error = function(y){
Expand All @@ -1138,7 +1138,7 @@ createArrowFiles <- function(
}
}, error = function(x){
tryCatch({
message("Attempting to index ", file," as bam...")
if(getArchRVerbose()) message("Attempting to index ", file," as bam...")
indexBam(file)
TRUE
}, error = function(y){
Expand Down Expand Up @@ -1278,7 +1278,7 @@ createArrowFiles <- function(
o <- .suppressAll(h5createDataset(tmpFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0))
o <- .suppressAll(h5createDataset(tmpFile, chrRGValues, storage.mode = "character",
dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1))
o <- h5write(obj = cbind(dt$V2,dt$V3-dt$V2), file = tmpFile, name = chrPos)
o <- h5write(obj = cbind(dt$V2,dt$V3 - dt$V2 + 1), file = tmpFile, name = chrPos)
o <- h5write(obj = RG@lengths, file = tmpFile, name = chrRGLengths)
o <- h5write(obj = RG@values, file = tmpFile, name = chrRGValues)

Expand Down Expand Up @@ -1314,7 +1314,7 @@ createArrowFiles <- function(
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character",
dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1))

o <- h5write(obj = cbind(dt$V2,dt$V3-dt$V2), file = tmpChrFile, name = chrPos)
o <- h5write(obj = cbind(dt$V2,dt$V3 - dt$V2 + 1), file = tmpChrFile, name = chrPos)
o <- h5write(obj = RG@lengths, file = tmpChrFile, name = chrRGLengths)
o <- h5write(obj = RG@values, file = tmpChrFile, name = chrRGValues)

Expand Down Expand Up @@ -1650,7 +1650,7 @@ createArrowFiles <- function(
o <- .suppressAll(h5createDataset(tmpFile, chrRGValues, storage.mode = "character",
dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1))

o <- h5write(obj = cbind(dt$start,dt$end-dt$start), file = tmpFile, name = chrPos)
o <- h5write(obj = cbind(dt$start, dt$end - dt$start + 1), file = tmpFile, name = chrPos)
o <- h5write(obj = RG@lengths, file = tmpFile, name = chrRGLengths)
o <- h5write(obj = RG@values, file = tmpFile, name = chrRGValues)

Expand Down Expand Up @@ -1686,7 +1686,7 @@ createArrowFiles <- function(
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character",
dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1))

o <- h5write(obj = cbind(dt$start,dt$end-dt$start), file = tmpChrFile, name = chrPos)
o <- h5write(obj = cbind(dt$start, dt$end - dt$start + 1), file = tmpChrFile, name = chrPos)
o <- h5write(obj = RG@lengths, file = tmpChrFile, name = chrRGLengths)
o <- h5write(obj = RG@values, file = tmpChrFile, name = chrRGValues)

Expand Down Expand Up @@ -1930,14 +1930,29 @@ createArrowFiles <- function(
chrPos <- paste0("Fragments/",chr,"/Ranges")
chrRGLengths <- paste0("Fragments/",chr,"/RGLengths")
chrRGValues <- paste0("Fragments/",chr,"/RGValues")
o <- h5createGroup(outArrow, paste0("Fragments/",chr))
o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0))
o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0))
o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0,
size = max(nchar(mcols(fragments)$RG@values)) + 1))
o <- h5write(obj = cbind(start(fragments),width(fragments)), file = outArrow, name = chrPos)
o <- h5write(obj = mcols(fragments)$RG@lengths, file = outArrow, name = chrRGLengths)
o <- h5write(obj = mcols(fragments)$RG@values, file = outArrow, name = chrRGValues)

if(lengthRG == 0){

.logMessage(msg = paste0(prefix, " detected 0 Fragments in cells passing filtering threshold for ", chr), logFile = logFile)

o <- h5createGroup(outArrow, paste0("Fragments/",chr))
o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(0, 2), level = 0))
o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = 0))
o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(0, 1), level = 0,
size = 10))

}else{

o <- h5createGroup(outArrow, paste0("Fragments/",chr))
o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0))
o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0))
o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0,
size = max(nchar(mcols(fragments)$RG@values)) + 1))

o <- h5write(obj = cbind(start(fragments),width(fragments)), file = outArrow, name = chrPos)
o <- h5write(obj = mcols(fragments)$RG@lengths, file = outArrow, name = chrRGLengths)
o <- h5write(obj = mcols(fragments)$RG@values, file = outArrow, name = chrRGValues)
}

#Free Some Memory!
rm(fragments)
Expand Down Expand Up @@ -1995,15 +2010,26 @@ createArrowFiles <- function(
chrRGLengths <- paste0(chr, "._.RGLengths")
chrRGValues <- paste0(chr, "._.RGValues")

#HDF5 Write
o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0))
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0))
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0,
size = max(nchar(mcols(fragments)$RG@values)) + 1))

o <- h5write(obj = cbind(start(fragments),width(fragments)), file = tmpChrFile, name = chrPos)
o <- h5write(obj = mcols(fragments)$RG@lengths, file = tmpChrFile, name = chrRGLengths)
o <- h5write(obj = mcols(fragments)$RG@values, file = tmpChrFile, name = chrRGValues)
if(lengthRG == 0){

#HDF5 Write
o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(0, 2), level = 0))
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = 0))
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(0, 1), level = 0,
size = 10))

}else{

#HDF5 Write
o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0))
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0))
o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0,
size = max(nchar(mcols(fragments)$RG@values)) + 1))

o <- h5write(obj = cbind(start(fragments),width(fragments)), file = tmpChrFile, name = chrPos)
o <- h5write(obj = mcols(fragments)$RG@lengths, file = tmpChrFile, name = chrRGLengths)
o <- h5write(obj = mcols(fragments)$RG@values, file = tmpChrFile, name = chrRGValues)
}

#Free Some Memory!
rm(fragments)
Expand Down
Loading

0 comments on commit ecfe12a

Please sign in to comment.