Skip to content

Commit

Permalink
Merge pull request #4 from tlesluyes/dev
Browse files Browse the repository at this point in the history
Update from dev
  • Loading branch information
tlesluyes authored Oct 10, 2023
2 parents 3d2ee1f + 0bc2f9b commit 02b0daa
Show file tree
Hide file tree
Showing 16 changed files with 451 additions and 117 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: myFun
Type: Package
Title: myFun is a collection of my favorite R functions, packaged for simplicity
Version: 1.0.4
Date: 2023-10-03
Version: 1.0.5
Date: 2023-10-10
Authors@R: person("Tom", "Lesluyes", email = "[email protected]", role = c("aut", "cre"))
Author: Tom Lesluyes [aut, cre]
Maintainer: Tom Lesluyes <[email protected]>
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,19 @@

export(adjustPositions)
export(checkGRlist)
export(computeBAF)
export(computeFit)
export(computeISA)
export(computeISA_batch)
export(computeLogR)
export(computeMD)
export(computeMD_batch)
export(generate_cytoband_and_CHRsize)
export(harmonizeGRanges)
export(load_CHRsize)
export(load_cytoband)
export(occurrenceGRanges)
export(reestimate_ploidy)
export(reestimate_purity)
export(splitDF)
importFrom(foreach,"%dopar%")
53 changes: 53 additions & 0 deletions R/computeISA.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,57 @@ computeISA=function(GR1, GR2, CNstatus="CNstatus") {
profiles=harmonizeGRanges(list(GR1, GR2))
sameCN=which(GenomicRanges::mcols(profiles[[1]])[, CNstatus]==GenomicRanges::mcols(profiles[[2]])[, CNstatus])
return(sum(IRanges::width(profiles[[1]][sameCN, ]))/sum(IRanges::width(profiles[[1]]))*100)
}

#' @title computeISA_batch
#' @description Compute the inter-sample agreement (ISA) for a batch of samples
#' @details This function computes the inter-sample agreement (ISA) between multiple profiles (as a list of GRanges objects).
#' @param myGRList a list of GRanges objects, each object should correspond to one CNA profile
#' @param cores a numeric, the number of cores to use (default: 1)
#' @param min_seg_size a numeric, the minimum segment size (in bp) to consider (default: 0)
#' @param CNstatus a metadata column name for the copy-number status (default: "CNstatus"). Can be total (e.g. "3") or allele-specific (e.g. "2+1")
#' @return A matrix of ISA values
#' @examples
#' require("GenomicRanges")
#' GR1=GRanges(seqnames=rep("1", 3),
#' ranges=IRanges(start=c(1, 1001, 10001), end=c(1000, 10000, 20000)),
#' CNstatus=c("1+1", "2+1", "1+1"))
#' GR2=GRanges(seqnames=rep("1", 2),
#' ranges=IRanges(start=c(500, 10001), end=c(10000, 25000)),
#' CNstatus=c("2+1", "1+1"))
#' GR3=GRanges(seqnames="1",
#' ranges=IRanges(start=500, end=25000),
#' CNstatus="1+1")
#' myGRList=list(GR1, GR2, GR3)
#' names(myGRList)=c("GR1", "GR2", "GR3")
#' computeISA_batch(myGRList)
#' @author tlesluyes
#' @export
computeISA_batch=function(myGRList, cores=1, min_seg_size=0, CNstatus="CNstatus") {
if (cores>1) doParallel::registerDoParallel(cores=cores)
checkGRlist(myGRList)
stopifnot(all(sapply(myGRList, function(x) all("CNstatus" %in% names(GenomicRanges::mcols(x))))))
if (is.null(names(myGRList))) {
NAMES=as.character(1:length(myGRList))
} else {
NAMES=names(myGRList)
}
myGRList=harmonizeGRanges(myGRList, cores=cores)
if (min_seg_size>0) {
TO_KEEP=IRanges::width(myGRList[[1]])>min_seg_size
myGRList=lapply(myGRList, function(x) x[TO_KEEP, ])
rm(TO_KEEP)
}
CNstatus_matrix=do.call(cbind, lapply(myGRList, function(x) { return(GenomicRanges::mcols(x)[, CNstatus]) }))
WIDTHS=IRanges::width(myGRList[[1]])
SUM_WIDTH=sum(WIDTHS)
ISA=foreach::foreach(i=1:length(myGRList), .combine=cbind) %dopar% {
if (i %% 50==0) print(paste0(i, "/", length(myGRList)))
SAME=CNstatus_matrix==CNstatus_matrix[, i]
return(sapply(1:length(myGRList), function(x) sum(WIDTHS[SAME[, x]])/SUM_WIDTH))
}
ISA=ISA*100
rownames(ISA)=NAMES
colnames(ISA)=NAMES
return(ISA)
}
52 changes: 0 additions & 52 deletions R/computeISA_batch.R

This file was deleted.

60 changes: 60 additions & 0 deletions R/computeMD.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,64 @@ computeMD=function(GR1, GR2, nMajor="nMajor", nMinor="nMinor", convertMb=FALSE)
MD=sum((abs(GenomicRanges::mcols(profiles[[1]])[, nMajor]-GenomicRanges::mcols(profiles[[2]])[, nMajor])+abs(GenomicRanges::mcols(profiles[[1]])[, nMinor]-GenomicRanges::mcols(profiles[[2]])[, nMinor]))*IRanges::width(profiles[[1]]))
if (convertMb) MD=MD/1e6
return(MD)
}

#' @title computeMD_batch
#' @description Compute the Manhattan distance (MD) for a batch of samples
#' @details This function computes the Manhattan distance (MD) between multiple profiles (as a list of GRanges objects).
#' @param myGRList a list of GRanges objects, each object should correspond to one CNA profile
#' @param cores a numeric, the number of cores to use (default: 1)
#' @param min_seg_size a numeric, the minimum segment size (in bp) to consider (default: 0)
#' @param nMajor a metadata column name for the major allele (default: "nMajor")
#' @param nMinor a metadata column name for the minor allele (default: "nMinor")
#' @param convertMb a boolean, the MD will be converted to megabases if set to TRUE (default: FALSE)
#' @return A matrix of MD values
#' @examples
#' require("GenomicRanges")
#' GR1=GRanges(seqnames=rep("1", 3),
#' ranges=IRanges(start=c(1, 1001, 10001), end=c(1000, 10000, 20000)),
#' nMajor=c(1, 2, 1),
#' nMinor=c(1, 1, 1))
#' GR2=GRanges(seqnames=rep("1", 2),
#' ranges=IRanges(start=c(500, 10001), end=c(10000, 25000)),
#' nMajor=c(2, 1),
#' nMinor=c(1, 1))
#' GR3=GRanges(seqnames="1",
#' ranges=IRanges(start=500, end=25000),
#' nMajor=1,
#' nMinor=1)
#' myGRList=list(GR1, GR2, GR3)
#' names(myGRList)=c("GR1", "GR2", "GR3")
#' computeMD_batch(myGRList)
#' @author tlesluyes
#' @export
computeMD_batch=function(myGRList, cores=1, min_seg_size=0, nMajor="nMajor", nMinor="nMinor", convertMb=FALSE) {
if (cores>1) doParallel::registerDoParallel(cores=cores)
checkGRlist(myGRList)
stopifnot(length(convertMb)==1 && is.logical(convertMb))
stopifnot(all(sapply(myGRList, function(x) all(c(nMajor, nMinor) %in% names(GenomicRanges::mcols(x))))))
stopifnot(all(sapply(myGRList, function(x) is.numeric(GenomicRanges::mcols(x)[, nMajor]))))
stopifnot(all(sapply(myGRList, function(x) is.numeric(GenomicRanges::mcols(x)[, nMinor]))))
if (is.null(names(myGRList))) {
NAMES=as.character(1:length(myGRList))
} else {
NAMES=names(myGRList)
}
myGRList=harmonizeGRanges(myGRList, cores=cores)
if (min_seg_size>0) {
TO_KEEP=IRanges::width(myGRList[[1]])>min_seg_size
myGRList=lapply(myGRList, function(x) x[TO_KEEP, ])
rm(TO_KEEP)
}
nMajor_matrix=do.call(cbind, lapply(myGRList, function(x) { return(GenomicRanges::mcols(x)[, nMajor]) }))
nMinor_matrix=do.call(cbind, lapply(myGRList, function(x) { return(GenomicRanges::mcols(x)[, nMinor]) }))
WIDTHS=IRanges::width(myGRList[[1]])
MD=foreach::foreach(i=1:length(myGRList), .combine=cbind) %dopar% {
if (i %% 50==0) print(paste0(i, "/", length(myGRList)))
return(apply((abs(nMajor_matrix-nMajor_matrix[, i])+abs(nMinor_matrix-nMinor_matrix[, i]))*WIDTHS, 2, sum))
}
rownames(MD)=NAMES
colnames(MD)=NAMES
if (convertMb) MD=MD/1e6
return(MD)
}
59 changes: 0 additions & 59 deletions R/computeMD_batch.R

This file was deleted.

114 changes: 114 additions & 0 deletions R/logR_BAF_purity_ploidy.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#' @title computeLogR
#' @description Compute the theoretical logR value for a given segment
#' @details This function computes the theoretical logR value for a given segment (from nMajor, nMinor, purity and ploidy values). Since logR isn't allele-specific, ntot can be used instead of nMajor (and nMinor should set to 0).
#' @param nMajor the number of copies of the major allele
#' @param nMinor the number of copies of the minor allele
#' @param purity the purity estimate of the tumour
#' @param ploidy the ploidy estimate of the tumour
#' @param digits a numeric, the number of digits to round to (default: 4)
#' @return A number representing the logR value
#' @seealso https://doi.org/10.1038/s41592-020-01013-2
#' @examples
#' # A 2+1 state in a diploid tumour with 90% purity
#' computeLogR(2, 1, 0.9, 2)
#' # A loss of 1 copy (2+1) in a pseudo-tetraploid tumour with 60% purity
#' computeLogR(2, 1, 0.6, 3.5)
#' @author tlesluyes
#' @export
computeLogR=function(nMajor, nMinor, purity, ploidy, digits=4) {
stopifnot(all(is.numeric(c(nMajor, nMinor, purity, ploidy, digits))))
round(log2((purity*(nMajor+nMinor)+2*(1-purity))/(purity*ploidy+2*(1-purity))), digits)
}

#' @title computeBAF
#' @description Compute the theoretical BAF values for a given segment
#' @details This function computes the theoretical BAF values for a given segment (from nMajor, nMinor and purity values).
#' @param nMajor the number of copies of the major allele
#' @param nMinor the number of copies of the minor allele
#' @param purity the purity estimate of the tumour
#' @param digits a numeric, the number of digits to round to (default: 4)
#' @return A vector of two numbers representing the BAF values
#' @seealso https://doi.org/10.1038/s41592-020-01013-2
#' @examples
#' # A 2+1 state in a tumour with 90% purity
#' computeBAF(2, 1, 0.9)
#' # A 1+0 state in a tumour with 60% purity
#' computeBAF(1, 0, 0.6)
#' @author tlesluyes
#' @export
computeBAF=function(nMajor, nMinor, purity, digits=4) {
stopifnot(all(is.numeric(c(nMajor, nMinor, purity, digits))))
BAF1=(purity*nMinor+(1-purity))/(purity*(nMajor+nMinor)+2*(1-purity))
BAF2=(purity*nMajor+(1-purity))/(purity*(nMajor+nMinor)+2*(1-purity))
return(round(c(BAF1, BAF2), digits))
}

#' @title computeFit
#' @description Compute the purity/ploidy fit for a given segment
#' @details This function computes the purity/ploidy fit (rho, psi and psit) for a given segment (from logR, BAF, proposed nMajor and proposed nMinor).
#' @param logR the logR value of the segment
#' @param BAF the BAF value of the segment (upper band only so the value should be in the 0.5-1 space)
#' @param nMajor the number of copies of the major allele
#' @param nMinor the number of copies of the minor allele
#' @param gamma the gamma parameter is platform-dependent and represents the expected logR decrease in a diploid sample where one copy is lost (should be 1 for HTS data and 0.55 for SNP arrays)
#' @param digits a numeric, the number of digits to round to (default: 4)
#' @return A list with the rho (=purity), psi (=total ploidy) and psit (=tumour ploidy) values
#' @examples
#' # A segment has logR=0.5361 and BAF=0.3448/0.6552
#' # What is the purity/ploidy fit if I believe that the segment is 2+1?
#' computeFit(0.5361, 0.6552, 2, 1, 1) # purity=90%; ploidy=2
#' @author tlesluyes
#' @export
#' @seealso https://doi.org/10.1038/s41592-020-01013-2
computeFit=function(logR, BAF, nMajor, nMinor, gamma, digits=4) {
stopifnot(all(is.numeric(c(logR, BAF, nMajor, nMinor, gamma, digits))))
rho=(2*BAF-1)/(2*BAF-BAF*(nMajor+nMinor)-1+nMajor)
psi=(rho*(nMajor+nMinor)+2-2*rho)/(2^(logR/gamma))
psit=(psi-2*(1-rho))/rho
return(list(rho=round(rho, digits), psi=round(psi, digits), psit=round(psit, digits)))
}

#' @title reestimate_ploidy
#' @description Compute the re-estimated ploidy for a given sample
#' @details This function computes the re-estimated ploidy for a given sample (from its old purity/ploidy fit and the re-estimated purity).
#' @param rho.old old purity estimate
#' @param psit.old old ploidy estimate
#' @param rho.new new purity estimate
#' @param WGD number of WGD events (0 if there is no WGD)
#' @param digits a numeric, the number of digits to round to (default: 4)
#' @return A number representing the re-estimated ploidy
#' @examples
#' # A pseudo-diploid sample has purity=74% and ploidy=2.4
#' # What is the re-estimated ploidy if I believe that the sample has purity=61%?
#' reestimate_ploidy(0.74, 2.4, 0.61, 0)
#' @author tlesluyes
#' @export
reestimate_ploidy=function(rho.old, psit.old, rho.new, WGD, digits=4) {
stopifnot(all(is.numeric(c(rho.old, psit.old, rho.new, digits))))
COEF=2*(WGD+1)
return(round(((rho.old*psit.old)+COEF*(rho.new-rho.old))/rho.new, digits))
}

#' @title reestimate_purity
#' @description Compute the re-estimated purity for a given sample
#' @details This function computes the re-estimated purity for a given sample in the context of a jump in ploidy (so the matched ploidy needs to be doubled or halved).
#' @param rho.old old purity estimate
#' @param psit.old old ploidy estimate
#' @param switch a character ("double" or "halve") indicating whether the ploidy should be doubled or halved
#' @param digits a numeric, the number of digits to round to (default: 4)
#' @return A number representing the re-estimated purity
#' @examples
#' # A sample has purity=74% and ploidy=2.4 but the CNA profile needs to be doubled
#' # What is the re-estimated purity?
#' reestimate_purity(0.74, 2.4, "double")
#' @author tlesluyes
#' @export
reestimate_purity=function(rho.old, psit.old, switch, digits=4) {
stopifnot(all(is.numeric(c(rho.old, psit.old, digits))))
stopifnot(switch %in% c("double", "halve"))
if (switch=="double") {
return(round(rho.old/(2-rho.old), digits))
} else {
return(round(2*rho.old/(rho.old+1), digits))
}
}
Loading

0 comments on commit 02b0daa

Please sign in to comment.