diff --git a/R/funtoonorm.R b/R/funtoonorm.R index 5d27258..90583d6 100644 --- a/R/funtoonorm.R +++ b/R/funtoonorm.R @@ -2,7 +2,7 @@ #Main function of the package.s funtoonorm <- function(sigA, sigB, Annot=NULL, controlred, controlgrn, cp.types=NULL, cell_type, ncmp=4, ncv.fold=10, - logged.data = FALSE, save.quant=TRUE, type.fits="PCR", apply.fits=TRUE, validate=FALSE) + logged2.data = FALSE, save.quant=TRUE, type.fits="PCR", apply.fits=TRUE, validate=FALSE) { #################################################################################### # functions @@ -13,7 +13,7 @@ funtoonorm <- function(sigA, sigB, Annot=NULL, sum1 <- function(x, v2) { return(x + v2) } ## calcbeta <- function(A,B,offset) { - return((exp(B)-1)/(exp(A) + exp(B)-2 + offset)) + return((2^B-1)/(2^A + 2^B-2 + offset)) } extractqnt <- function(x, i, AB, ncmp) { return(x$fitted.values[i,AB,ncmp]) } @@ -125,8 +125,8 @@ funtoonorm <- function(sigA, sigB, Annot=NULL, stop("apparent inconsistency w.r.t. log transformation of sigA/B data and control data \n") } if (max(sigA, na.rm=TRUE)>25) { - message("Assuming data have not been previously log transformed, and applying a log transformation, \n") - logged.data <- FALSE + message("Assuming data have not been previously log2 transformed, and applying a log2 transformation, \n") + logged2.data <- FALSE } @@ -137,7 +137,7 @@ funtoonorm <- function(sigA, sigB, Annot=NULL, message("Data is ok.", '\n') - if (!logged.data) { # log transformation if data are not already logged at input + if (!logged2.data) { # log transformation if data are not already logged at input sigA <- log2(1 + sigA) sigB <- log2(1 + sigB) } @@ -186,7 +186,7 @@ funtoonorm <- function(sigA, sigB, Annot=NULL, load("quantilesB.II.RData") } - if (!logged.data) { # same assumption as for signalA, signalB w.r.t. log transformation + if (!logged2.data) { # same assumption as for signalA, signalB w.r.t. log transformation controlgrn <- log2(1 + controlgrn) controlred <- log2(1 + controlred) } diff --git a/man/funtooNorm-package.Rd b/man/funtooNorm-package.Rd index 9838e31..332e4cd 100644 --- a/man/funtooNorm-package.Rd +++ b/man/funtooNorm-package.Rd @@ -29,7 +29,7 @@ License: \tab GPL-3\cr %% funtoonormout <- funtoonorm(sigA=sigAsample, sigB=sigBsample, Annot=Annot, %% controlred=matred, controlgrn=matgrn, %% cp.types=cp.types, cell_type = cell_type, -%% logged.data=FALSE, save.quant=FALSE, ncmp=ncmp, apply.fits=TRUE, +%% logged2.data=FALSE, save.quant=FALSE, ncmp=ncmp, apply.fits=TRUE, %% validate=FALSE) %%} diff --git a/man/funtoonorm.Rd b/man/funtoonorm.Rd index be38b99..e0c47e0 100644 --- a/man/funtoonorm.Rd +++ b/man/funtoonorm.Rd @@ -10,7 +10,7 @@ This function performs normalization of Illumina Infinium Human Methylation 450 \usage{ funtoonorm(sigA, sigB, Annot = NULL, controlred, controlgrn, cp.types = NULL, cell_type, ncmp = 4, -ncv.fold = 10, logged.data=FALSE, save.quant=TRUE, +ncv.fold = 10, logged2.data=FALSE, save.quant=TRUE, type.fits="PCR", apply.fits = TRUE, validate = FALSE) } @@ -36,8 +36,8 @@ Number of components, from either principal component regression or partial leas \item{ncv.fold}{ Number of cross-validation partitions. } -\item{logged.data}{ -Logical, \verb{TRUE} if data have been previously log transformed (sigA, sigB, controlred, controlgrn), and \verb{FALSE} if not. +\item{logged2.data}{ +Logical, \verb{TRUE} if data have been previously log2 transformed (sigA, sigB, controlred, controlgrn), and \verb{FALSE} if not. } \item{save.quant}{ @@ -79,14 +79,14 @@ If \verb{validate=TRUE}, then cross-validation is performed exploring performanc funtoonormout <- funtoonorm(sigA=sigAsample, sigB=sigBsample, controlred=matred, controlgrn=matgrn, cp.types=NULL, cell_type = cell_type, - logged.data=FALSE, save.quant=TRUE, apply.fits=FALSE, + logged2.data=FALSE, save.quant=TRUE, apply.fits=FALSE, validate=TRUE) ## to normalize methylation data, assuming save.quant was set to TRUE in the previous call: ncmp <- 4 funtoonormout <- funtoonorm(sigA=sigAsample, sigB=sigBsample, Annot=Annot, controlred=matred, controlgrn=matgrn, cp.types=NULL, cell_type = cell_type, - logged.data=FALSE, save.quant=FALSE, ncmp=ncmp, apply.fits=TRUE, + logged2.data=FALSE, save.quant=FALSE, ncmp=ncmp, apply.fits=TRUE, validate=FALSE) } diff --git a/vignettes/funtooNorm.Rnw b/vignettes/funtooNorm.Rnw index bcd7d75..e73cef8 100644 --- a/vignettes/funtooNorm.Rnw +++ b/vignettes/funtooNorm.Rnw @@ -80,7 +80,7 @@ A default annotation table is provided if not supplied including all probes on t Finally, a number of parameters control whether intermediate calculations should be stored, simply so that the analysis can be performed in stages if desired. We have provided a small data set containing $N=93$ individuals and 20,000 probes to demonstrate the usage of the package. -The samples are either from cord blood or foetal placenta tissue. +The samples are either from cord blood or fetal placental tissue. Here is a basic call to normalize this sample data set: %The code to be inserted @@ -91,7 +91,7 @@ Here is a basic call to normalize this sample data set: funtoonormout <- funtoonorm(sigA=sigAsample, sigB=sigBsample, Annot=NULL, controlred=matred, controlgrn=matgrn, cp.types=NULL, cell_type = cell_type, - ncmp=4, ncv.fold=10, logged.data=FALSE, save.quant=TRUE, + ncmp=4, ncv.fold=10, logged2.data=FALSE, save.quant=TRUE, type.fits="PCR", apply.fits=TRUE, validate=FALSE) @ @@ -106,8 +106,8 @@ By default, funtooNorm will perform 10-fold cross-validation, but this can be ch Other parameters include: \\ \texttt{save.quant:} When \texttt{TRUE}, the quantiles should be saved. When \texttt{FALSE}, saved quantiles from a previous run will be loaded and used. \\ \texttt{apply.fits:} When \texttt{TRUE}, the results of the model fitting process should be used - i.e. the original data should be normalized. This parameter can be set fo \texttt{FALSE} when exploring the desired number of components. \\ -\texttt{logged.data:} If \texttt{TRUE}, the \texttt{sigA} and \texttt{sigB} matrices, as well as the control probe data matrices (controlred, controlgrn) are assumed to have been previously log transformed prior to sending the data -into the algorithm. If \texttt{logged.data=FALSE}, then these data will be log transformed (log(1+x)) inside the algorithm. \\ +\texttt{logged2.data:} If \texttt{TRUE}, the \texttt{sigA} and \texttt{sigB} matrices, as well as the control probe data matrices (controlred, controlgrn) are assumed to have been previously log2 transformed prior to sending the data +into the algorithm. If \texttt{logged2.data=FALSE}, then these data will be log2 transformed (log2(1+x)) inside the algorithm. \\ The following call performs cross-validation to assess the performance of the model fitting. Note that here the \texttt{ncmp} parameter does not need to be specified. @@ -117,7 +117,7 @@ Note that here the \texttt{ncmp} parameter does not need to be specified. #of parameter ncmp for PLS regression: funtoonormmout <- funtoonorm(sigA=sigAsample, sigB=sigBsample, controlred=matred, controlgrn=matgrn, cp.type = cp.types, cell_type = cell_type, - ncv.fold=10, logged.data=FALSE, save.quant=TRUE, + ncv.fold=10, logged2.data=FALSE, save.quant=TRUE, type.fits="PCR", apply.fits=FALSE, validate = TRUE) @@ -151,7 +151,7 @@ After choosing a desired number of components, in order to run the program to no # and PLR regresion funtoonormmout <- funtoonorm(sigA=sigAsample, sigB=sigBsample, controlred=matred, controlgrn=matgrn, cp.type = cp.types, cell_type = cell_type, - logged.data=FALSE, save.quant=FALSE, ncmp=4, + logged2.data=FALSE, save.quant=FALSE, ncmp=4, type.fits="PLS", apply.fits=TRUE, validate = FALSE) @ @@ -259,6 +259,7 @@ green matrices. <<>>= ctrlRed <- matRed[rownames(matRed) %in% ctrlInfo$Address,] ctrlGrn <- matGrn[rownames(matGrn) %in% ctrlInfo$Address,] +cp.types<-ctrlInfo$Type[match(rownames(ctrlRed),ctrlInfo$Address)] @ As the \texttt{minfi} vignette describes nicely, the 450k array @@ -281,44 +282,22 @@ signal matrices to be the probe names whereas the \texttt{matRed} and <<>>= typeI_info <- getProbeInfo(manifest, type="I") typeII_info <- getProbeInfo(manifest, type="II") +snpI_info <- getProbeInfo(manifest, type="SnpI") +snpII_info <- getProbeInfo(manifest, type="SnpII") -typeIARed <- typeI_info[typeI_info$Color=="Red",] -typeIAGrn <- typeI_info[typeI_info$Color=="Grn",] +probeID <-c(typeI_info$Name, typeII_info$Name, snpI_info$Name, snpII_info$Name) +address<- c(typeI_info$AddressA, typeII_info$AddressA, snpI_info$AddressA, snpII_info$AddressA) -sigAIRed <- matRed[rownames(matRed) %in% typeIARed$AddressA,] -rownames(sigAIRed) <- typeIARed$Name -sigAIGrn <- matGrn[rownames(matGrn) %in% typeIAGrn$AddressA,] -rownames(sigAIGrn) <- typeIAGrn$Name + -sigAI <- rbind(sigAIRed, sigAIGrn) @ -The extraction of the signal A matrix for Type II probes is much simpler: +Extraction of the signal A matrix and signal B matrix: <<>>= -sigAII <- matRed[rownames(matRed) %in% typeII_info$AddressA,] -rownames(sigAII) <- typeII_info$Name -@ -Now these the Type I and Type II signal A matrices can be combined: -<<>>= -sigA <- rbind(sigAI, sigAII) -@ -The following code block repeats the same steps for the unmethylated B -signal matrix: -<<>>= -typeIBRed <- typeI_info[typeI_info$Color=="Red",] -typeIBGrn <- typeI_info[typeI_info$Color=="Grn",] - -sigBIRed <- matRed[rownames(matRed) %in% typeIBRed$AddressB,] -rownames(sigBIRed) <- typeIBRed$Name -sigBIGrn <- matGrn[rownames(matGrn) %in% typeIBGrn$AddressB,] -rownames(sigBIGrn) <- typeIBGrn$Name - -sigBI <- rbind(sigBIRed, sigBIGrn) - -sigBII <- matGrn[rownames(matGrn) %in% typeII_info$AddressA,] -rownames(sigBII) <- typeII_info$Name - -sigB <- rbind(sigBI, sigBII) +sigA<-matRed[rownames(matRed) %in% address,] + rownames(sigA)=probeID[match(rownames(sigA),address)] +sigB<-matGrn[rownames(matGrn) %in% address,] + rownames(sigB)=probeID[match(rownames(sigB),address)] @ \texttt{funtooNorm} requires a vector with the cell type of each of @@ -340,7 +319,8 @@ After these conversion steps, we can run \texttt{funtooNorm()} on the \texttt{RgsetEx} data: <<>>= ftRGsetExOut <- funtoonorm(sigA=sigA, sigB=sigB, controlred=ctrlRed, - controlgrn=ctrlGrn, cell_type=cellType, + controlgrn=ctrlGrn, logged2.data=FALSE, + cp.types=cp.types,cell_type=cellType, Annot=annot) @