Merge pull request #4 from kkleinoros/master

Corrections for log2 transformation and extracting files from minfi RGset
GreenwoodLab · Feb 3, 2016 · 394bcc6 · 394bcc6
2 parents d187a49 + c4daa0f
commit 394bcc6
Show file tree

Hide file tree

Showing 4 changed files with 31 additions and 51 deletions.
diff --git a/R/funtoonorm.R b/R/funtoonorm.R
@@ -2,7 +2,7 @@
 #Main function of the package.s
 funtoonorm <- function(sigA, sigB, Annot=NULL, 
                        controlred, controlgrn, cp.types=NULL, cell_type, ncmp=4,  ncv.fold=10, 
-                       logged.data = FALSE, save.quant=TRUE, type.fits="PCR", apply.fits=TRUE, validate=FALSE)
+                       logged2.data = FALSE, save.quant=TRUE, type.fits="PCR", apply.fits=TRUE, validate=FALSE)
 {
   ####################################################################################
   # functions
@@ -13,7 +13,7 @@ funtoonorm <- function(sigA, sigB, Annot=NULL,
   sum1 <- function(x, v2) { return(x + v2) }
   ##
   calcbeta <- function(A,B,offset) {
-    return((exp(B)-1)/(exp(A) + exp(B)-2 + offset))
+    return((2^B-1)/(2^A + 2^B-2 + offset))
   }
   extractqnt <- function(x, i, AB, ncmp)  { 
     return(x$fitted.values[i,AB,ncmp])  }
@@ -125,8 +125,8 @@ funtoonorm <- function(sigA, sigB, Annot=NULL,
     stop("apparent inconsistency w.r.t. log transformation of sigA/B data and control data \n")
   }
   if (max(sigA, na.rm=TRUE)>25) {
-    message("Assuming data have not been previously log transformed, and applying a log transformation, \n")
-    logged.data <- FALSE
+    message("Assuming data have not been previously log2 transformed, and applying a log2 transformation, \n")
+    logged2.data <- FALSE
   }
 
 
@@ -137,7 +137,7 @@ funtoonorm <- function(sigA, sigB, Annot=NULL,
 
   message("Data is ok.", '\n')
 
-  if (!logged.data) {        # log transformation if data are not already logged at input
+  if (!logged2.data) {        # log transformation if data are not already logged at input
     sigA <- log2(1 + sigA)
     sigB <- log2(1 + sigB)
   }
@@ -186,7 +186,7 @@ funtoonorm <- function(sigA, sigB, Annot=NULL,
     load("quantilesB.II.RData")
   }
 
-  if (!logged.data) {   # same assumption as for signalA, signalB w.r.t. log transformation
+  if (!logged2.data) {   # same assumption as for signalA, signalB w.r.t. log transformation
     controlgrn <- log2(1 + controlgrn)
     controlred <- log2(1 + controlred)
   }

diff --git a/man/funtooNorm-package.Rd b/man/funtooNorm-package.Rd
@@ -29,7 +29,7 @@ License: \tab GPL-3\cr
 %%   funtoonormout <- funtoonorm(sigA=sigAsample, sigB=sigBsample, Annot=Annot,
 %%                     controlred=matred, controlgrn=matgrn,
 %%                      cp.types=cp.types, cell_type = cell_type,
-%%                      logged.data=FALSE, save.quant=FALSE, ncmp=ncmp, apply.fits=TRUE,
+%%                      logged2.data=FALSE, save.quant=FALSE, ncmp=ncmp, apply.fits=TRUE,
 %%                      validate=FALSE)
 
 %%}
diff --git a/man/funtoonorm.Rd b/man/funtoonorm.Rd
@@ -10,7 +10,7 @@ This function performs normalization of Illumina Infinium Human Methylation 450
 \usage{
 funtoonorm(sigA, sigB, Annot = NULL, controlred, controlgrn,
 cp.types = NULL, cell_type, ncmp = 4, 
-ncv.fold = 10, logged.data=FALSE, save.quant=TRUE,
+ncv.fold = 10, logged2.data=FALSE, save.quant=TRUE,
 type.fits="PCR", apply.fits = TRUE, validate = FALSE)
 }
 
@@ -36,8 +36,8 @@ Number of components, from either principal component regression or partial leas
 \item{ncv.fold}{
 Number of cross-validation partitions.
 }
-\item{logged.data}{
-Logical, \verb{TRUE} if data have been previously log transformed (sigA, sigB, controlred, controlgrn), and \verb{FALSE} if not.
+\item{logged2.data}{
+Logical, \verb{TRUE} if data have been previously log2 transformed (sigA, sigB, controlred, controlgrn), and \verb{FALSE} if not.
 }
 
 \item{save.quant}{
@@ -79,14 +79,14 @@ If \verb{validate=TRUE}, then cross-validation is performed exploring performanc
 funtoonormout <- funtoonorm(sigA=sigAsample, sigB=sigBsample,
                       controlred=matred, controlgrn=matgrn, 
                       cp.types=NULL, cell_type = cell_type,
-                      logged.data=FALSE, save.quant=TRUE, apply.fits=FALSE,  
+                      logged2.data=FALSE, save.quant=TRUE, apply.fits=FALSE,  
                       validate=TRUE)
 ## to normalize methylation data, assuming save.quant was set to TRUE in the previous call:
 ncmp <- 4
 funtoonormout <- funtoonorm(sigA=sigAsample, sigB=sigBsample, Annot=Annot,
                       controlred=matred, controlgrn=matgrn,
                       cp.types=NULL, cell_type = cell_type,
-                      logged.data=FALSE, save.quant=FALSE, ncmp=ncmp, apply.fits=TRUE,
+                      logged2.data=FALSE, save.quant=FALSE, ncmp=ncmp, apply.fits=TRUE,
                       validate=FALSE)
 
 }
diff --git a/vignettes/funtooNorm.Rnw b/vignettes/funtooNorm.Rnw
@@ -80,7 +80,7 @@ A default annotation table is provided if not supplied including all probes on t
 Finally, a number of parameters control whether intermediate calculations should be stored, simply so that the analysis can be performed in stages if desired.
 
 We have provided a small data set containing $N=93$ individuals and 20,000 probes to demonstrate the usage of the package. 
-The samples are either from cord blood or foetal placenta tissue. 
+The samples are either from cord blood or fetal placental tissue. 
 
 Here is a basic call to normalize this sample data set: 
  %The code to be inserted
@@ -91,7 +91,7 @@ Here is a basic call to normalize this sample data set:
     funtoonormout <- funtoonorm(sigA=sigAsample, sigB=sigBsample, Annot=NULL, 
                       controlred=matred, controlgrn=matgrn, 
                       cp.types=NULL, cell_type = cell_type,
-                      ncmp=4, ncv.fold=10, logged.data=FALSE, save.quant=TRUE, 
+                      ncmp=4, ncv.fold=10, logged2.data=FALSE, save.quant=TRUE, 
                       type.fits="PCR", apply.fits=TRUE, validate=FALSE)
 
 @
@@ -106,8 +106,8 @@ By default, funtooNorm will perform 10-fold cross-validation, but this can be ch
 Other parameters include: \\
 \texttt{save.quant:} When \texttt{TRUE}, the quantiles should be saved. When \texttt{FALSE}, saved quantiles from a previous run will be loaded and used. \\
 \texttt{apply.fits:} When \texttt{TRUE}, the results of the model fitting process should be used - i.e. the original data should be normalized. This parameter can be set fo \texttt{FALSE} when exploring the desired number of components. \\
-\texttt{logged.data:} If \texttt{TRUE}, the \texttt{sigA} and \texttt{sigB} matrices, as well as the control probe data matrices (controlred, controlgrn) are assumed to have been previously log transformed prior to sending the data
-into the algorithm.  If \texttt{logged.data=FALSE}, then these data will be log transformed (log(1+x)) inside the algorithm. \\
+\texttt{logged2.data:} If \texttt{TRUE}, the \texttt{sigA} and \texttt{sigB} matrices, as well as the control probe data matrices (controlred, controlgrn) are assumed to have been previously log2 transformed prior to sending the data
+into the algorithm.  If \texttt{logged2.data=FALSE}, then these data will be log2 transformed (log2(1+x)) inside the algorithm. \\
 
 The following call performs cross-validation to assess the performance of the model fitting.  
 Note that here the \texttt{ncmp} parameter does not need to be specified.
@@ -117,7 +117,7 @@ Note that here the \texttt{ncmp} parameter does not need to be specified.
     #of parameter ncmp for PLS regression:
     funtoonormmout <- funtoonorm(sigA=sigAsample, sigB=sigBsample,
                       controlred=matred, controlgrn=matgrn, cp.type = cp.types, cell_type = cell_type,
-                      ncv.fold=10, logged.data=FALSE, save.quant=TRUE, 
+                      ncv.fold=10, logged2.data=FALSE, save.quant=TRUE, 
                       type.fits="PCR", apply.fits=FALSE, validate = TRUE)
 
 
@@ -151,7 +151,7 @@ After choosing a desired number of components, in order to run the program to no
     # and PLR regresion
     funtoonormmout <- funtoonorm(sigA=sigAsample, sigB=sigBsample,
                       controlred=matred, controlgrn=matgrn, cp.type = cp.types, cell_type = cell_type,
-                      logged.data=FALSE, save.quant=FALSE, ncmp=4, 
+                      logged2.data=FALSE, save.quant=FALSE, ncmp=4, 
                       type.fits="PLS", apply.fits=TRUE, validate = FALSE)
 
 @
@@ -259,6 +259,7 @@ green matrices.
 <<>>=
 ctrlRed <- matRed[rownames(matRed) %in% ctrlInfo$Address,]
 ctrlGrn <- matGrn[rownames(matGrn) %in% ctrlInfo$Address,]
+cp.types<-ctrlInfo$Type[match(rownames(ctrlRed),ctrlInfo$Address)]
 @
 
 As the \texttt{minfi} vignette describes nicely, the 450k array
@@ -281,44 +282,22 @@ signal matrices to be the probe names whereas the \texttt{matRed} and
 <<>>=
 typeI_info <- getProbeInfo(manifest, type="I")
 typeII_info <- getProbeInfo(manifest, type="II")
+snpI_info <- getProbeInfo(manifest, type="SnpI")
+snpII_info <- getProbeInfo(manifest, type="SnpII")
 
-typeIARed <- typeI_info[typeI_info$Color=="Red",]
-typeIAGrn <- typeI_info[typeI_info$Color=="Grn",]
+probeID <-c(typeI_info$Name, typeII_info$Name, snpI_info$Name, snpII_info$Name) 
+address<- c(typeI_info$AddressA, typeII_info$AddressA, snpI_info$AddressA, snpII_info$AddressA)  
 
-sigAIRed <- matRed[rownames(matRed) %in% typeIARed$AddressA,]
-rownames(sigAIRed) <- typeIARed$Name
-sigAIGrn <- matGrn[rownames(matGrn) %in% typeIAGrn$AddressA,]
-rownames(sigAIGrn) <- typeIAGrn$Name
+  
 
-sigAI <- rbind(sigAIRed, sigAIGrn)
 @
 
-The extraction of the signal A matrix for Type II probes is much simpler:
+Extraction of the signal A matrix and signal B matrix:
 <<>>=
-sigAII <- matRed[rownames(matRed) %in% typeII_info$AddressA,]
-rownames(sigAII) <- typeII_info$Name
-@
-Now these the Type I and Type II signal A matrices can be combined:
-<<>>=
-sigA <- rbind(sigAI, sigAII)
-@
-The following code block repeats the same steps for the unmethylated B
-signal matrix:
-<<>>=
-typeIBRed <- typeI_info[typeI_info$Color=="Red",]
-typeIBGrn <- typeI_info[typeI_info$Color=="Grn",]
-
-sigBIRed <- matRed[rownames(matRed) %in% typeIBRed$AddressB,]
-rownames(sigBIRed) <- typeIBRed$Name
-sigBIGrn <- matGrn[rownames(matGrn) %in% typeIBGrn$AddressB,]
-rownames(sigBIGrn) <- typeIBGrn$Name
-
-sigBI <- rbind(sigBIRed, sigBIGrn)
-
-sigBII <- matGrn[rownames(matGrn) %in% typeII_info$AddressA,]
-rownames(sigBII) <- typeII_info$Name
-
-sigB <- rbind(sigBI, sigBII)
+sigA<-matRed[rownames(matRed) %in% address,]
+    rownames(sigA)=probeID[match(rownames(sigA),address)]
+sigB<-matGrn[rownames(matGrn) %in% address,]
+    rownames(sigB)=probeID[match(rownames(sigB),address)]
 @
 
 \texttt{funtooNorm} requires a vector with the cell type of each of
@@ -340,7 +319,8 @@ After these conversion steps, we can run \texttt{funtooNorm()} on the
 \texttt{RgsetEx} data:
 <<>>=
 ftRGsetExOut <- funtoonorm(sigA=sigA, sigB=sigB, controlred=ctrlRed,
-                           controlgrn=ctrlGrn, cell_type=cellType,
+                           controlgrn=ctrlGrn, logged2.data=FALSE,
+                           cp.types=cp.types,cell_type=cellType,
                            Annot=annot)
 @