Skip to content

Commit

Permalink
Merge pull request #4 from kkleinoros/master
Browse files Browse the repository at this point in the history
Corrections for log2 transformation and extracting files from minfi RGset
  • Loading branch information
kkleinoros committed Feb 3, 2016
2 parents d187a49 + c4daa0f commit 394bcc6
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 51 deletions.
12 changes: 6 additions & 6 deletions R/funtoonorm.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#Main function of the package.s
funtoonorm <- function(sigA, sigB, Annot=NULL,
controlred, controlgrn, cp.types=NULL, cell_type, ncmp=4, ncv.fold=10,
logged.data = FALSE, save.quant=TRUE, type.fits="PCR", apply.fits=TRUE, validate=FALSE)
logged2.data = FALSE, save.quant=TRUE, type.fits="PCR", apply.fits=TRUE, validate=FALSE)
{
####################################################################################
# functions
Expand All @@ -13,7 +13,7 @@ funtoonorm <- function(sigA, sigB, Annot=NULL,
sum1 <- function(x, v2) { return(x + v2) }
##
calcbeta <- function(A,B,offset) {
return((exp(B)-1)/(exp(A) + exp(B)-2 + offset))
return((2^B-1)/(2^A + 2^B-2 + offset))
}
extractqnt <- function(x, i, AB, ncmp) {
return(x$fitted.values[i,AB,ncmp]) }
Expand Down Expand Up @@ -125,8 +125,8 @@ funtoonorm <- function(sigA, sigB, Annot=NULL,
stop("apparent inconsistency w.r.t. log transformation of sigA/B data and control data \n")
}
if (max(sigA, na.rm=TRUE)>25) {
message("Assuming data have not been previously log transformed, and applying a log transformation, \n")
logged.data <- FALSE
message("Assuming data have not been previously log2 transformed, and applying a log2 transformation, \n")
logged2.data <- FALSE
}


Expand All @@ -137,7 +137,7 @@ funtoonorm <- function(sigA, sigB, Annot=NULL,

message("Data is ok.", '\n')

if (!logged.data) { # log transformation if data are not already logged at input
if (!logged2.data) { # log transformation if data are not already logged at input
sigA <- log2(1 + sigA)
sigB <- log2(1 + sigB)
}
Expand Down Expand Up @@ -186,7 +186,7 @@ funtoonorm <- function(sigA, sigB, Annot=NULL,
load("quantilesB.II.RData")
}

if (!logged.data) { # same assumption as for signalA, signalB w.r.t. log transformation
if (!logged2.data) { # same assumption as for signalA, signalB w.r.t. log transformation
controlgrn <- log2(1 + controlgrn)
controlred <- log2(1 + controlred)
}
Expand Down
2 changes: 1 addition & 1 deletion man/funtooNorm-package.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ License: \tab GPL-3\cr
%% funtoonormout <- funtoonorm(sigA=sigAsample, sigB=sigBsample, Annot=Annot,
%% controlred=matred, controlgrn=matgrn,
%% cp.types=cp.types, cell_type = cell_type,
%% logged.data=FALSE, save.quant=FALSE, ncmp=ncmp, apply.fits=TRUE,
%% logged2.data=FALSE, save.quant=FALSE, ncmp=ncmp, apply.fits=TRUE,
%% validate=FALSE)

%%}
10 changes: 5 additions & 5 deletions man/funtoonorm.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ This function performs normalization of Illumina Infinium Human Methylation 450
\usage{
funtoonorm(sigA, sigB, Annot = NULL, controlred, controlgrn,
cp.types = NULL, cell_type, ncmp = 4,
ncv.fold = 10, logged.data=FALSE, save.quant=TRUE,
ncv.fold = 10, logged2.data=FALSE, save.quant=TRUE,
type.fits="PCR", apply.fits = TRUE, validate = FALSE)
}

Expand All @@ -36,8 +36,8 @@ Number of components, from either principal component regression or partial leas
\item{ncv.fold}{
Number of cross-validation partitions.
}
\item{logged.data}{
Logical, \verb{TRUE} if data have been previously log transformed (sigA, sigB, controlred, controlgrn), and \verb{FALSE} if not.
\item{logged2.data}{
Logical, \verb{TRUE} if data have been previously log2 transformed (sigA, sigB, controlred, controlgrn), and \verb{FALSE} if not.
}

\item{save.quant}{
Expand Down Expand Up @@ -79,14 +79,14 @@ If \verb{validate=TRUE}, then cross-validation is performed exploring performanc
funtoonormout <- funtoonorm(sigA=sigAsample, sigB=sigBsample,
controlred=matred, controlgrn=matgrn,
cp.types=NULL, cell_type = cell_type,
logged.data=FALSE, save.quant=TRUE, apply.fits=FALSE,
logged2.data=FALSE, save.quant=TRUE, apply.fits=FALSE,
validate=TRUE)
## to normalize methylation data, assuming save.quant was set to TRUE in the previous call:
ncmp <- 4
funtoonormout <- funtoonorm(sigA=sigAsample, sigB=sigBsample, Annot=Annot,
controlred=matred, controlgrn=matgrn,
cp.types=NULL, cell_type = cell_type,
logged.data=FALSE, save.quant=FALSE, ncmp=ncmp, apply.fits=TRUE,
logged2.data=FALSE, save.quant=FALSE, ncmp=ncmp, apply.fits=TRUE,
validate=FALSE)

}
58 changes: 19 additions & 39 deletions vignettes/funtooNorm.Rnw
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ A default annotation table is provided if not supplied including all probes on t
Finally, a number of parameters control whether intermediate calculations should be stored, simply so that the analysis can be performed in stages if desired.

We have provided a small data set containing $N=93$ individuals and 20,000 probes to demonstrate the usage of the package.
The samples are either from cord blood or foetal placenta tissue.
The samples are either from cord blood or fetal placental tissue.

Here is a basic call to normalize this sample data set:
%The code to be inserted
Expand All @@ -91,7 +91,7 @@ Here is a basic call to normalize this sample data set:
funtoonormout <- funtoonorm(sigA=sigAsample, sigB=sigBsample, Annot=NULL,
controlred=matred, controlgrn=matgrn,
cp.types=NULL, cell_type = cell_type,
ncmp=4, ncv.fold=10, logged.data=FALSE, save.quant=TRUE,
ncmp=4, ncv.fold=10, logged2.data=FALSE, save.quant=TRUE,
type.fits="PCR", apply.fits=TRUE, validate=FALSE)
@
Expand All @@ -106,8 +106,8 @@ By default, funtooNorm will perform 10-fold cross-validation, but this can be ch
Other parameters include: \\
\texttt{save.quant:} When \texttt{TRUE}, the quantiles should be saved. When \texttt{FALSE}, saved quantiles from a previous run will be loaded and used. \\
\texttt{apply.fits:} When \texttt{TRUE}, the results of the model fitting process should be used - i.e. the original data should be normalized. This parameter can be set fo \texttt{FALSE} when exploring the desired number of components. \\
\texttt{logged.data:} If \texttt{TRUE}, the \texttt{sigA} and \texttt{sigB} matrices, as well as the control probe data matrices (controlred, controlgrn) are assumed to have been previously log transformed prior to sending the data
into the algorithm. If \texttt{logged.data=FALSE}, then these data will be log transformed (log(1+x)) inside the algorithm. \\
\texttt{logged2.data:} If \texttt{TRUE}, the \texttt{sigA} and \texttt{sigB} matrices, as well as the control probe data matrices (controlred, controlgrn) are assumed to have been previously log2 transformed prior to sending the data
into the algorithm. If \texttt{logged2.data=FALSE}, then these data will be log2 transformed (log2(1+x)) inside the algorithm. \\

The following call performs cross-validation to assess the performance of the model fitting.
Note that here the \texttt{ncmp} parameter does not need to be specified.
Expand All @@ -117,7 +117,7 @@ Note that here the \texttt{ncmp} parameter does not need to be specified.
#of parameter ncmp for PLS regression:
funtoonormmout <- funtoonorm(sigA=sigAsample, sigB=sigBsample,
controlred=matred, controlgrn=matgrn, cp.type = cp.types, cell_type = cell_type,
ncv.fold=10, logged.data=FALSE, save.quant=TRUE,
ncv.fold=10, logged2.data=FALSE, save.quant=TRUE,
type.fits="PCR", apply.fits=FALSE, validate = TRUE)
Expand Down Expand Up @@ -151,7 +151,7 @@ After choosing a desired number of components, in order to run the program to no
# and PLR regresion
funtoonormmout <- funtoonorm(sigA=sigAsample, sigB=sigBsample,
controlred=matred, controlgrn=matgrn, cp.type = cp.types, cell_type = cell_type,
logged.data=FALSE, save.quant=FALSE, ncmp=4,
logged2.data=FALSE, save.quant=FALSE, ncmp=4,
type.fits="PLS", apply.fits=TRUE, validate = FALSE)
@
Expand Down Expand Up @@ -259,6 +259,7 @@ green matrices.
<<>>=
ctrlRed <- matRed[rownames(matRed) %in% ctrlInfo$Address,]
ctrlGrn <- matGrn[rownames(matGrn) %in% ctrlInfo$Address,]
cp.types<-ctrlInfo$Type[match(rownames(ctrlRed),ctrlInfo$Address)]
@

As the \texttt{minfi} vignette describes nicely, the 450k array
Expand All @@ -281,44 +282,22 @@ signal matrices to be the probe names whereas the \texttt{matRed} and
<<>>=
typeI_info <- getProbeInfo(manifest, type="I")
typeII_info <- getProbeInfo(manifest, type="II")
snpI_info <- getProbeInfo(manifest, type="SnpI")
snpII_info <- getProbeInfo(manifest, type="SnpII")
typeIARed <- typeI_info[typeI_info$Color=="Red",]
typeIAGrn <- typeI_info[typeI_info$Color=="Grn",]
probeID <-c(typeI_info$Name, typeII_info$Name, snpI_info$Name, snpII_info$Name)
address<- c(typeI_info$AddressA, typeII_info$AddressA, snpI_info$AddressA, snpII_info$AddressA)
sigAIRed <- matRed[rownames(matRed) %in% typeIARed$AddressA,]
rownames(sigAIRed) <- typeIARed$Name
sigAIGrn <- matGrn[rownames(matGrn) %in% typeIAGrn$AddressA,]
rownames(sigAIGrn) <- typeIAGrn$Name
sigAI <- rbind(sigAIRed, sigAIGrn)
@

The extraction of the signal A matrix for Type II probes is much simpler:
Extraction of the signal A matrix and signal B matrix:
<<>>=
sigAII <- matRed[rownames(matRed) %in% typeII_info$AddressA,]
rownames(sigAII) <- typeII_info$Name
@
Now these the Type I and Type II signal A matrices can be combined:
<<>>=
sigA <- rbind(sigAI, sigAII)
@
The following code block repeats the same steps for the unmethylated B
signal matrix:
<<>>=
typeIBRed <- typeI_info[typeI_info$Color=="Red",]
typeIBGrn <- typeI_info[typeI_info$Color=="Grn",]
sigBIRed <- matRed[rownames(matRed) %in% typeIBRed$AddressB,]
rownames(sigBIRed) <- typeIBRed$Name
sigBIGrn <- matGrn[rownames(matGrn) %in% typeIBGrn$AddressB,]
rownames(sigBIGrn) <- typeIBGrn$Name
sigBI <- rbind(sigBIRed, sigBIGrn)
sigBII <- matGrn[rownames(matGrn) %in% typeII_info$AddressA,]
rownames(sigBII) <- typeII_info$Name
sigB <- rbind(sigBI, sigBII)
sigA<-matRed[rownames(matRed) %in% address,]
rownames(sigA)=probeID[match(rownames(sigA),address)]
sigB<-matGrn[rownames(matGrn) %in% address,]
rownames(sigB)=probeID[match(rownames(sigB),address)]
@

\texttt{funtooNorm} requires a vector with the cell type of each of
Expand All @@ -340,7 +319,8 @@ After these conversion steps, we can run \texttt{funtooNorm()} on the
\texttt{RgsetEx} data:
<<>>=
ftRGsetExOut <- funtoonorm(sigA=sigA, sigB=sigB, controlred=ctrlRed,
controlgrn=ctrlGrn, cell_type=cellType,
controlgrn=ctrlGrn, logged2.data=FALSE,
cp.types=cp.types,cell_type=cellType,
Annot=annot)
@

Expand Down

0 comments on commit 394bcc6

Please sign in to comment.