-
Notifications
You must be signed in to change notification settings - Fork 23
/
princmp.r
67 lines (62 loc) · 2.31 KB
/
princmp.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
princmp <- function(formula, data=environment(formula),
method=c('regular', 'sparse'),
k=min(5, p), cor=TRUE, offset=0.8, col=1,
adj=0, orig=TRUE, pl=TRUE, sw=FALSE) {
method <- match.arg(method)
if(method == 'sparse')
if(! require(pcaPP))
stop('You must install the pcaPP package')
X <- model.matrix.lm(formula, data)
cat('Used', nrow(X), 'observations with no NAs out of', nrow(data), '\n')
X <- X[, -1, drop=FALSE] # remove intercept
p <- ncol(X)
g <- switch(method,
regular = princomp(X, cor=cor),
sparse = sPCAgrid(X, k=k, method='sd',
center=mean,
scale=if(cor) sd else function(x) 1.,
scores=TRUE, maxiter=10) )
co <- unclass(g$loadings)
prz <- function(x, m) {
x <- format(round(x, m), zero.print=FALSE)
print(x, quote=FALSE)
}
type <- switch(method, regular='', sparse='Sparse ')
cat('\n', type, 'PC Coefficients of Standardized Variables\n', sep='')
prz(co[, 1 : k], 3)
p <- ncol(co)
if(orig) {
sds <- g$scale
cat('\n', type, 'PC Coefficients of Original Variables\n', sep='')
co <- co / matrix(rep(sds, p), nrow=length(sds))
prz(co[, 1 : k], 5)
}
if(pl) {
plot(g, type='lines', main='', npcs=k)
vars <- g$sdev^2
cumv <- cumsum(vars) / sum(vars)
p <- length(cumv)
text(1:k, vars[1:k] + offset*par('cxy')[2],
as.character(round(cumv[1:k], 2)),
srt=45, adj=adj, cex=.65, xpd=NA, col=col)
}
if(sw) {
if(! require(leaps)) stop('You must install the leaps package')
for(j in 1 : k) {
cat('\nStepwise Approximations to PC', j, '\n', sep='')
fchar <- capture.output(
f <- regsubsets(X, g$scores[, j], method='forward',
nbest=1, nvmax=max(1, p - 1)))
s <- summary(f)
w <- s$which[, -1, drop=FALSE] # omit intercept
print(t(ifelse(w, '*', ' ')), quote=FALSE)
cat('R2:', round(s$rsq, 3), '\n')
}
}
## Use predict method with newdata to ensure that PCs NA when
## original variables NA
## See https://stackoverflow.com/questions/5616210
X <- model.matrix.lm(formula, data, na.action=na.pass)
pcs <- predict(g, newdata=X)[, 1 : k]
invisible(pcs)
}