diff --git a/demo/Chapitre3.R b/demo/Chapitre3.R index 10b9f50..2828d6e 100644 --- a/demo/Chapitre3.R +++ b/demo/Chapitre3.R @@ -8,7 +8,7 @@ #Chapitre 3 #Analyse en composantes principales -#page 141 +#page 131 #q1 d_macdo<-read.csv("https://tinyurl.com/y3qobgsd") @@ -18,39 +18,38 @@ str(d_macdo) #q3 summary(d_macdo) -#page 143 +#page 132 library(GGally) +ggparcoord(d_macdo, columns = 3:ncol(d_macdo), groupColumn=1, + scale = "std", boxplot = TRUE, alphaLines = 0.5)+facet_grid( + rows=vars(Category))+ theme(axis.text.x=element_text(angle=45, + hjust=1,vjust=1),legend.position="top") -ggparcoord(d_macdo, columns = 3:ncol(d_macdo), groupColumn=1, - scale = "std", boxplot = TRUE, alphaLines = 0.5)+facet_grid( - rows=vars(Category))+ theme(axis.text.x=element_text(angle=45, - hjust=1,vjust=1),legend.position="top") - -if(!require(ggiraphExtra)){install.packages("ggiraphExtra")} +if(!require("ggiraphExtra")){install.packages("ggiraphExtra")} library(ggiraphExtra) ggRadar(d_macdo[,-c(2,3)],aes(facet=Category)) -#page 145 +#page 134 #q4 cor(d_macdo[,-c(1,2,3)])[1:3,1:3] -#page 146 +#page 135 #q5 tmp_d_macdo <- d_macdo p_ <- GGally::print_if_interactive plotList <- list() -plotList[[1]] <- ggally_smooth_lm(tmp_d_macdo, mapping = - ggplot2::aes(x = Calories, y = Total.Fat)) -plotList[[2]] <- ggally_smooth_loess(tmp_d_macdo, mapping = - ggplot2::aes(x = Calories, y = Total.Fat)) -pm <- ggmatrix(plotList, 1, 2, c("Ajustement lin\u00e9aire", - "R\u00e9gression polynomiale locale"), byrow = TRUE) +plotList[[1]] <- ggally_smooth_lm(tmp_d_macdo, mapping = + ggplot2::aes(x = Calories, y = Total.Fat)) +plotList[[2]] <- ggally_smooth_loess(tmp_d_macdo, mapping = + ggplot2::aes(x = Calories, y = Total.Fat)) +pm <- ggmatrix(plotList, 1, 2, c("Ajustement lin\u00e9aire", + "R\u00e9gression polynomiale locale"), byrow = TRUE) p_(pm) ggduo(tmp_d_macdo, c("Calories"), c("Total.Fat"), types = list(continuous = "smooth_loess")) -#page 147 +#page 136 #Pour sauvegarder le graphique enlever les commentaires des quatres lignes ci-dessous # pdf("ggduo.pdf") # ggduo(tmp_d_macdo, c("Calories"), c("Total.Fat"), @@ -59,15 +58,15 @@ ggduo(tmp_d_macdo, c("Calories"), c("Total.Fat"), #q6 library(MVN) -result = mvn(data = d_macdo[,c("Calories","Total.Fat")], - mvnTest = "mardia", - univariateTest = "SW", univariatePlot = "histogram", - multivariatePlot = "qq", - multivariateOutlierMethod = "adj", - showOutliers = TRUE, showNewData = TRUE) +result = mvn(data = d_macdo[,c("Calories","Total.Fat")], + mvnTest = "mardia", + univariateTest = "SW", univariatePlot = "histogram", + multivariatePlot = "qq", + multivariateOutlierMethod = "adj", + showOutliers = TRUE, showNewData = TRUE) result$multivariateNormality -#page 148 +#page 137 d_macdo[83,c("Item","Calories","Total.Fat")] sort(d_macdo$Calories,decreasing = TRUE)[1:5] @@ -79,7 +78,7 @@ stem(d_macdo$Total.Fat) d_macdo$Item[as.numeric(rownames(result$multivariateOutliers))] -#page 150 +#page 139 library(GGally) tmp_d_macdo <- d_macdo tmp_d_macdo$indic_outlier <- as.factor(1:nrow(d_macdo) @@ -89,8 +88,8 @@ colnumbers <- which(colnames(tmp_d_macdo) %in% ggscatmat(tmp_d_macdo, columns = colnumbers, color= "indic_outlier") -#page 151 -d_macdo1 <- d_macdo[-(83),] +#page 140 +d_macdo1 <- d_macdo[-83,] dim(d_macdo1) result1 = mvn(data = d_macdo1[,c("Calories","Total.Fat")], @@ -105,7 +104,7 @@ result1$multivariateNormality library(pspearman) spearman.test(d_macdo[,"Calories"],d_macdo[,"Total.Fat"]) -#page 152 +#page 141 library(coin) set.seed(1133) spearman_test(d_macdo[,"Calories"]~d_macdo[,"Total.Fat"], @@ -114,7 +113,7 @@ spearman_test(d_macdo[,"Calories"]~d_macdo[,"Total.Fat"], cor.test(d_macdo[,"Calories"],d_macdo[,"Total.Fat"], method = "kendall") -#page 153 +#page 142 #q8 library(jmuOutlier) set.seed(1133) @@ -127,21 +126,21 @@ r_c_mdo <- perm.cor.mtest(d_macdo[,c("Calories","Total.Fat", "Cholesterol","Sodium","Sugars","Protein")], num.sim = 50000) lapply(r_c_mdo, round, 4) -#page 154 +#page 143 r_c_mdo$p < .05/(6*5/2) source("http://www.sthda.com/upload/rquery_cormat.r") rquery.cormat(d_macdo[,c("Calories","Total.Fat", "Cholesterol","Sodium","Sugars","Protein")])$r -#page 155 +#page 144 #q10 library(rgl) #q11 plot3d(d_macdo$Calories,d_macdo$Total.Fat, d_macdo$Cholesterol,type="s") -#page 156 +#page 145 #q12 list <- c("Calories", "Total.Fat", "Cholesterol") d_macdo.cr <- scale(d_macdo[, list]) @@ -149,17 +148,15 @@ lims <- c(min(d_macdo.cr),max(d_macdo.cr)) plot3d(d_macdo.cr, type = "s", xlim = lims, ylim = lims,zlim = lims) -#page 157 +#page 146 #q13 d_macdo.cr_df <- as.data.frame(d_macdo.cr) - plot3d(d_macdo.cr, type = "s", xlim = lims, ylim = lims, zlim = lims) - plot3d(ellipse3d(cor(cbind(d_macdo.cr_df$Calories, d_macdo.cr_df$Total.Fat,d_macdo.cr_df$Cholesterol))), col="grey",add=TRUE) -#page 159 +#page 148 #q 16 if(!require("ade4")){install.packages("ade4")} library(ade4) @@ -174,18 +171,21 @@ macdo.acp$cw head(macdo.acp$lw) +#page 149 #q18 round(macdo.acp$eig,3) sum(macdo.acp$eig) -#page 161 +#page 150 #q19 round(pve <- 100*macdo.acp$eig/sum(macdo.acp$eig),3) + round(cumsum(pve),2) + round(cumsum(pve),2)[3:4] -#page 162 +#page 151 #q20 screeplot(macdo.acp) if(!require("factoextra")){install.packages("factoextra")} @@ -194,35 +194,35 @@ fviz_eig(macdo.acp) inertia.dudi(macdo.acp) -#page 163 +#page 152 #q21 s.corcircle(macdo.acp$co, xax=1, yax=2) #q22 round(inertia.dudi(macdo.acp, col.inertia = TRUE)$col.abs,4) -#page 164 +#page 153 round(inertia.dudi(macdo.acp, col.inertia = TRUE)$col.rel, 4) -#page 165 +#page 154 round(macdo.acp$co,4) -#page 166 +#page 155 #q25 round(get_pca_var(macdo.acp)$cos2, 4) #q26 round(sort(rowSums(get_pca_var(macdo.acp)$cos2[,1:2])), 4) -#page 167 +#page 156 fviz_pca_var(macdo.acp, col.var="contrib", gradient.cols= c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE) -#page 168 +#page 157 fviz_pca_var(macdo.acp, col.var = "cos2", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE) -#page 169 +#page 158 fviz_pca_var(macdo.acp, axes = c(1, 3), col.var="contrib", gradient.cols=c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE) fviz_pca_var(macdo.acp, axes = c(1, 3), col.var = "cos2", @@ -231,14 +231,14 @@ fviz_pca_var(macdo.acp, axes = c(1, 3), col.var = "cos2", #q27 s.label(macdo.acp$li, xax = 1, yax = 2) -#page 170 +#page 159 s.label(macdo.acp$li, label=as.character(d_macdo$Item), clabel=0.5) library(yarrr) cont_ind <- get_pca_ind(macdo.acp)$contrib pirateplot(data=cont_ind,point.o=.75, ylab="Contribution",xlab="") -#page 171 +#page 160 rownames(cont_ind) <- tmp_d_macdo$Item round(sort(cont_ind[,1],decreasing=TRUE)[1:20], 4) @@ -248,7 +248,7 @@ round(sort(cont_ind[,3],decreasing=TRUE)[1:20], 4) names(sort(cont_ind[cont_ind[,1]>100/260*5,1],decreasing=TRUE)) -#page 172 +#page 161 names(sort(cont_ind[cont_ind[,2]>100/260*5,2],decreasing=TRUE)) names(sort(cont_ind[cont_ind[,3]>100/260*5,3],decreasing=TRUE)) @@ -259,7 +259,7 @@ fviz_pca_ind(macdo.acp, geom = c("point"), col.ind = fviz_pca_ind(macdo.acp, geom = c("point"), col.ind = "cos2", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")) -#page 174 +#page 163 fviz_pca_ind(macdo.acp, geom = c("point"), alpha.ind = "contrib") fviz_pca_ind(macdo.acp, geom = c("point"), alpha.ind = "cos2") @@ -271,7 +271,7 @@ gcol <- hcl.colors(9, palette = "Dynamic") s.class(dfxy = macdo.acp$li, fac = d_macdo$Category, col = gcol, xax = 1, yax = 2) -#page 176 +#page 165 #q30 fviz_pca_biplot(macdo.acp, col.ind ="contrib", col.var ="contrib") fviz_pca_biplot(macdo.acp, col.ind = "cos2", col.var = "cos2") @@ -285,7 +285,7 @@ tmp_macdo$tab=d_macdo[, list] macdo.acp.dudi <- dudi.pca(tmp_macdo$tab, center = TRUE, scale = FALSE, scan = FALSE, nf = 3) -#page 177 +#page 166 macdo.acp.dudi1 <- dudi.pca(tmp_macdo$tab, center = TRUE, scale = TRUE, scan = FALSE, nf = 3) g1 <- s.class(macdo.acp.dudi$li, tmp_macdo$Category, plot = FALSE) @@ -297,24 +297,24 @@ g4 <- s.corcircle(macdo.acp.dudi1$co, lab = names(macdo.acp$tab), G1 <- rbindADEg(cbindADEg(g1, g2, plot = FALSE), cbindADEg(g3, g4, plot = FALSE), plot = TRUE) -#page 178 +#page 167 head(sort(apply(d_macdo[,-(1:3)],2,var),decreasing=TRUE)) #Analyse factorielle des correspondances -#page 178 +#page 167 d_vac<-read.csv2("https://tinyurl.com/y3emuylu", row.names = 1) rownames(d_vac) -#page 179 +#page 168 str(d_vac) barplot(t(d_vac),beside=TRUE, col = hcl.colors(8, palette = "Dynamic"), legend.text = colnames(d_vac), args.legend = list(bg = "white", x = "topleft")) -#page 180 +#page 169 if(!require(ggpubr)){install.packages("ggpubr")} library(ggpubr) @@ -326,7 +326,7 @@ ggballoonplot(d_vac, fill = "value") + scale_fill_gradientn( library(ade4) table.cont(d_vac) -#page 181 +#page 170 sum(d_vac) khi.test.d_vac <- chisq.test(d_vac) @@ -334,7 +334,7 @@ round(khi.test.d_vac$expected, 2) khi.test.d_vac -#page 182 +#page 171 set.seed(1133) chisq.test(d_vac, simulate.p.value = TRUE, B=50000) @@ -344,7 +344,7 @@ mosaicplot(d_vac, type = "pearson", shade = TRUE, las = 2, mosaicplot(t(d_vac), type = "pearson", shade = TRUE, las = 2, main = "Associations et r\u00e9sidus du test du chi2") -#page 183 +#page 172 if(!require("vcd")){install.packages("vcd")} library(vcd) @@ -354,17 +354,17 @@ main="Associations et r\u00e9sidus du test du test du chi2", labeling_args=list(abbreviate=c(A=TRUE))) -#page 184 +#page 173 if(!require(FactoMineR)){install.packages("FactoMineR")} library(FactoMineR) (res.ca.d_vac<-CA(d_vac, ncp=4, graph=FALSE)) -#page 185 +#page 174 library(factoextra) round(eig.val <- get_eigenvalue(res.ca.d_vac), 3) -#page 186 +#page 175 as.numeric(colSums(eig.val)[1]) sqrt(as.numeric(khi.test.d_vac$statistic)/sum(d_vac)/ @@ -380,7 +380,7 @@ CramerV(d_vac) Phi(d_vac) -#page 187 +#page 176 CramerV(d_vac, conf.level = .95) d_vac.tab <- as.table(as.matrix(d_vac)) @@ -398,7 +398,7 @@ lattice::bwplot(v) quantile(v, probs=c(0.025,0.975)) -#page 189 +#page 178 set.seed(1133) idx.perm <- replicate(n,sample(nrow(d_vac.frm), replace=FALSE)) v.perm <- apply(idx.perm, 2, function(x) CramerV(d_vac.frm[,1], @@ -410,15 +410,16 @@ mean(v.perm>=CramerV(d_vac)) library(factoextra) fviz_eig(res.ca.d_vac) -#page 190 +#page 179 mean(res.ca.d_vac$eig[,1]) res.ca.d_vac$eig[,2]>1/(ncol(d_vac)-1)*100 fviz_ca_row(res.ca.d_vac) fviz_ca_col(res.ca.d_vac) + fviz_ca_biplot(res.ca.d_vac) -#page 191 +#page 180 fviz_ca_row(res.ca.d_vac) fviz_ca_row(res.ca.d_vac, col.row="contrib") fviz_ca_row(res.ca.d_vac, col.row="cos2") @@ -433,7 +434,7 @@ fviz_ca_biplot(res.ca.d_vac) fviz_ca_biplot(res.ca.d_vac, col.row="contrib", col.col="contrib") fviz_ca_biplot(res.ca.d_vac, col.row="cos2", col.col="cos2") -#page 192 +#page 181 rowpr <- fviz_ca_biplot(res.ca.d_vac, map="rowprincipal", arrow = c(TRUE, TRUE), repel=TRUE) colpr <- fviz_ca_biplot(res.ca.d_vac, map="colprincipal", arrow = @@ -443,18 +444,18 @@ ggmatrix(list(rowpr, colpr),1,2) #Analyse non symetrique des correspondances -#page 193 +#page 182 d_TM<-read.csv2("https://tinyurl.com/y55e3k9y", row.names = 1) rownames(d_TM) str(d_TM) -#page 194 +#page 183 barplot(t(d_TM), beside=TRUE, names=d_TM$Task, col = c("red", "green","blue","purple"), legend.text = colnames(d_TM), args.legend = list(bg = "white", x = "top")) -#page 195 +#page 184 if(!require(ggpubr)){install.packages("ggpubr")} library(ggpubr) my_cols <- c("#0D0887FF", "#6A00A8FF", "#B12A90FF", @@ -467,14 +468,14 @@ table.cont(d_TM) sum(d_TM) -#page 196 +#page 185 if(!require(DescTools)){install.packages("DescTools")} library(DescTools) Lambda(d_TM) Lambda(d_TM, conf.level=0.95) -#page 197 +#page 186 Lambda(d_TM, direction="row", conf.level=0.95) Lambda(d_TM, direction="column", conf.level=0.95) @@ -497,14 +498,14 @@ lattice::bwplot(lR.d_TM) lattice::bwplot(lC.d_TM) lattice::bwplot(l.d_TM) -#page 198 +#page 187 quantile(lR.d_TM, probs=c(0.025,0.975)) quantile(lC.d_TM, probs=c(0.025,0.975)) quantile(l.d_TM, probs=c(0.025,0.975)) -#page 198-199 +#page 187-188 n <- 10000; set.seed(1133) idx.perm <- replicate(n,sample(nrow(d_TM.frm), replace=FALSE)) lR.perm.d_TM <- apply(idx.perm, 2, function(x) Lambda(d_TM.frm[,1], @@ -525,26 +526,26 @@ mean(l.perm.d_TM>=Lambda(d_TM)) GoodmanKruskalTau(d_TM.tab, direction="column", conf.level=0.95) -#page 200 +#page 189 GoodmanKruskalTau(d_TM.tab, direction="row", conf.level=0.95) library(ade4) (res.nsc.d_TM <- dudi.nsc(d_TM, scan = FALSE)) -#page 201 +#page 190 library(adegraphics) g1 <- s.label(res.nsc.d_TM$c1, plab.cex = 1.25) g2 <- s.arrow(res.nsc.d_TM$li, add = TRUE, plab.cex = 0.75) #Analyse des correspondances multiples -#page 202 +#page 191 poke<-read.csv("https://tinyurl.com/y4y6a86m",na.strings= c("","NA")) poke<-as.data.frame(poke) poke$Generation<-as.factor(poke$Generation) summary(poke) poke.x<-poke[,c(3,12,13)] -#page +#page 192 library(ade4); library(adegraphics) res.acm.poke<-dudi.acm(poke.x,scannf=FALSE) @@ -555,16 +556,16 @@ fviz_screeplot(res.acm.poke) get_eig(res.acm.poke) -#page 204 +#page 193 res.acm.poke$cr -#page 205 +#page 194 score(res.acm.poke, xax=1) score(res.acm.poke, xax=1, type = "boxplot") boxplot(res.acm.poke) ade4::s.corcircle(res.acm.poke$co, clabel = 0.7) -#page 206 +#page 195 library(devtools) if(!require(JLutils)){install_github("larmarange/JLutils")} library(JLutils) @@ -575,7 +576,7 @@ fviz_mca_biplot(res.acm.poke) scatter(res.acm.poke) #Analyse factorielle des donnees mixtes -#page 208 +#page 197 if(!require("PCAmixdata")){install.packages("PCAmixdata")} library(PCAmixdata) @@ -583,21 +584,21 @@ round(cor(poke[,c(7,8,9,10,11)]),2) mix.poke<-PCAmix(subset(poke,select=7:11),subset(poke,select=3)) -#page 209 +#page 198 round(mix.poke$eig, 2) round(mix.poke$categ.coord, 2) #Classification ascendante hierarchique et methode des K-moyennes -#page 211 +#page 200 data_event <- read.csv("https://tinyurl.com/y2k7mwbr") head(data_event) -#page 212 +#page 201 list_col <- c("sort_order","time","event_team","fthg","odd_h") data_event.x <- data_event[,list_col] -#page 213 +#page 202 data_event.c<-aggregate(.~event_team,data=data_event.x,FUN=mean) str(data_event.c) @@ -608,7 +609,7 @@ head(data_event.c) library(GGally) ggpairs(data_event.c) -#page 215 +#page 204 d.data_event <- dist(data_event.c) cah.ward <- hclust(d.data_event, method="ward.D2") plot(cah.ward, xlab="\u00e9quipe de football", ylab="", @@ -617,40 +618,40 @@ if(!require(ggdendro)){install.packages("ggdendro")} library(ggdendro) ggdendrogram(cah.ward, rotate = FALSE, size = 2) -#page 216 +#page 205 library(JLutils) best.cutree(cah.ward, min = 3, graph = TRUE, xlab = "Nombre de classes", ylab = "Inertie relative") -#page 217 +#page 206 (groupes.cah <- cutree(cah.ward,k=5)) table(groupes.cah) plot(cah.ward, xlab="\u00e9quipe de football", ylab="", main="Dendrogramme", sub="", axes=TRUE, cex=0.5) rect.hclust(cah.ward, 5) -#page 218 +#page 207 library(factoextra) hc.cut <- hcut(d.data_event, k = 5, hc_method = "complete") fviz_dend(hc.cut, show_labels = TRUE, rect = TRUE) fviz_cluster(hc.cut, ellipse.type = "convex",data=d.data_event) -#page 220 +#page 209 if(!require("vegan")){install.packages("vegan")} library(vegan) k.event.cal <- cascadeKM(data_event.c, 3, 10, iter = 100, criterion = "calinski") plot(k.event.cal) -#page 221 +#page 210 set.seed(1133) groupes.kmeans <- kmeans(data_event.c,centers=5,nstart=1000) print(groupes.kmeans) -#page 222 +#page 211 print(table(groupes.cah,groupes.kmeans=groupes.kmeans$cluster)) -#page 223 +#page 212 library(FactoMineR) res.pca <- PCA(data_event.c, ncp = 3, graph = FALSE) get_eig(res.pca) @@ -660,13 +661,13 @@ res.hcpc <- HCPC(res.pca, graph = FALSE) fviz_dend(res.hcpc, cex = 0.7, palette = "jco", rect = TRUE, rect_fill = TRUE, rect_border = "jco", labels_track_height = 0.8) -#page 224 +#page 213 fviz_cluster(res.hcpc, repel = TRUE, show.clust.cent = TRUE, palette = "jco", ggtheme = theme_minimal(), main = "Factor map") plot(res.hcpc, choice = "3D.map") -#page 225 +#page 214 #Exercice 3.1 read.csv("https://tinyurl.com/y3rxbxoo") @@ -674,7 +675,7 @@ read.csv("https://tinyurl.com/y3rxbxoo") read.csv("https://tinyurl.com/yyoowvkl") read.csv("https://tinyurl.com/yyolq665") -#page 226 +#page 215 #Exercice 3.4 read.csv("https://tinyurl.com/y5gffvsb") diff --git a/demo/Chapitre4.R b/demo/Chapitre4.R index b4828fd..e55cefd 100644 --- a/demo/Chapitre4.R +++ b/demo/Chapitre4.R @@ -7,14 +7,14 @@ #Chapitre 4 -#page 266 +#page 248 if(!require("BioStatR")){install.packages("BioStatR")} library(BioStatR) str(Mesures) summary(Mesures) -#page 267 +#page 249 if(!require("ISLR")){install.packages("ISLR")} library(ISLR) summary(Hitters[,17:20]) @@ -25,7 +25,7 @@ summary(model) Mes.B$masse -#page 268 +#page 250 round(fitted(model), 2) round(residuals(model), 2) @@ -34,7 +34,7 @@ if(!require("broom")){install.packages("broom")} library(broom) (model.diag.metrics<-augment(model)) -#page 270 +#page 252 tidy(model) glance(model) @@ -49,7 +49,7 @@ with(Mes.B,segments(taille, masse, taille, fitted(model), lty=2, legend("topleft",lty=c(1,2,4),legend = c("lm","lowess", "smooth.spline"),lwd=2,col=c("red","blue","orange")) -#page 271 +#page 253 if(!require("ggiraphExtra")){install.packages("ggiraphExtra")} library(ggiraphExtra) ggPredict(model) @@ -63,12 +63,12 @@ abline(h=0) with(Mes.B,lines(lowess(fitted(model),masse-fitted(model)),col= "red",lwd=2)) -#page 272 +#page 254 library(MASS) model.r <- lqs(masse ~ taille, data=Mes.B) summary(model.r) -#page 273 +#page 255 coefficients(model.r) model.r$bestone @@ -77,17 +77,22 @@ coef(lm(masse ~ taille, data=Mes.B[model.r$bestone,])) with(Mes.B,1-sum(residuals(model.r)^2)/sum((masse-mean(masse))^2)) -#page 274 +#page 256 with(Mes.B,1-sum(residuals(model)^2)/sum((masse-mean(masse))^2)) plot(masse ~ taille, data=Mes.B, xlab="Taille", ylab="Masse") abline(model.r, lty=1) abline(model, lty=2) -legend("topleft", legend=c("Robuste","Moindres carr\u00e9s"),lty=1:2) +with(Mes.B,points(taille[model.r$bestone],masse[model.r$bestone], + pch=19,col="red")) +abline(lm(masse ~ taille, data=Mes.B[model.r$bestone,]), lty=3) +legend("topleft", legend=c("R\u00e9sistante : moindres carr\u00e9s tronqu\u00e9s", + "Moindres carr\u00e9s ordinaires", + "Droite ajust\u00e9e entre les deux observations"), lty=1:3) shapiro.test(residuals(model.r)) -#page 275 +#page 257 model2<-lm(masse~taille+I(taille^2),data=Mes.B) summary(model2) @@ -103,10 +108,10 @@ with(Mes.B,segments(taille,masse,taille,fitted(model2),lty=2, legend("topleft",legend=c("Moindres carr\u00e9s ordinaires", "Ajustement local"),lty=c(1,3),lwd=2,col=c("red","blue")) -#page 276 +#page 258 shapiro.test(residuals(model2)) -#page 277 +#page 259 confint(model2) anova(model2) @@ -116,12 +121,12 @@ my.confidence.region(model2, which=1) my.confidence.region(model2, which=2) my.confidence.region(model2, which=3) -#page 278 +#page 260 set.seed(314) model2.r<-lqs(masse~taille+I(taille^2),data=Mes.B) rbind(coefficients(model2),coefficients(model2.r)) -#page 279 +#page 261 shapiro.test(residuals(model2.r)) plot(masse~taille,data=Mes.B,xlab="Taille",ylab="Masse",pch=20) @@ -135,7 +140,7 @@ with(Mes.B,segments(taille,masse,taille,fitted(model2.r),lty=2, legend("topleft",legend=c("R\u00e9sistante : moindres carr\u00e9s trim\u00e9s", "Ajustement local"),lty=c(2,3),lwd=2,col=c("green","blue")) -#page 279-280 +#page 261 plot(masse~taille,data=Mes.B,xlab="Taille",ylab="Masse",pch=20) with(Mes.B,lines(lowess(taille,masse),lty=3,lwd=2,col="blue")) with(Mes.B,points(sort(taille),fitted(model2)[order(taille)], @@ -150,18 +155,18 @@ legend("topleft",legend=c("Moindres carr\u00e9s ordinaires", "R\u00e9sistante : moindres carr\u00e9s trim\u00e9s","Ajustement local"),lty=1:3 ,lwd=2,col=c("red","green","blue")) -#page 280 +#page 262 data("mtcars") View(mtcars) help(mtcars) -#page 281 +#page 263 head(mtcars,n=10) ?mtcars str(mtcars) -#page 282 +#page 264 mtcars2 <- within(mtcars, { vs <- factor(vs, labels = c("V", "S")) am <- factor(am, labels = c("automatic", "manual")) @@ -173,7 +178,7 @@ summary(mtcars2) subsetmtcars<-mtcars[,c(1,3,4,5,6,7)] -#page 283 +#page 265 library(MVN) set.seed(1133) result1 = mvn(data = subsetmtcars, @@ -186,14 +191,14 @@ result1$multivariateNormality result1$multivariateOutliers -#page 284 +#page 266 library(corrplot); set.seed(1133) permmtcars <- perm.cor.mtest(subsetmtcars) permmtcars$p<.05/choose(ncol(subsetmtcars),2) corrplot(permmtcars$cor,p.mat=permmtcars$p,pch.col="white",insig= "label_sig",sig.level=.05/choose(ncol(subsetmtcars),2)) -#page 285 +#page 267 fit.mtcars<-lm(mpg~.,data=subsetmtcars) fit.mtcars @@ -207,14 +212,16 @@ shapiro.test(residuals(fit2.mtcars)) covratio(fit2.mtcars) dffits(fit2.mtcars) dfbetas(fit2.mtcars) -car::vif(fit2.mtcars) -perturb::colldiag(fit2.mtcars) +library(car) +vif(fit2.mtcars) -#page 286 +#page 268 +library(perturb) +colldiag(fit2.mtcars) plot(fit2.mtcars) influence.measures(fit2.mtcars) -car::influencePlot(fit2.mtcars) -car::influenceIndexPlot(fit2.mtcars) +influencePlot(fit2.mtcars) +influenceIndexPlot(fit2.mtcars) summary(fit2.mtcars) anova(fit2.mtcars) @@ -226,7 +233,7 @@ if(!require("mice")){install.packages("mice")} library(mice) md.pattern(Hitters) -#page 260 +#page 269 md.pairs(Hitters) library(dplyr) @@ -240,7 +247,7 @@ dependent = "Salary" Hitters %>% missing_pattern(dependent, explanatory) -#page 288 +#page 270 if(!require("naniar")){install.packages("naniar")} library(naniar);library(ggplot2) Hitters %>% @@ -249,16 +256,16 @@ Hitters %>% fill = Salary_NA)) + geom_density(alpha = 0.5) -#page 289 +#page 271 gg_miss_var(Hitters) try(gg_miss_upset(Hitters)) -#page 290 +#page 272 #Exercice 4.1 data(anscombe) str(anscombe) -#page 291 +#page 273 #Exercice 4.2 Hitters = na.omit(Hitters) @@ -276,7 +283,7 @@ points(10, reg.summary$cp[10], pch = 20, col = "red") plot(regfit19.full, scale = "Cp") coef(regfit19.full, 10) -#page 292 +#page 274 #q5 regfit.fwd = regsubsets(Salary ~ ., data = Hitters, nvmax = 19, method = "forward") @@ -298,7 +305,7 @@ for (i in 1:length(summary(regfit.fwd)$rss)) { val.errors = c(val.errors,mean((Hitters$Salary[-train]-pred)^2)) } -#page 293 +#page 275 #q6 (suite) mod.ordre=as.numeric(unlist(sapply(table(n.vars), function(x){1:x}))) @@ -307,7 +314,7 @@ plot(n.vars,sqrt(val.errors), ylab ="Root MSE",pch =as.character( lines(n.vars[mod.ordre==1],sqrt(val.errors)[mod.ordre==1], lwd=2,lty=2) -#page 294 +#page 276 #q7 predict.regsubsets = function(object, newdata, id, ...) { form = as.formula(object$call[[2]]) @@ -325,27 +332,27 @@ plot(lasso_model_cv) n_best_m=which(lasso_model_cv$lambda==lasso_model_cv$lambda.min) lasso_model_cv$glmnet.fit$beta[,n_best_m] -#page 295 +#page 277 #Exercice 4.3 #q1 CancerSein <- read.csv("https://tinyurl.com/y3l6sh59") -#page 297 +#page 279 #Exercice 4.4 #q1 SidaChat <- read.csv("https://tinyurl.com/yxe6yxem") -#page 300 +#page 282 #Exercice 4.5 #q1 Vitamines <- read.csv("https://tinyurl.com/y3shxcsd") -#page 301 +#page 283 #Exercice 4.6 #q1 Beton <- read.csv("https://tinyurl.com/y4w2qv9t") -#page 302 +#page 284 #Exercice 4.7 #q1 chal <- read.csv("https://tinyurl.com/yyb3cztf") @@ -353,7 +360,7 @@ chal <- read.csv("https://tinyurl.com/yyb3cztf") cdplot(as.factor(Defaillance)~Temperature, data=chal, ylab="Defaillance") -#page 303 +#page 285 #q3 chal.glm <- glm(Defaillance~Temperature,data=chal, family="binomial") @@ -361,7 +368,7 @@ chal.glm <- glm(Defaillance~Temperature,data=chal, if(!require(hnp)){install.packages("hnp")} hnp(chal.glm, sim = 99, conf = 0.95) -#page 304 +#page 286 #q6 if(!require(rms)){install.packages("rms")} library(rms) @@ -369,12 +376,11 @@ chal.lrm <- lrm(Defaillance~Temperature, data=chal, x=TRUE, y=TRUE) print(chal.lrm) residuals(chal.lrm, "gof") -#page 305 #Exercice 4.8 #q1 Cypermethrine <- read.csv("https://tinyurl.com/y4deakfd") -#page 306 +#page 288 #Exercice 4.9 #q1 poly <- read.csv("https://tinyurl.com/yyhhcw37") @@ -397,7 +403,7 @@ hnp(poly_glm3, sim = 99, conf = 0.95) summary(poly_glm3) confint(poly_glm3) -#page 307 +#page 289 with(poly,plot(nombre~age,type="n",ylab="Nombre de polypes", xlab="\u00c2ge")) with(poly,points(age[traitement=="placebo"],fitted(poly_glm2)[ diff --git a/demo/SolChapitre3.R b/demo/SolChapitre3.R index d7d4c5c..6cf87d0 100644 --- a/demo/SolChapitre3.R +++ b/demo/SolChapitre3.R @@ -7,7 +7,7 @@ #Chapitre 3 : solution des exercices -#page 227 +#Complement en ligne #Exercice 3.1 d_hotels <- read.csv("https://tinyurl.com/y3rxbxoo") head(d_hotels) @@ -23,13 +23,11 @@ head(d_pres2002[,1:3]) mosaicplot(d_pres2002, type = "pearson", shade = TRUE, las = 2, main = "Associations et r\u00e9sidus du test du chi2") -#page 228 d_pres2007 <- read.csv("https://tinyurl.com/yyolq665",row.names=1) head(d_pres2007[,1:8]) mosaicplot(d_pres2007, type = "pearson", shade = TRUE, las = 2, main = "Associations et r\u00e9sidus du test du chi2") -#page 229 #q1 data(UCBAdmissions) str(UCBAdmissions) @@ -37,7 +35,6 @@ mosaicplot(UCBAdmissions) library(vcd) assoc(UCBAdmissions) -#page 230 #q2 library(FactoMineR) try(MCA(UCBAdmissions)) @@ -50,10 +47,8 @@ head(UCBA.df) str(UCBA.df) -#page 231 MCA(UCBA.df, graph=FALSE) -#page 232 #Exercice 3.4 d_wow <- read.csv("https://tinyurl.com/y5gffvsb", row.names =1) head(d_wow[,1:3]) @@ -62,7 +57,6 @@ wow.cah.ward <- hclust(d.d_wow, method="ward.D2") library(ggdendro) ggdendrogram(wow.cah.ward, labels = FALSE) -#page 233 #Exercice 3.5 d_hotels <- read.csv("https://tinyurl.com/y3rxbxoo", row.names=1) head(d_hotels) diff --git a/demo/SolChapitre4.R b/demo/SolChapitre4.R index a91bba0..e332153 100644 --- a/demo/SolChapitre4.R +++ b/demo/SolChapitre4.R @@ -7,7 +7,7 @@ #Chapitre 4 : solution des exercices -#page 308 +#Complement en ligne #Exercice 4.4 #q1 CancerSein <- read.csv("https://tinyurl.com/y3l6sh59") @@ -44,7 +44,6 @@ plot(Defaillance~Temperature,data=chal) cdplot(as.factor(Defaillance)~Temperature, data=chal, ylab="Defaillance") -#page 309 #q3 chal.glm<-glm(Defaillance~Temperature,data=chal,family="binomial") if(!require("hnp")){install.packages("hnp")} @@ -80,7 +79,6 @@ read.csv("https://tinyurl.com/y4w2qv9t") #q1 read.csv("https://tinyurl.com/y4deakfd") -#page 310 #Exercice 4.10 #q1 poly <- read.csv("https://tinyurl.com/yyhhcw37") diff --git a/docs/articles/Chapitre3.html b/docs/articles/Chapitre3.html index e171001..9c99968 100644 --- a/docs/articles/Chapitre3.html +++ b/docs/articles/Chapitre3.html @@ -377,7 +377,7 @@
#Chapitre 3
#Analyse en composantes principales
-#page 141
+#page 131
#q1
d_macdo<-read.csv("https://tinyurl.com/y3qobgsd")
@@ -490,46 +490,46 @@ 08 septembre, 2019
## 3rd Qu.:30.00 3rd Qu.:15.000
## Max. :70.00 Max. :40.000
##
-#page 143
+#page 132
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
-ggparcoord(d_macdo, columns = 3:ncol(d_macdo), groupColumn=1,
- scale = "std", boxplot = TRUE, alphaLines = 0.5)+facet_grid(
- rows=vars(Category))+ theme(axis.text.x=element_text(angle=45,
- hjust=1,vjust=1),legend.position="top")
+ggparcoord(d_macdo, columns = 3:ncol(d_macdo), groupColumn=1,
+ scale = "std", boxplot = TRUE, alphaLines = 0.5)+facet_grid(
+ rows=vars(Category))+ theme(axis.text.x=element_text(angle=45,
+ hjust=1,vjust=1),legend.position="top")
-if(!require(ggiraphExtra)){install.packages("ggiraphExtra")}
+if(!require("ggiraphExtra")){install.packages("ggiraphExtra")}
## Loading required package: ggiraphExtra
library(ggiraphExtra)
ggRadar(d_macdo[,-c(2,3)],aes(facet=Category))
-#page 145
+#page 134
#q4
cor(d_macdo[,-c(1,2,3)])[1:3,1:3]
## Calories Calories.from.Fat Total.Fat
## Calories 1.0000000 0.9045878 0.9044092
## Calories.from.Fat 0.9045878 1.0000000 0.9996635
## Total.Fat 0.9044092 0.9996635 1.0000000
-#page 146
+#page 135
#q5
tmp_d_macdo <- d_macdo
p_ <- GGally::print_if_interactive
plotList <- list()
-plotList[[1]] <- ggally_smooth_lm(tmp_d_macdo, mapping =
- ggplot2::aes(x = Calories, y = Total.Fat))
-plotList[[2]] <- ggally_smooth_loess(tmp_d_macdo, mapping =
- ggplot2::aes(x = Calories, y = Total.Fat))
-pm <- ggmatrix(plotList, 1, 2, c("Ajustement lin\u00e9aire",
- "R\u00e9gression polynomiale locale"), byrow = TRUE)
+plotList[[1]] <- ggally_smooth_lm(tmp_d_macdo, mapping =
+ ggplot2::aes(x = Calories, y = Total.Fat))
+plotList[[2]] <- ggally_smooth_loess(tmp_d_macdo, mapping =
+ ggplot2::aes(x = Calories, y = Total.Fat))
+pm <- ggmatrix(plotList, 1, 2, c("Ajustement lin\u00e9aire",
+ "R\u00e9gression polynomiale locale"), byrow = TRUE)
p_(pm)
ggduo(tmp_d_macdo, c("Calories"), c("Total.Fat"),
types = list(continuous = "smooth_loess"))
-#page 147
+#page 136
#Pour sauvegarder le graphique enlever les commentaires des quatres lignes ci-dessous
# pdf("ggduo.pdf")
# ggduo(tmp_d_macdo, c("Calories"), c("Total.Fat"),
@@ -539,19 +539,19 @@ 08 septembre, 2019
#q6
library(MVN)
## sROC 0.1-2 loaded
-result = mvn(data = d_macdo[,c("Calories","Total.Fat")],
- mvnTest = "mardia",
- univariateTest = "SW", univariatePlot = "histogram",
- multivariatePlot = "qq",
- multivariateOutlierMethod = "adj",
- showOutliers = TRUE, showNewData = TRUE)
+result = mvn(data = d_macdo[,c("Calories","Total.Fat")],
+ mvnTest = "mardia",
+ univariateTest = "SW", univariatePlot = "histogram",
+ multivariatePlot = "qq",
+ multivariateOutlierMethod = "adj",
+ showOutliers = TRUE, showNewData = TRUE)
result$multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 224.229958917198 2.30464057341325e-47 NO
## 2 Mardia Kurtosis 22.0280366224199 0 NO
## 3 MVN <NA> <NA> NO
-#page 148
+#page 137
d_macdo[83,c("Item","Calories","Total.Fat")]
## Item Calories Total.Fat
## 83 Chicken McNuggets (40 piece) 1880 118
@@ -589,7 +589,7 @@ 08 septembre, 2019
## [10] Big Breakfast with Hotcakes and Egg Whites (Large Biscuit)
## [11] Big Breakfast with Hotcakes and Egg Whites (Regular Biscuit)
## 260 Levels: 1% Low Fat Milk Jug ... Vanilla Shake (Small)
-#page 150
+#page 139
library(GGally)
tmp_d_macdo <- d_macdo
tmp_d_macdo$indic_outlier <- as.factor(1:nrow(d_macdo)
@@ -599,8 +599,8 @@ 08 septembre, 2019
ggscatmat(tmp_d_macdo, columns = colnumbers, color=
"indic_outlier")
-#page 151
-d_macdo1 <- d_macdo[-(83),]
+#page 140
+d_macdo1 <- d_macdo[-83,]
dim(d_macdo1)
## [1] 259 24
result1 = mvn(data = d_macdo1[,c("Calories","Total.Fat")],
@@ -629,7 +629,7 @@ 08 septembre, 2019
## sample estimates:
## rho
## 0.8962887
-#page 152
+#page 141
library(coin)
## Loading required package: survival
set.seed(1133)
@@ -652,7 +652,7 @@ 08 septembre, 2019
## sample estimates:
## tau
## 0.7339707
-#page 153
+#page 142
#q8
library(jmuOutlier)
set.seed(1133)
@@ -691,7 +691,7 @@ 08 septembre, 2019
## [4,] 0.7123 0.8462 0.6244 1.0000 -0.4265 0.8698
## [5,] 0.2596 -0.1154 -0.1355 -0.4265 1.0000 -0.1799
## [6,] 0.7878 0.8078 0.5616 0.8698 -0.1799 1.0000
-#page 154
+#page 143
r_c_mdo$p < .05/(6*5/2)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE
@@ -711,14 +711,14 @@ 08 septembre, 2019
## Total.Fat -0.12 0.68 0.9 1
## Sodium -0.43 0.62 0.71 0.85 1
## Protein -0.18 0.56 0.79 0.81 0.87 1
-#page 155
+#page 144
#q10
library(rgl)
#q11
plot3d(d_macdo$Calories,d_macdo$Total.Fat, d_macdo$Cholesterol,type="s")
-#page 156
+#page 145
#q12
list <- c("Calories", "Total.Fat", "Cholesterol")
d_macdo.cr <- scale(d_macdo[, list])
@@ -726,17 +726,15 @@ 08 septembre, 2019
plot3d(d_macdo.cr, type = "s", xlim = lims,
ylim = lims,zlim = lims)
-#page 157
+#page 146
#q13
d_macdo.cr_df <- as.data.frame(d_macdo.cr)
-
plot3d(d_macdo.cr, type = "s", xlim = lims,
ylim = lims, zlim = lims)
-
plot3d(ellipse3d(cor(cbind(d_macdo.cr_df$Calories,
d_macdo.cr_df$Total.Fat,d_macdo.cr_df$Cholesterol))), col="grey",add=TRUE)
-#page 159
+#page 148
#q 16
if(!require("ade4")){install.packages("ade4")}
## Loading required package: ade4
@@ -758,14 +756,15 @@ 08 septembre, 2019
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
head(macdo.acp$lw)
## [1] 0.003846154 0.003846154 0.003846154 0.003846154 0.003846154 0.003846154
-#q18
+#page 149
+#q18
round(macdo.acp$eig,3)
## [1] 11.027 3.796 1.667 1.038 0.925 0.821 0.557 0.461 0.360 0.197
## [11] 0.067 0.050 0.020 0.012 0.001 0.000 0.000 0.000 0.000 0.000
## [21] 0.000
sum(macdo.acp$eig)
## [1] 21
-#page 161
+#page 150
#q19
round(pve <- 100*macdo.acp$eig/sum(macdo.acp$eig),3)
## [1] 52.508 18.075 7.938 4.943 4.406 3.911 2.651 2.197 1.715 0.936
@@ -777,7 +776,7 @@ 08 septembre, 2019
## [21] 100.00
round(cumsum(pve),2)[3:4]
## [1] 78.52 83.46
-#page 162
+#page 151
#q20
screeplot(macdo.acp)
@@ -814,7 +813,7 @@ 08 septembre, 2019
## Ax19 2.047e-04 21.00 100.00
## Ax20 1.345e-04 21.00 100.00
## Ax21 6.221e-05 21.00 100.00
-#page 163
+#page 152
#q21
s.corcircle(macdo.acp$co, xax=1, yax=2)
@@ -842,7 +841,7 @@ 08 septembre, 2019
## Vitamin.C....Daily.Value. 0.0534 0.7587 12.0094
## Calcium....Daily.Value. 0.7818 12.0210 1.9947
## Iron....Daily.Value. 6.2362 3.4796 0.2448
-#page 164
+#page 153
round(inertia.dudi(macdo.acp, col.inertia = TRUE)$col.rel, 4)
## Axis1 Axis2 Axis3
## Calories -87.4265 -8.3525 0.3096
@@ -866,7 +865,7 @@ 08 septembre, 2019
## Vitamin.C....Daily.Value. 0.5894 2.8801 20.0201
## Calcium....Daily.Value. -8.6207 -45.6297 3.3252
## Iron....Daily.Value. -68.7639 13.2080 0.4080
-#page 165
+#page 154
round(macdo.acp$co,4)
## Comp1 Comp2 Comp3
## Calories -0.9350 -0.2890 0.0556
@@ -890,7 +889,7 @@ 08 septembre, 2019
## Vitamin.C....Daily.Value. 0.0768 0.1697 0.4474
## Calcium....Daily.Value. -0.2936 -0.6755 0.1824
## Iron....Daily.Value. -0.8292 0.3634 0.0639
-#page 166
+#page 155
#q25
round(get_pca_var(macdo.acp)$cos2, 4)
## Dim.1 Dim.2 Dim.3
@@ -939,15 +938,15 @@ 08 septembre, 2019
## 0.9155 0.9162
## Calories
## 0.9578
-#page 167
+#page 156
fviz_pca_var(macdo.acp, col.var="contrib", gradient.cols=
c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE)
-#page 168
+#page 157
fviz_pca_var(macdo.acp, col.var = "cos2", gradient.cols =
c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE)
-#page 169
+#page 158
fviz_pca_var(macdo.acp, axes = c(1, 3), col.var="contrib",
gradient.cols=c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE)
fviz_pca_var(macdo.acp, axes = c(1, 3), col.var = "cos2",
@@ -956,7 +955,7 @@ 08 septembre, 2019
#q27
s.label(macdo.acp$li, xax = 1, yax = 2)
-#page 170
+#page 159
s.label(macdo.acp$li, label=as.character(d_macdo$Item), clabel=0.5)
library(yarrr)
@@ -990,7 +989,7 @@ 08 septembre, 2019
cont_ind <- get_pca_ind(macdo.acp)$contrib
pirateplot(data=cont_ind,point.o=.75, ylab="Contribution",xlab="")
-#page 171
+#page 160
rownames(cont_ind) <- tmp_d_macdo$Item
round(sort(cont_ind[,1],decreasing=TRUE)[1:20], 4)
## Chicken McNuggets (40 piece)
@@ -1123,7 +1122,7 @@ 08 septembre, 2019
## [5] "Big Breakfast with Hotcakes and Egg Whites (Large Biscuit)"
## [6] "Big Breakfast (Regular Biscuit)"
## [7] "Big Breakfast with Hotcakes and Egg Whites (Regular Biscuit)"
-#page 172
+#page 161
names(sort(cont_ind[cont_ind[,2]>100/260*5,2],decreasing=TRUE))
## [1] "Strawberry Shake (Large)"
## [2] "McFlurry with M&M's™ Candies (Medium)"
@@ -1153,7 +1152,7 @@ 08 septembre, 2019
fviz_pca_ind(macdo.acp, geom = c("point"), col.ind =
"cos2", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))
-#page 174
+#page 163
fviz_pca_ind(macdo.acp, geom = c("point"), alpha.ind = "contrib")
fviz_pca_ind(macdo.acp, geom = c("point"), alpha.ind = "cos2")
@@ -1166,7 +1165,7 @@ 08 septembre, 2019
s.class(dfxy = macdo.acp$li, fac = d_macdo$Category,
col = gcol, xax = 1, yax = 2)
-#page 176
+#page 165
#q30
fviz_pca_biplot(macdo.acp, col.ind ="contrib", col.var ="contrib")
@@ -1209,7 +1208,7 @@ 08 septembre, 2019
macdo.acp.dudi <- dudi.pca(tmp_macdo$tab, center = TRUE, scale =
FALSE, scan = FALSE, nf = 3)
-#page 177
+#page 166
macdo.acp.dudi1 <- dudi.pca(tmp_macdo$tab, center = TRUE, scale =
TRUE, scan = FALSE, nf = 3)
g1 <- s.class(macdo.acp.dudi$li, tmp_macdo$Category, plot = FALSE)
@@ -1221,7 +1220,7 @@ 08 septembre, 2019
G1 <- rbindADEg(cbindADEg(g1, g2, plot = FALSE), cbindADEg(g3, g4,
plot = FALSE), plot = TRUE)
-#page 178
+#page 167
head(sort(apply(d_macdo[,-(1:3)],2,var),decreasing=TRUE))
## Sodium Calories
## 332959.3774 57729.6184
@@ -1230,12 +1229,12 @@ 08 septembre, 2019
## Cholesterol....Daily.Value. Sugars
## 846.3243 822.5307
#Analyse factorielle des correspondances
-#page 178
+#page 167
d_vac<-read.csv2("https://tinyurl.com/y3emuylu", row.names = 1)
rownames(d_vac)
## [1] "Agriculteurs" "Patrons" "Cadres.sup" "Cadre.moy"
## [5] "Employes" "Ouvriers" "Autres.actifs" "Inactifs"
-#page 179
+#page 168
str(d_vac)
## 'data.frame': 8 obs. of 8 variables:
## $ Hotel : int 195 700 961 572 441 783 142 741
@@ -1250,7 +1249,7 @@ 08 septembre, 2019
"Dynamic"), legend.text = colnames(d_vac), args.legend =
list(bg = "white", x = "topleft"))
-#page 180
+#page 169
if(!require(ggpubr)){install.packages("ggpubr")}
## Loading required package: ggpubr
## Loading required package: magrittr
@@ -1264,7 +1263,7 @@ 08 septembre, 2019
library(ade4)
table.cont(d_vac)
-#page 181
+#page 170
sum(d_vac)
## [1] 31079
khi.test.d_vac <- chisq.test(d_vac)
@@ -1293,7 +1292,7 @@ 08 septembre, 2019
##
## data: d_vac
## X-squared = 2292.1, df = 49, p-value < 2.2e-16
-#page 182
+#page 171
set.seed(1133)
chisq.test(d_vac, simulate.p.value = TRUE, B=50000)
##
@@ -1308,7 +1307,7 @@ 08 septembre, 2019
mosaicplot(t(d_vac), type = "pearson", shade = TRUE, las = 2,
main = "Associations et r\u00e9sidus du test du chi2")
-#page 183
+#page 172
if(!require("vcd")){install.packages("vcd")}
## Loading required package: vcd
## Loading required package: grid
@@ -1319,7 +1318,7 @@ 08 septembre, 2019
main="Associations et r\u00e9sidus du test du test du chi2",
labeling_args=list(abbreviate=c(A=TRUE)))
-#page 184
+#page 173
if(!require(FactoMineR)){install.packages("FactoMineR")}
## Loading required package: FactoMineR
##
@@ -1348,7 +1347,7 @@ 08 septembre, 2019
## 10 "$call" "summary called parameters"
## 11 "$call$marge.col" "weights of the columns"
## 12 "$call$marge.row" "weights of the rows"
-#page 185
+#page 174
library(factoextra)
round(eig.val <- get_eigenvalue(res.ca.d_vac), 3)
## eigenvalue variance.percent cumulative.variance.percent
@@ -1359,7 +1358,7 @@ 08 septembre, 2019
## Dim.5 0.002 2.283 99.669
## Dim.6 0.000 0.330 99.999
## Dim.7 0.000 0.001 100.000
-#page 186
+#page 175
as.numeric(colSums(eig.val)[1])
## [1] 0.07375229
sqrt(as.numeric(khi.test.d_vac$statistic)/sum(d_vac)/
@@ -1377,7 +1376,7 @@ 08 septembre, 2019
## [1] 0.1026452
Phi(d_vac)
## [1] 0.2715737
-#page 187
+#page 176
CramerV(d_vac, conf.level = .95)
## Cramer V lwr.ci upr.ci
## 0.1026452 0.0973395 0.1057883
@@ -1404,7 +1403,7 @@ 08 septembre, 2019
quantile(v, probs=c(0.025,0.975))
## 2.5% 97.5%
## 0.09976719 0.10772114
-#page 189
+#page 178
set.seed(1133)
idx.perm <- replicate(n,sample(nrow(d_vac.frm), replace=FALSE))
v.perm <- apply(idx.perm, 2, function(x) CramerV(d_vac.frm[,1],
@@ -1417,7 +1416,7 @@ 08 septembre, 2019
library(factoextra)
fviz_eig(res.ca.d_vac)
-#page 190
+#page 179
mean(res.ca.d_vac$eig[,1])
## [1] 0.01053604
res.ca.d_vac$eig[,2]>1/(ncol(d_vac)-1)*100
@@ -1429,7 +1428,7 @@ 08 septembre, 2019
fviz_ca_biplot(res.ca.d_vac)
-#page 191
+#page 180
fviz_ca_row(res.ca.d_vac)
fviz_ca_row(res.ca.d_vac, col.row="contrib")
@@ -1456,7 +1455,7 @@ 08 septembre, 2019
fviz_ca_biplot(res.ca.d_vac, col.row="cos2", col.col="cos2")
-#page 192
+#page 181
rowpr <- fviz_ca_biplot(res.ca.d_vac, map="rowprincipal", arrow =
c(TRUE, TRUE), repel=TRUE)
colpr <- fviz_ca_biplot(res.ca.d_vac, map="colprincipal", arrow =
@@ -1465,7 +1464,7 @@ 08 septembre, 2019
ggmatrix(list(rowpr, colpr),1,2)
#Analyse non symetrique des correspondances
-#page 193
+#page 182
d_TM<-read.csv2("https://tinyurl.com/y55e3k9y", row.names = 1)
rownames(d_TM)
## [1] "Laundry" "Main_meal" "Dinner" "Breakfeast" "Tidying"
@@ -1477,12 +1476,12 @@ 08 septembre, 2019
## $ Alternating: int 14 20 11 36 11 24 23 46 51 13 ...
## $ Husband : int 2 5 7 15 1 4 9 23 75 21 ...
## $ Jointly : int 4 4 13 7 57 53 55 15 3 66 ...
-#page 194
+#page 183
barplot(t(d_TM), beside=TRUE, names=d_TM$Task, col = c("red",
"green","blue","purple"), legend.text = colnames(d_TM),
args.legend = list(bg = "white", x = "top"))
-#page 195
+#page 184
if(!require(ggpubr)){install.packages("ggpubr")}
library(ggpubr)
my_cols <- c("#0D0887FF", "#6A00A8FF", "#B12A90FF",
@@ -1495,7 +1494,7 @@ 08 septembre, 2019
sum(d_TM)
## [1] 1744
-#page 196
+#page 185
if(!require(DescTools)){install.packages("DescTools")}
library(DescTools)
Lambda(d_TM)
@@ -1503,7 +1502,7 @@ 08 septembre, 2019
Lambda(d_TM, conf.level=0.95)
## lambda lwr.ci upr.ci
## 0.3410767 0.3170578 0.3650956
-#page 197
+#page 186
Lambda(d_TM, direction="row", conf.level=0.95)
## lambda lwr.ci upr.ci
## 0.2193878 0.1977464 0.2410292
@@ -1536,7 +1535,7 @@ 08 septembre, 2019
lattice::bwplot(l.d_TM)
-#page 198
+#page 187
quantile(lR.d_TM, probs=c(0.025,0.975))
## 2.5% 97.5%
## 0.1985655 0.2379738
@@ -1546,7 +1545,7 @@ 08 septembre, 2019
quantile(l.d_TM, probs=c(0.025,0.975))
## 2.5% 97.5%
## 0.3184851 0.3642997
-#page 198-199
+#page 187-188
n <- 10000; set.seed(1133)
idx.perm <- replicate(n,sample(nrow(d_TM.frm), replace=FALSE))
lR.perm.d_TM <- apply(idx.perm, 2, function(x) Lambda(d_TM.frm[,1],
@@ -1573,7 +1572,7 @@ 08 septembre, 2019
GoodmanKruskalTau(d_TM.tab, direction="column", conf.level=0.95)
## tauA lwr.ci upr.ci
## 0.4067139 0.3796921 0.4337358
-#page 200
+#page 189
GoodmanKruskalTau(d_TM.tab, direction="row", conf.level=0.95)
## tauA lwr.ci upr.ci
## 0.10465652 0.09471543 0.11459762
@@ -1598,13 +1597,13 @@ 08 septembre, 2019
## 4 $co 4 2 column coordinates
## 5 $c1 4 2 column normed scores
## other elements: N
-#page 201
+#page 190
library(adegraphics)
g1 <- s.label(res.nsc.d_TM$c1, plab.cex = 1.25)
g2 <- s.arrow(res.nsc.d_TM$li, add = TRUE, plab.cex = 0.75)
#Analyse des correspondances multiples
-#page 202
+#page 191
poke<-read.csv("https://tinyurl.com/y4y6a86m",na.strings= c("","NA"))
poke<-as.data.frame(poke)
poke$Generation<-as.factor(poke$Generation)
@@ -1643,7 +1642,7 @@ 08 septembre, 2019
##
poke.x<-poke[,c(3,12,13)]
-#page
+#page 192
library(ade4); library(adegraphics)
res.acm.poke<-dudi.acm(poke.x,scannf=FALSE)
@@ -1677,13 +1676,13 @@ 08 septembre, 2019
## Dim.21 0.2550264 3.326432 94.209364
## Dim.22 0.2311516 3.015021 97.224384
## Dim.23 0.2127972 2.775616 100.000000
-#page 204
+#page 193
res.acm.poke$cr
## RS1 RS2
## Type.1 0.6285080 0.65023195
## Generation 0.3154106 0.63418729
## Legendary 0.4901764 0.02536001
-#page 205
+#page 194
score(res.acm.poke, xax=1)
score(res.acm.poke, xax=1, type = "boxplot")
@@ -1692,7 +1691,7 @@ 08 septembre, 2019
ade4::s.corcircle(res.acm.poke$co, clabel = 0.7)
-#page 206
+#page 195
library(devtools)
## Loading required package: usethis
if(!require(JLutils)){install_github("larmarange/JLutils")}
@@ -1720,7 +1719,7 @@ 08 septembre, 2019
scatter(res.acm.poke)
#Analyse factorielle des donnees mixtes
-#page 208
+#page 197
if(!require("PCAmixdata")){install.packages("PCAmixdata")}
## Loading required package: PCAmixdata
library(PCAmixdata)
@@ -1734,7 +1733,7 @@ 08 septembre, 2019
## Speed 0.38 0.02 0.47 0.26 1.00
mix.poke<-PCAmix(subset(poke,select=7:11),subset(poke,select=3))
-#page 209
+#page 198
round(mix.poke$eig, 2)
## Eigenvalue Proportion Cumulative
## dim 1 2.57 11.69 11.69
@@ -1780,7 +1779,7 @@ 08 septembre, 2019
## Steel 0.75 2.68 -0.62 -2.04 -0.55
## Water -0.10 -0.05 -0.38 0.14 -0.71
#Classification ascendante hierarchique et methode des K-moyennes
-#page 211
+#page 200
data_event <- read.csv("https://tinyurl.com/y2k7mwbr")
head(data_event)
## X id_odsp id_event sort_order time
@@ -1839,11 +1838,11 @@ 08 septembre, 2019
## 4 2.14 NA NA NA NA
## 5 2.14 NA NA NA NA
## 6 2.14 NA NA NA NA
-#page 212
+#page 201
list_col <- c("sort_order","time","event_team","fthg","odd_h")
data_event.x <- data_event[,list_col]
-#page 213
+#page 202
data_event.c<-aggregate(.~event_team,data=data_event.x,FUN=mean)
str(data_event.c)
## 'data.frame': 30 obs. of 5 variables:
@@ -1865,7 +1864,7 @@ 08 septembre, 2019
library(GGally)
ggpairs(data_event.c)
-#page 215
+#page 204
d.data_event <- dist(data_event.c)
cah.ward <- hclust(d.data_event, method="ward.D2")
plot(cah.ward, xlab="\u00e9quipe de football", ylab="",
@@ -1876,13 +1875,13 @@ 08 septembre, 2019
library(ggdendro)
ggdendrogram(cah.ward, rotate = FALSE, size = 2)
-#page 216
+#page 205
library(JLutils)
best.cutree(cah.ward, min = 3, graph = TRUE, xlab =
"Nombre de classes", ylab = "Inertie relative")
## [1] 3
-#page 217
+#page 206
(groupes.cah <- cutree(cah.ward,k=5))
## AC Ajaccio AJ Auxerre Angers
## 1 2 3
@@ -1912,14 +1911,14 @@ 08 septembre, 2019
main="Dendrogramme", sub="", axes=TRUE, cex=0.5)
rect.hclust(cah.ward, 5)
-#page 218
+#page 207
library(factoextra)
hc.cut <- hcut(d.data_event, k = 5, hc_method = "complete")
fviz_dend(hc.cut, show_labels = TRUE, rect = TRUE)
fviz_cluster(hc.cut, ellipse.type = "convex",data=d.data_event)
-#page 220
+#page 209
if(!require("vegan")){install.packages("vegan")}
## Loading required package: vegan
## Loading required package: permute
@@ -1937,7 +1936,7 @@ 08 septembre, 2019
criterion = "calinski")
plot(k.event.cal)
-#page 221
+#page 210
set.seed(1133)
groupes.kmeans <- kmeans(data_event.c,centers=5,nstart=1000)
print(groupes.kmeans)
@@ -1982,7 +1981,7 @@ 08 septembre, 2019
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault"
-#page 222
+#page 211
print(table(groupes.cah,groupes.kmeans=groupes.kmeans$cluster))
## groupes.kmeans
## groupes.cah 1 2 3 4 5
@@ -1991,7 +1990,7 @@ 08 septembre, 2019
## 3 3 0 0 2 0
## 4 0 0 6 0 1
## 5 0 0 2 4 0
-#page 223
+#page 212
library(FactoMineR)
res.pca <- PCA(data_event.c, ncp = 3, graph = FALSE)
get_eig(res.pca)
@@ -2006,13 +2005,13 @@ 08 septembre, 2019
fviz_dend(res.hcpc, cex = 0.7, palette = "jco", rect = TRUE,
rect_fill = TRUE, rect_border = "jco", labels_track_height = 0.8)
-#page 224
+#page 213
fviz_cluster(res.hcpc, repel = TRUE, show.clust.cent = TRUE,
palette = "jco", ggtheme = theme_minimal(), main = "Factor map")
plot(res.hcpc, choice = "3D.map")
-#page 225
+#page 214
#Exercice 3.1
read.csv("https://tinyurl.com/y3rxbxoo")
## NOM PAYS ETOILE CONFORT CHAMBRE CUISINE SPORT PLAGE PRIX
@@ -2142,7 +2141,7 @@ 08 septembre, 2019
## 21 26467 36963 61968 26538 9935
## 22 38114 51441 57654 32624 10969
## 23 10234 14904 14062 2698 1772
-#page 226
+#page 215
#Exercice 3.4
read.csv("https://tinyurl.com/y5gffvsb")
## Zone_Name Continent Area
diff --git a/docs/articles/Chapitre4.html b/docs/articles/Chapitre4.html
index ebba703..245197d 100644
--- a/docs/articles/Chapitre4.html
+++ b/docs/articles/Chapitre4.html
@@ -377,14 +377,14 @@
Modélisation statistique par la pratique avec R
Chapitre 4 : Analyse de régression
Frédéric Bertrand, Emmanuelle Claeys, Myriam Maumy-Bertrand
-08 septembre, 2019
+19 septembre, 2019
#Chapitre 4
-#page 266
+#page 248
if(!require("BioStatR")){install.packages("BioStatR")}
## Loading required package: BioStatR
library(BioStatR)
@@ -401,7 +401,7 @@ 08 septembre, 2019
## Mean :11.13 Mean :13.37 laurier rose :72
## 3rd Qu.:14.60 3rd Qu.:15.30
## Max. :49.20 Max. :27.00
-#page 267
+#page 249
if(!require("ISLR")){install.packages("ISLR")}
## Loading required package: ISLR
library(ISLR)
@@ -441,7 +441,7 @@ 08 septembre, 2019
## [29] 2.4 10.7 13.8 10.9 10.3 8.8 9.0 8.2 9.6 9.0 5.3 1.5 6.7 2.9
## [43] 2.9 3.5 3.4 4.9 4.7 4.7 5.2 2.1 2.2 1.4 2.7 1.0 2.5 5.5
## [57] 2.7 6.7 7.3 2.9 3.8 7.6 3.6 3.0 5.8 5.3 3.2 4.4 3.4 2.9
-#page 268
+#page 250
round(fitted(model), 2)
## 111 112 113 114 115 116 117 118 119 120 121 122
## 9.64 6.11 17.63 23.00 17.32 15.02 16.71 14.09 16.55 10.56 11.33 12.41
@@ -486,7 +486,7 @@ 08 septembre, 2019
## 9 119 14 17.3 16.6 0.606 -2.55 0.0605 2.46 3.67e-2
## 10 120 8.7 13.4 10.6 0.344 -1.86 0.0195 2.47 5.80e-3
## # … with 60 more rows, and 1 more variable: .std.resid <dbl>
-#page 270
+#page 252
tidy(model)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
@@ -509,7 +509,7 @@ 08 septembre, 2019
legend("topleft",lty=c(1,2,4),legend = c("lm","lowess",
"smooth.spline"),lwd=2,col=c("red","blue","orange"))
-#page 271
+#page 253
if(!require("ggiraphExtra")){install.packages("ggiraphExtra")}
## Loading required package: ggiraphExtra
library(ggiraphExtra)
@@ -528,7 +528,7 @@ 08 septembre, 2019
with(Mes.B,lines(lowess(fitted(model),masse-fitted(model)),col=
"red",lwd=2))
-#page 272
+#page 254
library(MASS)
model.r <- lqs(masse ~ taille, data=Mes.B)
summary(model.r)
@@ -544,7 +544,7 @@ 08 septembre, 2019
## call 3 -none- call
## xlevels 0 -none- list
## model 2 data.frame list
-#page 273
+#page 255
coefficients(model.r)
## (Intercept) taille
## -7.070691 1.217949
@@ -555,21 +555,26 @@ 08 septembre, 2019
## -6.394872 1.217949
with(Mes.B,1-sum(residuals(model.r)^2)/sum((masse-mean(masse))^2))
## [1] 0.760885
-#page 274
+#page 256
with(Mes.B,1-sum(residuals(model)^2)/sum((masse-mean(masse))^2))
## [1] 0.8105863
plot(masse ~ taille, data=Mes.B, xlab="Taille", ylab="Masse")
abline(model.r, lty=1)
abline(model, lty=2)
-legend("topleft", legend=c("Robuste","Moindres carr\u00e9s"),lty=1:2)
-
+with(Mes.B,points(taille[model.r$bestone],masse[model.r$bestone],
+ pch=19,col="red"))
+abline(lm(masse ~ taille, data=Mes.B[model.r$bestone,]), lty=3)
+legend("topleft", legend=c("R\u00e9sistante : moindres carr\u00e9s tronqu\u00e9s",
+ "Moindres carr\u00e9s ordinaires",
+ "Droite ajust\u00e9e entre les deux observations"), lty=1:3)
+
shapiro.test(residuals(model.r))
##
## Shapiro-Wilk normality test
##
## data: residuals(model.r)
## W = 0.81713, p-value = 7.025e-08
-#page 275
+#page 257
model2<-lm(masse~taille+I(taille^2),data=Mes.B)
summary(model2)
@@ -603,14 +608,14 @@ 08 septembre, 2019
legend("topleft",legend=c("Moindres carr\u00e9s ordinaires",
"Ajustement local"),lty=c(1,3),lwd=2,col=c("red","blue"))
-#page 276
+#page 258
shapiro.test(residuals(model2))
##
## Shapiro-Wilk normality test
##
## data: residuals(model2)
## W = 0.98448, p-value = 0.5394
-#page 277
+#page 259
confint(model2)
## 2.5 % 97.5 %
## (Intercept) 1.15143209 9.8242312
@@ -633,14 +638,14 @@ 08 septembre, 2019
my.confidence.region(model2, which=3)
-#page 278
+#page 260
set.seed(314)
model2.r<-lqs(masse~taille+I(taille^2),data=Mes.B)
rbind(coefficients(model2),coefficients(model2.r))
## (Intercept) taille I(taille^2)
## [1,] 5.487832 -1.217922 0.1129755
## [2,] 3.849245 -1.102565 0.1162078
-#page 279
+#page 261
shapiro.test(residuals(model2.r))
##
## Shapiro-Wilk normality test
@@ -658,7 +663,7 @@ 08 septembre, 2019
legend("topleft",legend=c("R\u00e9sistante : moindres carr\u00e9s trim\u00e9s",
"Ajustement local"),lty=c(2,3),lwd=2,col=c("green","blue"))
-#page 279-280
+#page 261
plot(masse~taille,data=Mes.B,xlab="Taille",ylab="Masse",pch=20)
with(Mes.B,lines(lowess(taille,masse),lty=3,lwd=2,col="blue"))
with(Mes.B,points(sort(taille),fitted(model2)[order(taille)],
@@ -673,13 +678,13 @@ 08 septembre, 2019
"R\u00e9sistante : moindres carr\u00e9s trim\u00e9s","Ajustement local"),lty=1:3
,lwd=2,col=c("red","green","blue"))
-#page 280
+#page 262
data("mtcars")
View(mtcars)
help(mtcars)
-#page 281
+#page 263
head(mtcars,n=10)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
@@ -706,7 +711,7 @@ 08 septembre, 2019
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
-#page 282
+#page 264
mtcars2 <- within(mtcars, {
vs <- factor(vs, labels = c("V", "S"))
am <- factor(am, labels = c("automatic", "manual"))
@@ -731,7 +736,7 @@ 08 septembre, 2019
## Max. :5.424 Max. :22.90 8: 1
subsetmtcars<-mtcars[,c(1,3,4,5,6,7)]
-#page 283
+#page 265
library(MVN)
## Registered S3 method overwritten by 'GGally':
## method from
@@ -759,7 +764,7 @@ 08 septembre, 2019
## Merc 240D Merc 240D 20.804 TRUE
## Merc 230 Merc 230 16.813 TRUE
## Fiat 128 Fiat 128 16.123 TRUE
-#page 284
+#page 266
library(corrplot); set.seed(1133)
## corrplot 0.84 loaded
permmtcars <- perm.cor.mtest(subsetmtcars)
@@ -774,7 +779,7 @@ 08 septembre, 2019
corrplot(permmtcars$cor,p.mat=permmtcars$p,pch.col="white",insig=
"label_sig",sig.level=.05/choose(ncol(subsetmtcars),2))
-#page 285
+#page 267
fit.mtcars<-lm(mpg~.,data=subsetmtcars)
fit.mtcars
##
@@ -917,10 +922,14 @@ 08 septembre, 2019
## Ferrari Dino 0.1513489108 -0.188847641 -0.100036080
## Maserati Bora -0.0367873163 0.094199143 0.137913963
## Volvo 142E -0.0639020279 -0.016923245 0.069537042
-car::vif(fit2.mtcars)
+library(car)
+## Loading required package: carData
+vif(fit2.mtcars)
## disp hp drat wt qsec disp:hp
## 48.793223 26.669457 4.462626 7.224276 3.719492 76.606107
-perturb::colldiag(fit2.mtcars)
+#page 268
+library(perturb)
+colldiag(fit2.mtcars)
## Condition
## Index Variance Decomposition Proportions
## intercept disp hp drat wt qsec
@@ -930,8 +939,7 @@ 08 septembre, 2019
## 4 21.905 0.001 0.772 0.182 0.102 0.392 0.007
## 5 32.009 0.032 0.012 0.142 0.606 0.449 0.159
## 6 71.166 0.966 0.136 0.324 0.272 0.120 0.829
-#page 286
-plot(fit2.mtcars)
+plot(fit2.mtcars)
influence.measures(fit2.mtcars)
## Influence measures of
## lm(formula = mpg ~ . + disp:hp, data = subsetmtcars) :
@@ -1002,7 +1010,7 @@ 08 septembre, 2019
## Ferrari Dino -0.10004 0.4163 1.631 0.025320 0.2817
## Maserati Bora 0.13791 0.6974 2.699 0.071236 0.5588 *
## Volvo 142E 0.06954 -0.1713 1.392 0.004323 0.1087
-car::influencePlot(fit2.mtcars)
+influencePlot(fit2.mtcars)
## StudRes Hat CookD
## Merc 230 0.2772254 0.5938467 0.01666835
@@ -1010,7 +1018,7 @@ 08 septembre, 2019
## Toyota Corona -1.8342492 0.1268803 0.06381047
## Pontiac Firebird 1.9028828 0.2215907 0.13328179
## Maserati Bora 0.6197073 0.5587835 0.07123631
-car::influenceIndexPlot(fit2.mtcars)
+influenceIndexPlot(fit2.mtcars)
summary(fit2.mtcars)
##
@@ -1077,7 +1085,7 @@ 08 septembre, 2019
## 263 1 1 1 1 1 1 1 1 0
## 59 1 1 1 1 1 1 1 0 1
## 0 0 0 0 0 0 0 59 59
-#page 260
+#page 269
md.pairs(Hitters)
## $rr
## AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns
@@ -1341,6 +1349,9 @@ 08 septembre, 2019
library(dplyr)
##
## Attaching package: 'dplyr'
+## The following object is masked from 'package:car':
+##
+## recode
## The following object is masked from 'package:MASS':
##
## select
@@ -1368,7 +1379,7 @@ 08 septembre, 2019
## 263 1 1 1 1 1 1 1 1 0
## 59 1 1 1 1 1 1 1 0 1
## 0 0 0 0 0 0 0 59 59
-#page 288
+#page 270
if(!require("naniar")){install.packages("naniar")}
## Loading required package: naniar
library(naniar);library(ggplot2)
@@ -1378,7 +1389,7 @@ 08 septembre, 2019
fill = Salary_NA)) +
geom_density(alpha = 0.5)
-#page 289
+#page 271
gg_miss_var(Hitters)
try(gg_miss_upset(Hitters))
## Error : upset plots for missing data requre at least two variables to have missing data, only one variable, 'Salary' has missing values.
@@ -1408,7 +1419,7 @@ 08 septembre, 2019
## 22. │ └─base:::tryCatchOne(expr, names, parentenv, handlers[[1L]])
## 23. │ └─base:::doTryCatch(return(expr), name, parentenv, handler)
## 24. └─naniar::gg_miss_upset(Hitters)
-#page 290
+#page 272
#Exercice 4.1
data(anscombe)
str(anscombe)
@@ -1421,7 +1432,7 @@ 08 septembre, 2019
## $ y2: num 9.14 8.14 8.74 8.77 9.26 8.1 6.13 3.1 9.13 7.26 ...
## $ y3: num 7.46 6.77 12.74 7.11 7.81 ...
## $ y4: num 6.58 5.76 7.71 8.84 8.47 7.04 5.25 12.5 5.56 7.91 ...
-#page 291
+#page 273
#Exercice 4.2
Hitters = na.omit(Hitters)
@@ -1490,7 +1501,7 @@ 08 septembre, 2019
## 1.4082490 0.7743122 -0.8308264 -112.3800575 0.2973726
## Assists
## 0.2831680
-#page 292
+#page 274
#q5
regfit.fwd = regsubsets(Salary ~ ., data = Hitters, nvmax = 19,
method = "forward")
@@ -1577,7 +1588,7 @@ 08 septembre, 2019
val.errors = c(val.errors,mean((Hitters$Salary[-train]-pred)^2))
}
-#page 293
+#page 275
#q6 (suite)
mod.ordre=as.numeric(unlist(sapply(table(n.vars),
function(x){1:x})))
@@ -1586,7 +1597,7 @@ 08 septembre, 2019
lines(n.vars[mod.ordre==1],sqrt(val.errors)[mod.ordre==1],
lwd=2,lty=2)
-#page 294
+#page 276
#q7
predict.regsubsets = function(object, newdata, id, ...) {
form = as.formula(object$call[[2]])
@@ -1614,27 +1625,27 @@ 08 septembre, 2019
## 6.594856e-01 3.927505e-01 -5.291586e-01 -3.206508e+01 3.285872e-14
## DivisionW PutOuts Assists Errors NewLeagueN
## -1.192990e+02 2.724045e-01 1.732025e-01 -2.058508e+00 0.000000e+00
-#page 295
+#page 277
#Exercice 4.3
#q1
CancerSein <- read.csv("https://tinyurl.com/y3l6sh59")
-#page 297
+#page 279
#Exercice 4.4
#q1
SidaChat <- read.csv("https://tinyurl.com/yxe6yxem")
-#page 300
+#page 282
#Exercice 4.5
#q1
Vitamines <- read.csv("https://tinyurl.com/y3shxcsd")
-#page 301
+#page 283
#Exercice 4.6
#q1
Beton <- read.csv("https://tinyurl.com/y4w2qv9t")
-#page 302
+#page 284
#Exercice 4.7
#q1
chal <- read.csv("https://tinyurl.com/yyb3cztf")
@@ -1642,7 +1653,7 @@ 08 septembre, 2019
cdplot(as.factor(Defaillance)~Temperature, data=chal,
ylab="Defaillance")
-#page 303
+#page 285
#q3
chal.glm <- glm(Defaillance~Temperature,data=chal,
family="binomial")
@@ -1651,7 +1662,7 @@ 08 septembre, 2019
## Loading required package: hnp
hnp(chal.glm, sim = 99, conf = 0.95)
## Binomial model
-#page 304
+#page 286
#q6
if(!require(rms)){install.packages("rms")}
## Loading required package: rms
@@ -1672,6 +1683,11 @@ 08 septembre, 2019
## The following object is masked from 'package:base':
##
## backsolve
+##
+## Attaching package: 'rms'
+## The following objects are masked from 'package:car':
+##
+## Predict, vif
library(rms)
chal.lrm <- lrm(Defaillance~Temperature, data=chal, x=TRUE, y=TRUE)
print(chal.lrm)
@@ -1697,12 +1713,11 @@ 08 septembre, 2019
## 3.7572772 3.7345914 0.1360759
## Z P
## 0.1667141 0.8675950
-#page 305
-#Exercice 4.8
+#Exercice 4.8
#q1
Cypermethrine <- read.csv("https://tinyurl.com/y4deakfd")
-#page 306
+#page 288
#Exercice 4.9
#q1
poly <- read.csv("https://tinyurl.com/yyhhcw37")
@@ -1821,7 +1836,7 @@ 08 septembre, 2019
## (Intercept) 2.0665705 4.231950890
## traitementplacebo 0.6336661 2.096075160
## age -0.0776086 0.004319903
-#page 307
+#page 289
with(poly,plot(nombre~age,type="n",ylab="Nombre de polypes",
xlab="\u00c2ge"))
with(poly,points(age[traitement=="placebo"],fitted(poly_glm2)[
diff --git a/docs/articles/SolChapitre3.html b/docs/articles/SolChapitre3.html
index 0d58a4e..6611940 100644
--- a/docs/articles/SolChapitre3.html
+++ b/docs/articles/SolChapitre3.html
@@ -377,14 +377,14 @@
Modélisation statistique par la pratique avec R
Chapitre 3 : Analyse exploratoire des données - solution des exercices
Frédéric Bertrand, Emmanuelle Claeys, Myriam Maumy-Bertrand
-08 septembre, 2019
+19 septembre, 2019
#Chapitre 3 : solution des exercices
-#page 227
+#Complement en ligne
#Exercice 3.1
d_hotels <- read.csv("https://tinyurl.com/y3rxbxoo")
head(d_hotels)
@@ -418,8 +418,7 @@ 08 septembre, 2019
mosaicplot(d_pres2002, type = "pearson", shade = TRUE, las = 2,
main = "Associations et r\u00e9sidus du test du chi2")
-#page 228
-d_pres2007 <- read.csv("https://tinyurl.com/yyolq665",row.names=1)
+d_pres2007 <- read.csv("https://tinyurl.com/yyolq665",row.names=1)
head(d_pres2007[,1:8])
## Sarkozy Bayrou Royal Le.Pen Besanc. Villiers Voynet Laguiller
## Alsace 362391 214259 171282 135730 33310 22492 20382 13821
@@ -431,8 +430,7 @@ 08 septembre, 2019
mosaicplot(d_pres2007, type = "pearson", shade = TRUE, las = 2,
main = "Associations et r\u00e9sidus du test du chi2")
-#page 229
-#q1
+#q1
data(UCBAdmissions)
str(UCBAdmissions)
## 'table' num [1:2, 1:2, 1:6] 512 313 89 19 353 207 17 8 120 205 ...
@@ -446,8 +444,7 @@ 08 septembre, 2019
## Loading required package: grid
assoc(UCBAdmissions)
-#page 230
-#q2
+#q2
library(FactoMineR)
try(MCA(UCBAdmissions))
## Error in dimnames(res) <- list(attributes(tab)$row.names, listModa) :
@@ -477,8 +474,7 @@ 08 septembre, 2019
## .. ..$ Admit : chr "Admit=Admitted" "Admit=Rejected"
## .. ..$ Gender: chr "Gender=Male" "Gender=Female"
## .. ..$ Dept : chr "Dept=A" "Dept=B" "Dept=C" "Dept=D" ...
-#page 231
-MCA(UCBA.df, graph=FALSE)
+MCA(UCBA.df, graph=FALSE)
## **Results of the Multiple Correspondence Analysis (MCA)**
## The analysis was performed on 4526 individuals, described by 3 variables
## *The results are available in the following objects:
@@ -497,8 +493,7 @@ 08 septembre, 2019
## 11 "$call" "intermediate results"
## 12 "$call$marge.col" "weights of columns"
## 13 "$call$marge.li" "weights of rows"
-#page 232
-#Exercice 3.4
+#Exercice 3.4
d_wow <- read.csv("https://tinyurl.com/y5gffvsb", row.names =1)
head(d_wow[,1:3])
## Continent Area
@@ -520,8 +515,7 @@ 08 septembre, 2019
library(ggdendro)
ggdendrogram(wow.cah.ward, labels = FALSE)
-#page 233
-#Exercice 3.5
+#Exercice 3.5
d_hotels <- read.csv("https://tinyurl.com/y3rxbxoo", row.names=1)
head(d_hotels)
## PAYS ETOILE CONFORT CHAMBRE CUISINE SPORT PLAGE PRIX
diff --git a/docs/articles/SolChapitre4.html b/docs/articles/SolChapitre4.html
index 96a6fe2..72e8192 100644
--- a/docs/articles/SolChapitre4.html
+++ b/docs/articles/SolChapitre4.html
@@ -377,14 +377,14 @@
Modélisation statistique par la pratique avec R
Chapitre 4 : Analyse de régression - solution des exercices
Frédéric Bertrand, Emmanuelle Claeys, Myriam Maumy-Bertrand
-08 septembre, 2019
+19 septembre, 2019
#Chapitre 4 : solution des exercices
-#page 308
+#Complement en ligne
#Exercice 4.4
#q1
CancerSein <- read.csv("https://tinyurl.com/y3l6sh59")
@@ -441,8 +441,7 @@ 08 septembre, 2019
cdplot(as.factor(Defaillance)~Temperature, data=chal,
ylab="Defaillance")
-#page 309
-#q3
+#q3
chal.glm<-glm(Defaillance~Temperature,data=chal,family="binomial")
if(!require("hnp")){install.packages("hnp")}
## Loading required package: hnp
@@ -489,7 +488,7 @@ 08 septembre, 2019
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggiraphExtra::ggPredict(chal.glm)
-
+
#q4
confint(chal.glm, parm="Temperature")
## Waiting for profiling to be done...
@@ -647,8 +646,7 @@ 08 septembre, 2019
## 10 20 10 8 F
## 11 20 12 16 F
## 12 20 16 32 F
-#page 310
-#Exercice 4.10
+#Exercice 4.10
#q1
poly <- read.csv("https://tinyurl.com/yyhhcw37")
@@ -657,7 +655,7 @@ 08 septembre, 2019
library(hnp)
hnp(poly_glm1, sim = 99, conf = 0.95)
## Poisson model
-
+
summary(poly_glm1)
##
## Call:
@@ -694,7 +692,7 @@ 08 septembre, 2019
library(hnp)
hnp(poly_glm2, sim = 99, conf = 0.95)
## Quasi-Poisson model
-
+
summary(poly_glm2)
##
## Call:
@@ -730,7 +728,7 @@ 08 septembre, 2019
library(hnp)
hnp(poly_glm3, sim = 99, conf = 0.95)
## Negative binomial model (using MASS package)
-
+
summary(poly_glm3)
##
## Call:
diff --git a/docs/reference/ModStatR.html b/docs/reference/ModStatR.html
index 2a59fbc..9716878 100644
--- a/docs/reference/ModStatR.html
+++ b/docs/reference/ModStatR.html
@@ -158,7 +158,7 @@ ModStatR
References
- Modélisation statistique par la pratique avec R, Frédéric Bertrand, Emmanuelle Claeys, Myriam Maumy-Bertrand https://www.dunod.com/sciences-techniques/modelisation-statistique-par-pratique-avec-r-cours-et-exercices-corriges, https://github.com/fbertran/ModStatR et https://fbertran.github.io/ModStatR
+ Modélisation statistique par la pratique avec R, Frédéric Bertrand, Emmanuelle Claeys, Myriam Maumy-Bertrand, 2019, ISBN:9782100793525, Dunod, Paris, https://www.dunod.com/sciences-techniques/modelisation-statistique-par-pratique-avec-r-cours-et-exercices-corriges, https://github.com/fbertran/ModStatR et https://fbertran.github.io/ModStatR
Examples