From 7b9dc778a88c8d6c8365ea5a1a5788bb0d7af651 Mon Sep 17 00:00:00 2001 From: Maximilian Held Date: Tue, 3 Oct 2017 18:54:02 +0200 Subject: [PATCH] first run at complete analysis, delete legacy script, as per #28 --- DESCRIPTION | 10 +- _sublab.Rmd | 518 ---------------------------------------------------- index.Rmd | 84 ++++++++- setup.R | 9 +- 4 files changed, 98 insertions(+), 523 deletions(-) delete mode 100644 _sublab.Rmd diff --git a/DESCRIPTION b/DESCRIPTION index 8bdf0f5..05d9ac4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,6 +20,12 @@ Imports: shinyjs, rdrop2, shinyjqui, - rsconnect + rsconnect, + plyr, + xtable, + pander, + qmethod, + lettercase Remotes: - maxheld83/pensieve + maxheld83/pensieve, + maxheld83/qmethod diff --git a/_sublab.Rmd b/_sublab.Rmd deleted file mode 100644 index a2a5ec0..0000000 --- a/_sublab.Rmd +++ /dev/null @@ -1,518 +0,0 @@ ---- -title: "VWN Analyse" -author: "Maximilian Held" -date: "11 June 2016" -output: pdf_document -library: held_library.bib -header-includes: \usepackage[utf8]{inputenc} ---- - -```{r setup, include=FALSE, echo=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, cache = TRUE) -options(scipen = 1, digits = 2) -``` - -```{r preamble, echo =FALSE} -library(plyr) -library(babynames) -library(stringr) -library(reshape2) -library(knitr) -library(ggplot2) -library(xtable) -library(pander) -install.packages(repos = NULL, type = "source", INSTALL_opts = c('--no-lock'), pkgs = c("../qmethod")) # notice this is the LOCAL qmethod -library(qmethod) -install.packages(repos = NULL, type = "source", INSTALL_opts = c('--no-lock'), pkgs = c("../pensieve")) # notice this is the LOCAL qmethod -library(pensieve) -``` - -```{r import-items} -sublab <- NULL -sublab$items <- read.csv(file = "qset.csv", header = TRUE, stringsAsFactors = FALSE, sep = ";") -rownames(sublab$items) <- sublab$items$item_handle_english -``` - - -```{r find-distro} -distro <- make.distribution(nstat = nrow(sublab$items), max.bin = 5) -distro -``` - -```{r make-cards, include=FALSE, eval=FALSE, echo = FALSE} -items_print <- as.matrix(sublab$items$item) -rownames(items_print) <- sublab$items$item_handle -colnames(items_print) <- "german" - -items_lookup <- as.matrix(as.character(sublab$items$item_id)) -rownames(items_lookup) <- sublab$items$item_handle -colnames(items_lookup) <- "german" - - -make.cards(q.set = items_print, - manual.lookup = items_lookup, - wording.font.size = "LARGE", - show.handles = FALSE, - babel.language = "ngerman", - duplex.double = TRUE, - output.pdf = TRUE, - file.name = "cards-sublab_participants") -``` - - -```{r data-import, echo = FALSE, include = FALSE} -rawdata <- read.csv(file = "raw/Subjektivitt_der_Arbeit_Pretest_Uni_Hohenheim_STUDENTS.csv", - header = TRUE, - sep = ";", - na.strings = "", - stringsAsFactors = FALSE) - -#fix time -datetime <- strptime(x = rawdata$datetime, format = "%Y-%m-%d %H:%M:%S", tz = "") - -# fix durations -durations <- rawdata[,c("dur0", "dur1", "dur2", "dur3", "dur4", "dur5")] -colnames(durations) <- c("dur_info", "dur_roughsort", "dur_mainsort", "dur_checksort", "dur_extstat", "dur_survey") -durations <- durations/60 # make this into minutes - - -# fix survey data ==== - -# Geschlecht -gender <- rawdata[, "form0"] -gender <- factor(x = gender, - levels = c(0:2), - labels = c("männlich", "weiblich", "andere"), - exclude = 3) - -# Wie alt sind Sie? -alter <- rawdata[, "form1"] -alter <- ordered(x = alter, - levels = c(0:4), - exclude = 5, - labels = c("jünger als 16", "17 bis 18", "19 bis 21", "22 bis 24", "24 oder älter")) - - -# Welche Ausbildungsformen haben Sie absolviert? (Mehrfachnennungen möglich) -dimension <- rawdata[, "form2"] -dimension <- factor(x = dimension, - levels = c(0:1), - labels = c("wünschenswert", "wahrscheinlich")) -dimension - -teilnehmer <- rawdata[, "form3"] - -# fix q data ========== -# let's first get the statements and give them proper handles -# library(XML) -# qset <- xmlParse(file = "tool/settings/statements.xml") -# qset <- xmlToList(node = qset, addAttributes = FALSE, simplify = TRUE) -# qset <- data.frame(item = unlist(qset), item_id = 1:length(qset)) -# qset$item_handle <- NA -# write.csv(x = qset, file = "qset.csv") -sublab$items - -qdata <- rawdata[,3:43] -colnames(qdata) <- sublab$items$item_handle_english - -# now put everything in the right order and back in place ==== -sublab$clean <- cbind(teilnehmer, - datetime, - alter, - gender, - dimension, - qdata, - durations, - deparse.level = 2, - stringsAsFactors = FALSE) - - -# kill early tests (there happen to be none) -# starttime was provided by email from VWN -starttime <- strptime(x = "2016-07-15 10:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "") -sublab$clean <- sublab$clean[sublab$clean$datetime > starttime, ] - -# cleanup ==== -#rm(list = ls()[!ls()=="vwn"]) - -# separate survey and Q === -sublab$q$all <- sublab$clean[, 6:46] -sublab$q$all$teilnemer <- sublab$clean$teilnehmer -sublab$q$wunsch <- sublab$q$all[sublab$clean$dimension == "wünschenswert",] -sublab$q$wahrsch <- sublab$q$all[sublab$clean$dimension == "wahrscheinlich",] -rownames(sublab$q$wunsch) <- sublab$q$wunsch$teilnemer -sublab$q$wunsch$teilnemer <- NULL -rownames(sublab$q$wahrsch) <- sublab$q$wahrsch$teilnemer -sublab$q$wahrsch$teilnemer <- NULL - -sublab$q$wunsch <- t(sublab$q$wunsch) -sublab$q$wahrsch <- t(sublab$q$wahrsch) - -str(sublab$q$wunsch) -qdata3d <- array(data = c(sublab$q$wunsch, sublab$q$wahrsch), dim = c(nrow(sublab$q$wunsch), ncol(sublab$q$wahrsch), 2), dimnames = list(items = rownames(sublab$q$wahrsch), people = colnames(sublab$q$wahrsch), condition = c("desirable", "probable"))) -save(qdata3d, file = "qdata3d.Rdata") -str(qdata3d) - -# extract comments feedback === -rawdata[["comment8"]] -rawdata[["comment18"]] -feedback <- qdata3d -feedback[,,] <- NA -feedback[,,"positive"] -str(feedback) - - - -dim(qdata3d) -qdata3d -``` - -## Q Analysis - -### Q Descriptives - -We first consider simply the correlations of the Q-datamatrix, that is, the correlations *between people-variables* across *item_cases*. -To at least approximate the ordinal (not interval) nature of the Q data collection, especially under a *forced distribution*, the correlations are calculated using Spearman's $\rho$, not the customary Pearson's $\rho$. - -```{r cors, include = FALSE, echo=FALSE} -cor <- NULL -cor$wunsch <- cor(x = sublab$q$wunsch, method = "spearman") -cor$wahrsch <- cor(x = sublab$q$wahrsch, method = "spearman") -df <- NULL -df$wunsch <- data.frame(cors = cor$wunsch[upper.tri(x = cor$wunsch, diag = FALSE)]) -df$wahrsch <- data.frame(cors = cor$wahrsch[upper.tri(x = cor$wahrsch, diag = FALSE)]) -``` - -The correlations, summarized in the below histogram are, on average, relatively high by Q standards, with a mean of around `r median(df$cors)` and a median around `r mean(df$cords)`, though there are quite few high correlations, indicating broadly shared, though not closely similar patterns. -Typical for some Q data, there are also less negative correlations than positive correlations, indicating that people do *not*, on the surface, hold polar opposite as beliefs. - -```{r correlations, fig.cap="Q-Korrelationen zwischen den Teilnehmenden", fig.height=25, fig.width=25} -heatmap <- NULL -heatmap$wunsch <- q.corrplot(corr.matrix = cor$wunsch, quietly = TRUE) + ggtitle("Wünschenswert") -heatmap$wahrsch <- q.corrplot(corr.matrix = cor$wahrsch, quietly = TRUE) + ggtitle("Wahrscheinlich") -library(gridExtra) -do.call(what = "grid.arrange", args = heatmap) -``` - -```{r liveplot} -wunsch <- sublab$q$wunsch -colnames(wunsch)[!colnames(wunsch) %in% c("Bernadette", "Jan", "Christiane")] <- paste("Student", LETTERS[1:16]) - -rawnew <- read.csv(file = "raw/Subjektivitt_der_Arbeit_Pretest_Uni_Hohenheim_1_0.csv", header = TRUE, stringsAsFactors = FALSE, sep = ";") -new <- rawnew[, 3:43] -new <- t(new) -#str(wunsch) # 19 participants -wunsch <- cbind(wunsch, new) -colnames(wunsch)[20:ncol(wunsch)] <- rawnew[,47] -wunsch <- wunsch[,colnames(wunsch) != "MartinTest"] - -str(sublab$items) -qset <- sublab$items[, c("item_id", "item_german")] -qset <- as.matrix(qset) -rownames(qset) <- sublab$items$item_handle_german -lookup <- as.matrix(qset[,1]) -lookup <- as.integer(lookup) -lookup <- as.matrix(lookup) - -byhand <- import.q.sorts(q.sorts.dir = "semiraw", q.set = qset, manual.lookup = lookup, header = TRUE) - - -#quartz(title = "Cors") -dev.set(which = 2) -q.corrplot(corr.matrix = cor(wunsch, method = "spearman")) + ggtitle("Ähnlichkeit der Subjektivität der Teilnehmenden") -#quartz(title = "NFac") -dev.set(which = 3) -nfac <- q.nfactors(dataset = sublab$q$wahrsch, cutoff = 9, cor.method = "spearman", siglevel = 0.01) -nfac$screeplot + ggtitle("Kriterien zur Faktorgüte (Screeplot und Parallelanalyse)") -rownames(wunsch) <- sublab$items$item_german -result <- qmethod(dataset = wunsch, nfactors = 3, rotation = "varimax", forced = TRUE, cor.method = "spearman", threshold = "none", allow.confounded = TRUE) -result <- q.fnames(results = result, fnames = c("Idealtyp_Rot", "Idealtyp_Blau", "Idealtyp_Gruen")) -result <- q.fcolors(results = result) - -#quartz(title = "Loas") -dev.set(which = 4) -#q.loaplot(results = result, density = TRUE, rug = TRUE) -rotblau <- ggplot(data = result$loa[,1:2], mapping = aes(x = Idealtyp_Rot, y = Idealtyp_Blau, label = rownames(result$loa))) + geom_point(color = "black") + geom_text_repel() + geom_density2d() + xlim(-1,1) + ylim(-1,1) + theme(axis.text.x = element_text(colour = result$brief$fcolors[1])) + theme(axis.text.y = element_text(colour = result$brief$fcolors[2])) -blaugruen <- ggplot(data = result$loa[,2:3], mapping = aes(x = Idealtyp_Blau, y = Idealtyp_Gruen, label = rownames(result$loa))) + geom_point(color = "black") + geom_text_repel() + geom_density2d() + xlim(-1,1) + ylim(-1,1) + theme(axis.text.x = element_text(colour = result$brief$fcolors[2])) + theme(axis.text.y = element_text(colour = result$brief$fcolors[3])) -blaugruen -rotgruen <- ggplot(data = result$loa[,c(1,3)], mapping = aes(x = Idealtyp_Rot, y = Idealtyp_Gruen, label = rownames(result$loa))) + geom_point(color = "black") + geom_text_repel() + geom_density2d() + xlim(-1,1) + ylim(-1,1) + theme(axis.text.x = element_text(colour = result$brief$fcolors[1])) + theme(axis.text.y = element_text(colour = result$brief$fcolors[3])) -rotgruen -comps <- q.compplot(results = result) -grid.arrange(rotblau, blaugruen, rotgruen, comps) - -dev.set(5) -#quartz(title = "Scores") -rot <- q.scoreplot.ord(results = result, factor = 1, extreme.labels = c("Nicht Wünschenswert", "Wünschenswert"), incl.qdc = TRUE, label.scale = 700) -blau <- q.scoreplot.ord(results = result, factor = 2, extreme.labels = c("Nicht Wünschenswert", "Wünschenswert"), incl.qdc = TRUE, label.scale = 700) -gruen <- q.scoreplot.ord(results = result, factor = 3, extreme.labels = c("Nicht Wünschenswert", "Wünschenswert"), incl.qdc = TRUE, label.scale = 700) -grid.arrange(rot, blau, gruen) -``` - - -```{r cor-distro, fig.cap="Verteilung der Q-Korrelationen", warning = FALSE, echo = FALSE, warning=FALSE} -corplot <- NULL -corplot$wunsch <- ggplot(data = df$wunsch, mapping = aes(x = cors)) + geom_histogram(binwidth = 0.1) + ggtitle("Wünschenswert") + xlim(-1,1) -corplot$wahrsch <- ggplot(data = df$wahrsch, mapping = aes(x = cors)) + geom_histogram(binwidth = 0.1) + ggtitle("Wahrscheinlich") + xlim(-1,1) -library(gridExtra) -do.call(what = "grid.arrange", args = corplot) - run_parallel(data = sublab$q$wunsch, centile = .95, runs = 10000) -``` - -The same pattern is illustrated in the below correlation heatmap. -Notice that the names are randomly assigned, but consistent throughout this document. - - -## Factor Retention - -We now turn to the question of *how many* distinct viewpoints can be legitimately extracted from the data. -On a technical level, this is the factor retention choice. - -Notice that this a first step, on which all later results depend, especially when rotation is used (as would be expected). - - -```{r nfactors, fig.cap="Scree Plot with Parallel Analysis"} -source(file = "../pensieve/R/run_parallel.R") -source(file = "../pensieve/R/find_distro.R") -r95$wunsch <- run_parallel(data = sublab$q$wunsch, centile = .95, runs = 10000) -r95$wahrsch <- run_parallel(data = sublab$q$wahrsch, centile = .95, runs = 10000) - -nfac <- NULL -nfac$wunsch <- data.frame(Eigenvector = 1:ncol(sublab$q$wunsch), - r95 = r95$wunsch, - observed = prcomp(x = cor$wunsch, center = TRUE, scale. = TRUE)$sdev^2) -nfac$wunsch$corr <- nfac$wunsch$observed - nfac$wunsch$r95 - -nfac.melt <- NULL -nfac.melt$wunsch <- melt(data = nfac$wunsch, value.name = "Eigenvalue", id.vars = "Eigenvector") - -qscree <- NULL -g <- NULL -g <- ggplot(data = nfac.melt$wunsch, - mapping = aes(x = Eigenvector, y = Eigenvalue, color=variable)) + ggtitle("Wünschenswert") -g <- g + geom_line() -g <- g + labs(color = "Trajectory") -g <- g + scale_color_discrete(breaks = c("r95", - "observed", - "corr"), - labels = c("Random Data 95th Percentile", - "Observed Data", - "Corrected Data")) -g <- g + theme(legend.position = "bottom") -g <- g + scale_x_continuous(breaks = c(1:10), limits = c(1, 10)) -g <- g + ylim(0, ncol(sublab$q$wunsch)) -g <- g + geom_point() -g <- g + geom_hline(yintercept = 1, mapping = aes(linetype = "dotted"), show.legend = TRUE) -qscree$wunsch <- g + scale_linetype_manual(values=c("Kaiser-Guttman-Criterion" = "dotted")) - - -nfac$wahrsch <- data.frame(Eigenvector = 1:ncol(sublab$q$wahrsch), - r95 = r95$wahrsch, - observed = prcomp(x = cor$wahrsch, center = TRUE, scale. = TRUE)$sdev^2) -nfac$wahrsch$corr <- nfac$wahrsch$observed - nfac$wahrsch$r95 - -nfac.melt$wahrsch <- melt(data = nfac$wahrsch, value.name = "Eigenvalue", id.vars = "Eigenvector") - -nfac.melt$wahrsch - -g <- NULL -g <- ggplot(data = nfac.melt$wahrsch, - mapping = aes(x = Eigenvector, y = Eigenvalue, color=variable)) + ggtitle("Wahrscheinlich") -g <- g + geom_line() -g <- g + labs(color = "Trajectory") -g <- g + scale_color_discrete(breaks = c("r95", - "observed", - "corr"), - labels = c("Random Data 95th Percentile", - "Observed Data", - "Corrected Data")) -g <- g + theme(legend.position = "bottom") -g <- g + scale_x_continuous(breaks = c(1:10), limits = c(1, 10)) -g <- g + ylim(0, ncol(sublab$q$wahrsch)) -g <- g + geom_point() -g <- g + geom_hline(yintercept = 1, mapping = aes(linetype = "dotted"), show.legend = TRUE) -qscree$wahrsch <- g + scale_linetype_manual(values=c("Kaiser-Guttman-Criterion" = "dotted")) -do.call(what = "grid.arrange", args = qscree) -``` - -## Factor Interpretation Wünschenswert - -```{r q-analyis, echo=TRUE} -wunsch <- qmethod(dataset = sublab$q$wunsch, - nfactors = 1, - rotation = "none", - forced = TRUE, - cor.method = "spearman", - quietly = TRUE, - allow.confounded = TRUE, - threshold = "none") -wunsch$f_char$characteristics -``` - - -### Loadings - -```{r complot} -q.compplot(results = wunsch) -``` - - -### Factor Scores - -```{r factorplot, fig.width=12, fig.height=9, fig.cap="Ordinal Factor Scores for Factor 1"} -q.scoreplot.ord(results = wunsch, factor = "f1", incl.qdc = FALSE, quietly = TRUE, hyph.pattern = "de", hyphenate = FALSE, label.scale = 450) -``` - - -## Factor Interpretation Wahrscheinlich - -```{r q-analyis, echo=TRUE} -wahrsch <- qmethod(dataset = sublab$q$wahrsch, - nfactors = 3, - rotation = "quartimax", - forced = TRUE, - cor.method = "spearman", - quietly = TRUE, - allow.confounded = TRUE, - threshold = "none") -wahrsch <- q.fcolors(results = wahrsch) -wahrsch$f_char$characteristics -``` - - -### Loadings - -```{r q-loaplot, fig.cap="Distribution of Factor Loadings Across Groups"} -q.loaplot(results = wahrsch, names = TRUE, points = TRUE, alpha = 0.5, density = TRUE, grid = TRUE) -``` - -```{r complot} -q.compplot(results = wahrsch) -``` - - -### Factor Scores - -```{r factorplot, fig.width=12, fig.height=9, fig.cap="Ordinal Factor Scores for Factor 1"} -q.scoreplot.ord(results = wahrsch, factor = 3, incl.qdc = TRUE, quietly = TRUE, hyph.pattern = "de", hyphenate = FALSE, label.scale = 450) -``` - - -## Q Factor Loadings and Survey Results - -In the following, we look at any possible relationship between the factor loadings (here only one factor), and other variables measured in the survey part. -Notice that we're now in the "R-world" of statistics again, with people as cases, and items as variables. -The Q loadings used here are simply an indication to what degree the above-described factor is shared by some person, and thus becomes a (composite) variable in itself. - -The below analyses, though robust and simple, may not be considered fully appropriate for the data: - -1. Much of the data is logical or ordinally scaled, and it may not be meaningfully coerced to ratio-scaled data. -2. There may be a fair amount of multicollinearity and autocorrelation in the data (though the methods used below should be quite robust against that). -3. There are lot of missing values in the data, limiting the analysis. - In the below, we use all variables and observations, even when there are missing values (pairwise deletions). - -In the below, we use continuous methods, in effect treating the logical variables as dummies. -A proper analysis would use appropriate inferential tests (Chi-squared test etc.), but is unlikely to yield dramatically different results. - -Before we start, it will be helpful too look at the spread and central tendency of the variables in question. - - -```{r q-r-prep} -vwn$r$qloa <- qres$loa[[1]] # get loadings -vwn$rqanal <- vwn$r[,-c(1, 2, 43:51)] -vwn$rqanal <- sapply(X = vwn$rqanal, FUN = function(x) {as.numeric(x)}) -rownames(vwn$rqanal) <- rownames(vwn$r) - -rqsum <- data.frame( - sd = apply(X = vwn$rqanal, MARGIN = 2, FUN = "sd", na.rm = TRUE), - mean = apply(X = vwn$rqanal, MARGIN = 2, FUN = "mean", na.rm = TRUE), - min = apply(X = vwn$rqanal, MARGIN = 2, FUN = "min", na.rm = TRUE), - max = apply(X = vwn$rqanal, MARGIN = 2, FUN = "max", na.rm = TRUE) -) - -kable(rqsum) -``` - -```{r survey-descriptives, fig.width = 10, fig.height = 10, fig.cap="Binned Distributions of all R-Variables"} -rqanal.melt <- melt(vwn$rqanal) -colnames(rqanal.melt) <- c("Participant", "Variable", "Value") - -vars <- read.csv(file = "variables.csv", header = TRUE, stringsAsFactors = FALSE) - -rqanal.melt$Heading <- apply(X = rqanal.melt, MARGIN = 1, FUN = function(x) { - vars$Überschrift.Kurz[vars$Variable.Kurz == x["Variable"]] -}) - -g <- ggplot(data = rqanal.melt, mapping = aes(x = Value)) -g <- g + geom_histogram() -g <- g + facet_wrap(facets = ~ Variable, scales = "free_x") -g -``` - -The below correlations heatmap reveals very few considerable correlations. -There are some expected demographic associations (Alter, Betriebszugehörigkeit), as well as some autocorrelations (the division where people work as well as their level of education). -Different kinds of teamwork also appear to be correlated. - -There are some weak (negative) association between Q loadings and independent research on "Industry 4.0", though this could be within the margin of sampling error. - -```{r q-r-cor, fig.height=13, fig.width=13, fig.cap="Q Loadings and Survey Items Correlated"} -rqcor <- cor(vwn$rqanal, use = "pairwise.complete.obs") -q.corrplot(rqcor) -``` - -This result is borne out by a preliminary principal components analysis; Q loadings, in particular, are *not* strongly associated with any of the other variables. - -The lack of association is, perhaps, not very surprising given how little variance there is *on* the q loadings to begin with; this one viewpoint is broadly shared by many people, and there is *no* one who disagrees with it in particular (no negative loadings). -A correlate of these loadings would have to vary quite strongly, and systematically to yield a significant pattern. - -```{r q-pca, include = FALSE, eval = FALSE} -library(paran) -parres <- paran(mat = rqcor, iterations = 1000, n = nrow(vwn$r)) -sum(parres$Ev[1:3])/nrow(rqcor) - -library(psych) -rqpca <- principal(r = rqcor, nfactors = 4, rotate = "varimax") -rqpca$loadings -plot(x = rqpca) - -rqpca <- qmethod(dataset = vwn$rqanal, nfactors = 4, rotation = "varimax", forced = FALSE, distribution = c(1:nrow(vwn$rqanal)), cor.method = "pearson", threshold = "none", allow.confounded = TRUE, use = "pairwise.complete.obs") -help(principal) -principal() -str(vwn$rqanal) -help(qmethod) -str(rqcor) -``` - -```{r meanscomp, include = FALSE, eval = FALSE} -vwn$r -ddply(.data = vwn$r, .variables = colnames(vwn$r)[3:42], .fun = summarise, mean = mean(qloa)) -str(vwn$r) -mean(vwn$r$qloa) -``` - -### Selected Mean Q-Loadings per Group - -The below selected mean comparisons reveal *some*, albeit small differences in loadings. -In addition, some of the more pronounced differences are probably due to sampling error, as these divergent subgroups are often very small (N = 1-5). - -```{r meanscomp-sel} -kable(ddply(.data = vwn$r, .variables = "ausb_ausb", .fun = summarize, mean = mean(qloa))) -kable(ddply(.data = vwn$r, .variables = "ausb_stud", .fun = summarize, mean = mean(qloa))) -kable(ddply(.data = vwn$r, .variables = "bet_aktiv", .fun = summarize, mean = mean(qloa))) # only 3 "nevers"! -kable(ddply(.data = vwn$r, .variables = "infomed_dir", .fun = summarize, mean = mean(qloa))) # again, very few nie -kable(ddply(.data = vwn$r, .variables = "team_idisz", .fun = summarize, mean = mean(qloa))) # again, very few nie -kable(ddply(.data = vwn$r, .variables = "i40_bef", .fun = summarize, mean = mean(qloa))) # again, only 4 no -#summary(vwn$r$i40_bef) -kable(ddply(.data = vwn$r, .variables = "i40_bek", .fun = summarize, mean = mean(qloa))) # again, only 2 no -#summary(vwn$r$i40_bek) -``` - - -## Double checking - -```{r tkuse} -kable(summary(vwn$r[,c("tkuse_msg", "tkuse_ent", "tkuse_mobile", "tkuse_news", "tkuse_shop")])) -levels(vwn$r$tkuse_mobile) -``` - - -## References diff --git a/index.Rmd b/index.Rmd index 0984bf6..b091b9a 100644 --- a/index.Rmd +++ b/index.Rmd @@ -14,7 +14,9 @@ site: bookdown::bookdown_site always_allow_html: yes --- -# Concourse +# Method + +## Concourse ```{r setup, cache = FALSE, include = FALSE} source("setup.R") @@ -119,6 +121,15 @@ Bitte nennen Sie uns ein fehlendes Item, dass Sie für *nicht wahrscheinlich* ge \vspace{70pt} + +## Distribution + +```{r distro} +distro <- qmethod::make.distribution(nstat = nrow(concourse), max.bin = 5) +``` + +# Results + ## Data Import ```{r data-import} @@ -372,5 +383,74 @@ comments["live-2-work", peoplevars[peoplevars$userid == "7", "name"], ] <- "Inte comments["own-boss", peoplevars[peoplevars$userid == "9", "name"], ] <- "bezogen auf die Arbeit oder mein Leben?" # cleanup -rm(list = ls()[!(ls() %in% c("peoplevars", "comments", "concourse", "sorts"))]) +rm(list = ls()[!(ls() %in% c("peoplevars", "comments", "concourse", "sorts", "distro"))]) + +# fix names +dimnames(sorts)$items <- lettercase::make_names(names = dimnames(sorts)$items) +dimnames(sorts) +concourse$handle <- lettercase::make_names(concourse$handle) +dimnames(sorts)$people <- lettercase::make_names(names = dimnames(sorts)$people) +peoplevars$name <- lettercase::make_names(names = peoplevars$name) +``` + +## Descriptives + +# Analysis + +## Q Descriptives + +We first consider simply the correlations of the Q-datamatrix, that is, the correlations *between people-variables* across *item_cases*. +To at least approximate the ordinal (not interval) nature of the Q data collection, especially under a *forced distribution*, the correlations are calculated using Spearman's $\rho$, not the customary Pearson's $\rho$. + +```{r kill-nas} +sorts <- sorts[, apply(X = sorts, MARGIN = 2, FUN = function(x) {!(any(is.na(x)))}), ] # kill any NAs +``` + +```{r cors, include = FALSE, echo=FALSE} +cor <- NULL +cor$des <- QCors(cors = cor(x = sorts[,,"desirable"], method = "spearman")) +cor$prob <- QCors(cors = cor(x = sorts[,,"probable"], method = "spearman")) +plot(cor$des, use_js = FALSE) + ggtitle("Ähnlichkeit der Subjektivität der Teilnehmenden (Wünschenswert)") +plot(cor$prob, use_js = FALSE) + ggtitle("Ähnlichkeit der Subjektivität der Teilnehmenden (Wahrscheinlich)") +``` + +Typical for some Q data, there are also less negative correlations than positive correlations, indicating that people do *not*, on the surface, hold polar opposite as beliefs. + +## Factor Extraction + +```{r paran, include = FALSE, eval = FALSE} +# pensieve::run_parallel(dataset = sorts[,,"desirable"]) # does not work b/c of non-integers +allsorts <- cbind(sorts[,,"desirable"], sorts[,,"probable"]) +paranres <- qmethod::q.nfactors(dataset = allsorts, cutoff = 10, siglevel = 0.05, iterations = 10000) +paranres$screeplot + ggtitle("Kriterien zur Faktorgüte (Screeplot und Parallelanalyse)") +paran::paran(x = allsorts, iterations = 10000, centile = 95) +``` + +```{r extraction} +res <- NULL +res$des <- qmethod::qmethod(dataset = sorts[,,"desirable"], nfactors = 2, rotation = "varimax", forced = FALSE, cor.method = "spearman", threshold = "none", allow.confounded = TRUE, quietly = TRUE, distribution = rep(x = c(min(sorts, na.rm = TRUE):max(sorts, na.rm = TRUE)), times = distro)) +res$prob <- qmethod::qmethod(dataset = sorts[,,"probable"], nfactors = 2, rotation = "varimax", forced = FALSE, cor.method = "spearman", threshold = "none", allow.confounded = TRUE, quietly = TRUE, distribution = rep(x = c(min(sorts, na.rm = TRUE):max(sorts, na.rm = TRUE)), times = distro)) +res$des <- qmethod::q.fnames(results = res$des, fnames = c("Idealtyp Rot (Wünschenswert)", "Idealtyp Blau (Wünschenswert)")) +res$des <- qmethod::q.fcolors(results = res$des) +res$prob <- qmethod::q.fnames(results = res$prob, fnames = c("Idealtyp Rot (Wahrscheinlich)", "Idealtyp Blau (Wahrscheinlich)")) +res$prob <- qmethod::q.fcolors(results = res$prob) +``` + +```{r loas} +plot(x = QLoas(loas = as.matrix(res$des$loa)), use_js = FALSE, by = "people", r2 = FALSE) +plot(x = QLoas(loas = as.matrix(res$prob$loa)), use_js = FALSE, by = "people", r2 = FALSE) +qmethod::q.loaplot(results = res$des) +qmethod::q.loaplot(results = res$prob) +``` + +```{r comp} +qmethod::q.compplot(results = res$des) +qmethod::q.compplot(results = res$prob) +``` + +```{r scores} +qmethod::q.scoreplot.ord(results = res$des, factor = 1, incl.qdc = FALSE, quietly = FALSE) +qmethod::q.scoreplot.ord(results = res$des, factor = 2, incl.qdc = FALSE, quietly = FALSE) +qmethod::q.scoreplot.ord(results = res$prob, factor = 1, incl.qdc = FALSE, quietly = FALSE) +qmethod::q.scoreplot.ord(results = res$prob, factor = 2, incl.qdc = FALSE, quietly = FALSE) ``` diff --git a/setup.R b/setup.R index 3505755..e1bbba2 100644 --- a/setup.R +++ b/setup.R @@ -10,8 +10,15 @@ library(grid) library(reshape2) library(readxl) library(stringr) -# install.packages("../pensieve", repos = NULL, type = "source") +library(plyr) +library(xtable) +library(pander) +if (interactive()) { + install.packages("../pensieve", repos = NULL, type = "source", INSTALL_opts = c('--no-lock')) + install.packages("../qmethod", repos = NULL, type = "source", INSTALL_opts = c('--no-lock')) +} library(pensieve) +library(qmethod) # knitr setup ==== #opts_knit$set(root.dir = normalizePath(getwd())) # make sure the knitr path is correct