From 7b9dc778a88c8d6c8365ea5a1a5788bb0d7af651 Mon Sep 17 00:00:00 2001
From: Maximilian Held <info@maxheld.de>
Date: Tue, 3 Oct 2017 18:54:02 +0200
Subject: [PATCH] first run at complete analysis, delete legacy script, as per
 #28

---
 DESCRIPTION |  10 +-
 _sublab.Rmd | 518 ----------------------------------------------------
 index.Rmd   |  84 ++++++++-
 setup.R     |   9 +-
 4 files changed, 98 insertions(+), 523 deletions(-)
 delete mode 100644 _sublab.Rmd

diff --git a/DESCRIPTION b/DESCRIPTION
index 8bdf0f5..05d9ac4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -20,6 +20,12 @@ Imports:
   shinyjs,
   rdrop2,
   shinyjqui,
-  rsconnect
+  rsconnect,
+  plyr,
+  xtable,
+  pander,
+  qmethod,
+  lettercase
 Remotes:
-  maxheld83/pensieve
+  maxheld83/pensieve,
+  maxheld83/qmethod
diff --git a/_sublab.Rmd b/_sublab.Rmd
deleted file mode 100644
index a2a5ec0..0000000
--- a/_sublab.Rmd
+++ /dev/null
@@ -1,518 +0,0 @@
----
-title: "VWN Analyse"
-author: "Maximilian Held"
-date: "11 June 2016"
-output: pdf_document
-library: held_library.bib
-header-includes: \usepackage[utf8]{inputenc}
----
-
-```{r setup, include=FALSE, echo=FALSE}
-knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, cache = TRUE)
-options(scipen = 1, digits = 2)
-```
-
-```{r preamble, echo =FALSE}
-library(plyr)
-library(babynames)
-library(stringr)
-library(reshape2)
-library(knitr)
-library(ggplot2)
-library(xtable)
-library(pander)
-install.packages(repos = NULL, type = "source", INSTALL_opts = c('--no-lock'), pkgs = c("../qmethod"))  # notice this is the LOCAL qmethod
-library(qmethod)
-install.packages(repos = NULL, type = "source", INSTALL_opts = c('--no-lock'), pkgs = c("../pensieve"))  # notice this is the LOCAL qmethod
-library(pensieve)
-```
-
-```{r import-items}
-sublab <- NULL
-sublab$items <- read.csv(file = "qset.csv", header = TRUE, stringsAsFactors = FALSE, sep = ";")
-rownames(sublab$items) <- sublab$items$item_handle_english
-```
-
-
-```{r find-distro}
-distro <- make.distribution(nstat = nrow(sublab$items), max.bin = 5)
-distro
-```
-
-```{r make-cards, include=FALSE, eval=FALSE, echo = FALSE}
-items_print <- as.matrix(sublab$items$item)
-rownames(items_print) <- sublab$items$item_handle
-colnames(items_print) <- "german"
-
-items_lookup <- as.matrix(as.character(sublab$items$item_id))
-rownames(items_lookup) <- sublab$items$item_handle
-colnames(items_lookup) <- "german"
-
-
-make.cards(q.set = items_print,
-           manual.lookup = items_lookup,
-           wording.font.size = "LARGE",
-           show.handles = FALSE,
-           babel.language = "ngerman",
-           duplex.double = TRUE,
-           output.pdf = TRUE,
-           file.name = "cards-sublab_participants")
-```
-
-
-```{r data-import, echo = FALSE, include = FALSE}
-rawdata <- read.csv(file = "raw/Subjektivitt_der_Arbeit_Pretest_Uni_Hohenheim_STUDENTS.csv",
-                    header = TRUE,
-                    sep = ";",
-                    na.strings = "",
-                    stringsAsFactors = FALSE)
-
-#fix time
-datetime <- strptime(x = rawdata$datetime, format = "%Y-%m-%d %H:%M:%S", tz = "")
-
-# fix durations
-durations <- rawdata[,c("dur0", "dur1", "dur2", "dur3", "dur4", "dur5")]
-colnames(durations) <- c("dur_info", "dur_roughsort", "dur_mainsort", "dur_checksort", "dur_extstat", "dur_survey")
-durations <- durations/60  # make this into minutes
-
-
-# fix survey data ====
-
-# Geschlecht
-gender <- rawdata[, "form0"]
-gender <- factor(x = gender,
-                 levels = c(0:2),
-                 labels = c("männlich", "weiblich", "andere"),
-                 exclude = 3)
-
-# Wie alt sind Sie?
-alter <- rawdata[, "form1"]
-alter <- ordered(x = alter,
-                 levels = c(0:4),
-                 exclude = 5,
-                 labels = c("jünger als 16", "17 bis 18", "19 bis 21", "22 bis 24", "24 oder älter"))
-
-
-# Welche Ausbildungsformen haben Sie absolviert? (Mehrfachnennungen möglich)
-dimension <- rawdata[, "form2"]
-dimension <- factor(x = dimension,
-                     levels = c(0:1),
-                     labels = c("wünschenswert", "wahrscheinlich"))
-dimension
-
-teilnehmer <- rawdata[, "form3"]
-
-# fix q data ==========
-# let's first get the statements and give them proper handles
-# library(XML)
-# qset <- xmlParse(file = "tool/settings/statements.xml")
-# qset <- xmlToList(node = qset, addAttributes = FALSE, simplify = TRUE)
-# qset <- data.frame(item = unlist(qset), item_id = 1:length(qset))
-# qset$item_handle <- NA
-# write.csv(x = qset, file = "qset.csv")
-sublab$items
-
-qdata <- rawdata[,3:43]
-colnames(qdata) <- sublab$items$item_handle_english
-
-# now put everything in the right order and back in place ====
-sublab$clean <- cbind(teilnehmer,
-                      datetime,
-                      alter,
-                      gender,
-                      dimension,
-                      qdata,
-                      durations,
-                      deparse.level = 2,
-                      stringsAsFactors = FALSE)
-
-
-# kill early tests (there happen to be none)
-# starttime was provided by email from VWN
-starttime <- strptime(x = "2016-07-15 10:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "")
-sublab$clean <- sublab$clean[sublab$clean$datetime > starttime, ]
-
-# cleanup ====
-#rm(list = ls()[!ls()=="vwn"])
-
-# separate survey and Q ===
-sublab$q$all <- sublab$clean[, 6:46]
-sublab$q$all$teilnemer <- sublab$clean$teilnehmer
-sublab$q$wunsch <- sublab$q$all[sublab$clean$dimension == "wünschenswert",]
-sublab$q$wahrsch <- sublab$q$all[sublab$clean$dimension == "wahrscheinlich",]
-rownames(sublab$q$wunsch) <- sublab$q$wunsch$teilnemer
-sublab$q$wunsch$teilnemer <- NULL
-rownames(sublab$q$wahrsch) <- sublab$q$wahrsch$teilnemer
-sublab$q$wahrsch$teilnemer <- NULL
-
-sublab$q$wunsch <- t(sublab$q$wunsch)
-sublab$q$wahrsch <- t(sublab$q$wahrsch)
-
-str(sublab$q$wunsch)
-qdata3d <- array(data = c(sublab$q$wunsch, sublab$q$wahrsch), dim = c(nrow(sublab$q$wunsch), ncol(sublab$q$wahrsch), 2), dimnames = list(items = rownames(sublab$q$wahrsch), people = colnames(sublab$q$wahrsch), condition = c("desirable", "probable")))
-save(qdata3d, file = "qdata3d.Rdata")
-str(qdata3d)
-
-# extract comments feedback ===
-rawdata[["comment8"]]
-rawdata[["comment18"]]
-feedback <- qdata3d
-feedback[,,] <- NA
-feedback[,,"positive"]
-str(feedback)
-
-
-
-dim(qdata3d)
-qdata3d
-```
-
-## Q Analysis
-
-### Q Descriptives
-
-We first consider simply the correlations of the Q-datamatrix, that is, the correlations *between people-variables*  across *item_cases*.
-To at least approximate the ordinal (not interval) nature of the Q data collection, especially under a *forced distribution*, the correlations are calculated using Spearman's $\rho$, not the customary Pearson's $\rho$.
-
-```{r cors, include = FALSE, echo=FALSE}
-cor <- NULL
-cor$wunsch <- cor(x = sublab$q$wunsch, method = "spearman")
-cor$wahrsch <- cor(x = sublab$q$wahrsch, method = "spearman")
-df <- NULL
-df$wunsch <- data.frame(cors = cor$wunsch[upper.tri(x = cor$wunsch, diag = FALSE)])
-df$wahrsch <- data.frame(cors = cor$wahrsch[upper.tri(x = cor$wahrsch, diag = FALSE)])
-```
-
-The correlations, summarized in the below histogram are, on average, relatively high by Q standards, with a mean of around `r median(df$cors)` and a median around `r mean(df$cords)`, though there are quite few high correlations, indicating broadly shared, though not closely similar patterns.
-Typical for some Q data, there are also less negative correlations than positive correlations, indicating that people do *not*, on the surface, hold polar opposite as beliefs.
-
-```{r correlations, fig.cap="Q-Korrelationen zwischen den Teilnehmenden", fig.height=25, fig.width=25}
-heatmap <- NULL
-heatmap$wunsch <- q.corrplot(corr.matrix = cor$wunsch, quietly = TRUE) + ggtitle("Wünschenswert")
-heatmap$wahrsch <- q.corrplot(corr.matrix = cor$wahrsch, quietly = TRUE) + ggtitle("Wahrscheinlich")
-library(gridExtra)
-do.call(what = "grid.arrange", args = heatmap)
-```
-
-```{r liveplot}
-wunsch <- sublab$q$wunsch
-colnames(wunsch)[!colnames(wunsch) %in% c("Bernadette", "Jan", "Christiane")] <- paste("Student", LETTERS[1:16])
-
-rawnew <- read.csv(file = "raw/Subjektivitt_der_Arbeit_Pretest_Uni_Hohenheim_1_0.csv", header = TRUE, stringsAsFactors = FALSE, sep = ";")
-new <- rawnew[, 3:43]
-new <- t(new)
-#str(wunsch)  # 19 participants
-wunsch <- cbind(wunsch, new)
-colnames(wunsch)[20:ncol(wunsch)] <- rawnew[,47]
-wunsch <- wunsch[,colnames(wunsch) != "MartinTest"]
-
-str(sublab$items)
-qset <- sublab$items[, c("item_id", "item_german")]
-qset <- as.matrix(qset)
-rownames(qset) <- sublab$items$item_handle_german
-lookup <- as.matrix(qset[,1])
-lookup <- as.integer(lookup)
-lookup <- as.matrix(lookup)
-
-byhand <- import.q.sorts(q.sorts.dir = "semiraw", q.set = qset, manual.lookup = lookup, header = TRUE)
-
-
-#quartz(title = "Cors")
-dev.set(which = 2)
-q.corrplot(corr.matrix = cor(wunsch, method = "spearman")) + ggtitle("Ähnlichkeit der Subjektivität der Teilnehmenden")
-#quartz(title = "NFac")
-dev.set(which = 3)
-nfac <- q.nfactors(dataset = sublab$q$wahrsch, cutoff = 9, cor.method = "spearman", siglevel = 0.01)
-nfac$screeplot + ggtitle("Kriterien zur Faktorgüte (Screeplot und Parallelanalyse)")
-rownames(wunsch) <- sublab$items$item_german
-result <- qmethod(dataset = wunsch, nfactors = 3, rotation = "varimax", forced = TRUE, cor.method = "spearman", threshold = "none", allow.confounded = TRUE)
-result <- q.fnames(results = result, fnames = c("Idealtyp_Rot", "Idealtyp_Blau", "Idealtyp_Gruen"))
-result <- q.fcolors(results = result)
-
-#quartz(title = "Loas")
-dev.set(which = 4)
-#q.loaplot(results = result, density = TRUE, rug = TRUE)
-rotblau <- ggplot(data = result$loa[,1:2], mapping = aes(x = Idealtyp_Rot, y = Idealtyp_Blau, label = rownames(result$loa))) + geom_point(color = "black") + geom_text_repel() + geom_density2d() + xlim(-1,1) + ylim(-1,1) + theme(axis.text.x = element_text(colour = result$brief$fcolors[1])) + theme(axis.text.y = element_text(colour = result$brief$fcolors[2]))
-blaugruen <- ggplot(data = result$loa[,2:3], mapping = aes(x = Idealtyp_Blau, y = Idealtyp_Gruen, label = rownames(result$loa))) + geom_point(color = "black") + geom_text_repel() + geom_density2d() + xlim(-1,1) + ylim(-1,1) + theme(axis.text.x = element_text(colour = result$brief$fcolors[2])) + theme(axis.text.y = element_text(colour = result$brief$fcolors[3]))
-blaugruen
-rotgruen <- ggplot(data = result$loa[,c(1,3)], mapping = aes(x = Idealtyp_Rot, y = Idealtyp_Gruen, label = rownames(result$loa))) + geom_point(color = "black") + geom_text_repel() + geom_density2d() + xlim(-1,1) + ylim(-1,1) + theme(axis.text.x = element_text(colour = result$brief$fcolors[1])) + theme(axis.text.y = element_text(colour = result$brief$fcolors[3]))
-rotgruen
-comps <- q.compplot(results = result)
-grid.arrange(rotblau, blaugruen, rotgruen, comps)
-
-dev.set(5)
-#quartz(title = "Scores")
-rot <- q.scoreplot.ord(results = result, factor = 1, extreme.labels = c("Nicht Wünschenswert", "Wünschenswert"), incl.qdc = TRUE, label.scale = 700)
-blau <- q.scoreplot.ord(results = result, factor = 2, extreme.labels = c("Nicht Wünschenswert", "Wünschenswert"), incl.qdc = TRUE, label.scale = 700)
-gruen <- q.scoreplot.ord(results = result, factor = 3, extreme.labels = c("Nicht Wünschenswert", "Wünschenswert"), incl.qdc = TRUE, label.scale = 700)
-grid.arrange(rot, blau, gruen)
-```
-
-
-```{r cor-distro, fig.cap="Verteilung der Q-Korrelationen", warning = FALSE, echo = FALSE, warning=FALSE}
-corplot <- NULL
-corplot$wunsch <- ggplot(data = df$wunsch, mapping = aes(x = cors)) + geom_histogram(binwidth = 0.1) + ggtitle("Wünschenswert") + xlim(-1,1)
-corplot$wahrsch <- ggplot(data = df$wahrsch, mapping = aes(x = cors)) + geom_histogram(binwidth = 0.1) + ggtitle("Wahrscheinlich") + xlim(-1,1)
-library(gridExtra)
-do.call(what = "grid.arrange", args = corplot)
- run_parallel(data = sublab$q$wunsch, centile = .95, runs = 10000)
-```
-
-The same pattern is illustrated in the below correlation heatmap.
-Notice that the names are randomly assigned, but consistent throughout this document.
-
-
-## Factor Retention
-
-We now turn to the question of *how many* distinct viewpoints can be legitimately extracted from the data.
-On a technical level, this is the factor retention choice.
-
-Notice that this a first step, on which all later results depend, especially when rotation is used (as would be expected).
-
-
-```{r nfactors, fig.cap="Scree Plot with Parallel Analysis"}
-source(file = "../pensieve/R/run_parallel.R")
-source(file = "../pensieve/R/find_distro.R")
-r95$wunsch <- run_parallel(data = sublab$q$wunsch, centile = .95, runs = 10000)
-r95$wahrsch <- run_parallel(data = sublab$q$wahrsch, centile = .95, runs = 10000)
-
-nfac <- NULL
-nfac$wunsch <- data.frame(Eigenvector = 1:ncol(sublab$q$wunsch),
-                   r95 = r95$wunsch,
-                   observed = prcomp(x = cor$wunsch, center = TRUE, scale. = TRUE)$sdev^2)
-nfac$wunsch$corr <- nfac$wunsch$observed - nfac$wunsch$r95
-
-nfac.melt <- NULL
-nfac.melt$wunsch <- melt(data = nfac$wunsch, value.name = "Eigenvalue", id.vars = "Eigenvector")
-
-qscree <- NULL
-g <- NULL
-g <- ggplot(data = nfac.melt$wunsch, 
-            mapping = aes(x = Eigenvector, y = Eigenvalue, color=variable)) + ggtitle("Wünschenswert")
-g <- g + geom_line() 
-g <- g + labs(color = "Trajectory")
-g <- g + scale_color_discrete(breaks = c("r95", 
-                                          "observed", 
-                                          "corr"),
-                              labels = c("Random Data 95th Percentile",
-                                          "Observed Data", 
-                                            "Corrected Data"))
-g <- g + theme(legend.position = "bottom")
-g <- g + scale_x_continuous(breaks = c(1:10), limits = c(1, 10))
-g <- g + ylim(0, ncol(sublab$q$wunsch))
-g <- g + geom_point()
-g <- g + geom_hline(yintercept = 1, mapping = aes(linetype = "dotted"), show.legend = TRUE)
-qscree$wunsch <- g + scale_linetype_manual(values=c("Kaiser-Guttman-Criterion" = "dotted"))
-
-
-nfac$wahrsch <- data.frame(Eigenvector = 1:ncol(sublab$q$wahrsch),
-                   r95 = r95$wahrsch,
-                   observed = prcomp(x = cor$wahrsch, center = TRUE, scale. = TRUE)$sdev^2)
-nfac$wahrsch$corr <- nfac$wahrsch$observed - nfac$wahrsch$r95
-
-nfac.melt$wahrsch <- melt(data = nfac$wahrsch, value.name = "Eigenvalue", id.vars = "Eigenvector")
-
-nfac.melt$wahrsch
-
-g <- NULL
-g <- ggplot(data = nfac.melt$wahrsch, 
-            mapping = aes(x = Eigenvector, y = Eigenvalue, color=variable)) + ggtitle("Wahrscheinlich")
-g <- g + geom_line() 
-g <- g + labs(color = "Trajectory")
-g <- g + scale_color_discrete(breaks = c("r95", 
-                                          "observed", 
-                                          "corr"),
-                              labels = c("Random Data 95th Percentile",
-                                          "Observed Data", 
-                                            "Corrected Data"))
-g <- g + theme(legend.position = "bottom")
-g <- g + scale_x_continuous(breaks = c(1:10), limits = c(1, 10))
-g <- g + ylim(0, ncol(sublab$q$wahrsch))
-g <- g + geom_point()
-g <- g + geom_hline(yintercept = 1, mapping = aes(linetype = "dotted"), show.legend = TRUE)
-qscree$wahrsch <- g + scale_linetype_manual(values=c("Kaiser-Guttman-Criterion" = "dotted"))
-do.call(what = "grid.arrange", args = qscree)
-```
-
-## Factor Interpretation Wünschenswert
-
-```{r q-analyis, echo=TRUE}
-wunsch <- qmethod(dataset = sublab$q$wunsch, 
-                  nfactors = 1, 
-                  rotation = "none", 
-                  forced = TRUE, 
-                  cor.method = "spearman", 
-                  quietly = TRUE, 
-                  allow.confounded = TRUE, 
-                  threshold = "none")
-wunsch$f_char$characteristics
-```
-
-
-### Loadings
-
-```{r complot}
-q.compplot(results = wunsch)
-```
-
-
-### Factor Scores
-
-```{r factorplot, fig.width=12, fig.height=9, fig.cap="Ordinal Factor Scores for Factor 1"}
-q.scoreplot.ord(results = wunsch, factor = "f1", incl.qdc = FALSE, quietly = TRUE, hyph.pattern = "de", hyphenate = FALSE, label.scale = 450)
-```
-
-
-## Factor Interpretation Wahrscheinlich
-
-```{r q-analyis, echo=TRUE}
-wahrsch <- qmethod(dataset = sublab$q$wahrsch, 
-                  nfactors = 3, 
-                  rotation = "quartimax", 
-                  forced = TRUE, 
-                  cor.method = "spearman", 
-                  quietly = TRUE, 
-                  allow.confounded = TRUE, 
-                  threshold = "none")
-wahrsch <- q.fcolors(results = wahrsch)
-wahrsch$f_char$characteristics
-```
-
-
-### Loadings
-
-```{r q-loaplot, fig.cap="Distribution of Factor Loadings Across Groups"}
-q.loaplot(results = wahrsch, names = TRUE, points = TRUE, alpha = 0.5, density = TRUE, grid = TRUE)
-```
-
-```{r complot}
-q.compplot(results = wahrsch)
-```
-
-
-### Factor Scores
-
-```{r factorplot, fig.width=12, fig.height=9, fig.cap="Ordinal Factor Scores for Factor 1"}
-q.scoreplot.ord(results = wahrsch, factor = 3, incl.qdc = TRUE, quietly = TRUE, hyph.pattern = "de", hyphenate = FALSE, label.scale = 450)
-```
-
-
-## Q Factor Loadings and Survey Results
-
-In the following, we look at any possible relationship between the factor loadings (here only one factor), and other variables measured in the survey part.
-Notice that we're now in the "R-world" of statistics again, with people as cases, and items as variables.
-The Q loadings used here are simply an indication to what degree the above-described factor is shared by some person, and thus becomes a (composite) variable in itself.
-
-The below analyses, though robust and simple, may not be considered fully appropriate for the data:
-
-1. Much of the data is logical or ordinally scaled, and it may not be meaningfully coerced to ratio-scaled data.
-2. There may be a fair amount of multicollinearity and autocorrelation in the data (though the methods used below should be quite robust against that).
-3. There are lot of missing values in the data, limiting the analysis.
-  In the below, we use all variables and observations, even when there are missing values (pairwise deletions).
-
-In the below, we use continuous methods, in effect treating the logical variables as dummies.
-A proper analysis would use appropriate inferential tests (Chi-squared test etc.), but is unlikely to yield dramatically different results.
-
-Before we start, it will be helpful too look at the spread and central tendency of the variables in question.
-
-
-```{r q-r-prep}
-vwn$r$qloa <- qres$loa[[1]]  # get loadings
-vwn$rqanal <- vwn$r[,-c(1, 2, 43:51)]
-vwn$rqanal <- sapply(X = vwn$rqanal, FUN = function(x) {as.numeric(x)})
-rownames(vwn$rqanal) <- rownames(vwn$r)
-
-rqsum <- data.frame(
-  sd = apply(X = vwn$rqanal, MARGIN = 2, FUN = "sd", na.rm = TRUE),
-  mean = apply(X = vwn$rqanal, MARGIN = 2, FUN = "mean", na.rm = TRUE),
-  min = apply(X = vwn$rqanal, MARGIN = 2, FUN = "min", na.rm = TRUE),
-  max = apply(X = vwn$rqanal, MARGIN = 2, FUN = "max", na.rm = TRUE)
-)
-
-kable(rqsum)
-```
-
-```{r survey-descriptives, fig.width = 10, fig.height = 10, fig.cap="Binned Distributions of all R-Variables"}
-rqanal.melt <- melt(vwn$rqanal)
-colnames(rqanal.melt) <- c("Participant", "Variable", "Value")
-
-vars <- read.csv(file = "variables.csv", header = TRUE, stringsAsFactors = FALSE)
-
-rqanal.melt$Heading <- apply(X = rqanal.melt, MARGIN = 1, FUN = function(x) {
-  vars$Überschrift.Kurz[vars$Variable.Kurz == x["Variable"]]
-})
-
-g <- ggplot(data = rqanal.melt, mapping = aes(x = Value))
-g <- g + geom_histogram()
-g <- g + facet_wrap(facets = ~ Variable, scales = "free_x")
-g
-```
-
-The below correlations heatmap reveals very few considerable correlations.
-There are some expected demographic associations (Alter, Betriebszugehörigkeit), as well as some autocorrelations (the division where people work as well as their level of education).
-Different kinds of teamwork also appear to be correlated.
-
-There are some weak (negative) association between Q loadings and independent research on "Industry 4.0", though this could be within the margin of sampling error.
-
-```{r q-r-cor, fig.height=13, fig.width=13, fig.cap="Q Loadings and Survey Items Correlated"}
-rqcor <- cor(vwn$rqanal, use = "pairwise.complete.obs")
-q.corrplot(rqcor)
-```
-
-This result is borne out by a preliminary principal components analysis; Q loadings, in particular, are *not* strongly associated with any of the other variables.
-
-The lack of association is, perhaps, not very surprising given how little variance there is *on* the q loadings to begin with; this one viewpoint is broadly shared by many people, and there is *no* one who disagrees with it in particular (no negative loadings).
-A correlate of these loadings would have to vary quite strongly, and systematically to yield a significant pattern.
-
-```{r q-pca, include = FALSE, eval = FALSE}
-library(paran)
-parres <- paran(mat = rqcor, iterations = 1000, n = nrow(vwn$r))
-sum(parres$Ev[1:3])/nrow(rqcor)
-
-library(psych)
-rqpca <- principal(r = rqcor, nfactors = 4, rotate = "varimax")
-rqpca$loadings
-plot(x = rqpca)
-
-rqpca <- qmethod(dataset = vwn$rqanal, nfactors = 4, rotation = "varimax", forced = FALSE, distribution = c(1:nrow(vwn$rqanal)), cor.method = "pearson", threshold = "none", allow.confounded = TRUE, use = "pairwise.complete.obs")
-help(principal)
-principal()
-str(vwn$rqanal)
-help(qmethod)
-str(rqcor)
-```
-
-```{r meanscomp, include = FALSE, eval = FALSE}
-vwn$r
-ddply(.data = vwn$r, .variables = colnames(vwn$r)[3:42], .fun = summarise, mean = mean(qloa))
-str(vwn$r)
-mean(vwn$r$qloa)
-```
-
-### Selected Mean Q-Loadings per Group
-
-The below selected mean comparisons reveal *some*, albeit small differences in loadings.
-In addition, some of the more pronounced differences are probably due to sampling error, as these divergent subgroups are often very small (N = 1-5).
-
-```{r meanscomp-sel}
-kable(ddply(.data = vwn$r, .variables = "ausb_ausb", .fun = summarize, mean = mean(qloa)))
-kable(ddply(.data = vwn$r, .variables = "ausb_stud", .fun = summarize, mean = mean(qloa)))
-kable(ddply(.data = vwn$r, .variables = "bet_aktiv", .fun = summarize, mean = mean(qloa)))  # only 3 "nevers"!
-kable(ddply(.data = vwn$r, .variables = "infomed_dir", .fun = summarize, mean = mean(qloa)))  # again, very few nie
-kable(ddply(.data = vwn$r, .variables = "team_idisz", .fun = summarize, mean = mean(qloa)))  # again, very few nie
-kable(ddply(.data = vwn$r, .variables = "i40_bef", .fun = summarize, mean = mean(qloa)))  # again, only 4 no
-#summary(vwn$r$i40_bef)
-kable(ddply(.data = vwn$r, .variables = "i40_bek", .fun = summarize, mean = mean(qloa)))  # again, only 2 no
-#summary(vwn$r$i40_bek)
-```
-
-
-## Double checking
-
-```{r tkuse}
-kable(summary(vwn$r[,c("tkuse_msg", "tkuse_ent", "tkuse_mobile", "tkuse_news", "tkuse_shop")]))
-levels(vwn$r$tkuse_mobile)
-```
-
-
-## References
diff --git a/index.Rmd b/index.Rmd
index 0984bf6..b091b9a 100644
--- a/index.Rmd
+++ b/index.Rmd
@@ -14,7 +14,9 @@ site: bookdown::bookdown_site
 always_allow_html: yes
 ---
   
-# Concourse
+# Method
+
+## Concourse
 
 ```{r setup, cache = FALSE, include = FALSE}
 source("setup.R")
@@ -119,6 +121,15 @@ Bitte nennen Sie uns ein fehlendes Item, dass Sie für *nicht wahrscheinlich* ge
 
 \vspace{70pt}
 
+
+## Distribution
+
+```{r distro}
+distro <- qmethod::make.distribution(nstat = nrow(concourse), max.bin = 5)
+```
+
+# Results
+
 ## Data Import
 
 ```{r data-import}
@@ -372,5 +383,74 @@ comments["live-2-work", peoplevars[peoplevars$userid == "7", "name"], ] <- "Inte
 comments["own-boss", peoplevars[peoplevars$userid == "9", "name"], ] <- "bezogen auf die Arbeit oder mein Leben?"
 
 # cleanup
-rm(list = ls()[!(ls() %in% c("peoplevars", "comments", "concourse", "sorts"))])
+rm(list = ls()[!(ls() %in% c("peoplevars", "comments", "concourse", "sorts", "distro"))])
+
+# fix names
+dimnames(sorts)$items <- lettercase::make_names(names = dimnames(sorts)$items)
+dimnames(sorts)
+concourse$handle <- lettercase::make_names(concourse$handle)
+dimnames(sorts)$people <- lettercase::make_names(names = dimnames(sorts)$people)
+peoplevars$name <- lettercase::make_names(names = peoplevars$name)
+```
+
+## Descriptives
+
+# Analysis
+
+## Q Descriptives
+
+We first consider simply the correlations of the Q-datamatrix, that is, the correlations *between people-variables*  across *item_cases*.
+To at least approximate the ordinal (not interval) nature of the Q data collection, especially under a *forced distribution*, the correlations are calculated using Spearman's $\rho$, not the customary Pearson's $\rho$.
+
+```{r kill-nas}
+sorts <- sorts[, apply(X = sorts, MARGIN = 2, FUN = function(x) {!(any(is.na(x)))}), ]  # kill any NAs
+```
+
+```{r cors, include = FALSE, echo=FALSE}
+cor <- NULL
+cor$des <- QCors(cors = cor(x = sorts[,,"desirable"], method = "spearman"))
+cor$prob <- QCors(cors = cor(x = sorts[,,"probable"], method = "spearman"))
+plot(cor$des, use_js = FALSE) + ggtitle("Ähnlichkeit der Subjektivität der Teilnehmenden (Wünschenswert)")
+plot(cor$prob, use_js = FALSE) + ggtitle("Ähnlichkeit der Subjektivität der Teilnehmenden (Wahrscheinlich)")
+```
+
+Typical for some Q data, there are also less negative correlations than positive correlations, indicating that people do *not*, on the surface, hold polar opposite as beliefs.
+
+## Factor Extraction
+
+```{r paran, include = FALSE, eval = FALSE}
+# pensieve::run_parallel(dataset = sorts[,,"desirable"])  # does not work b/c of non-integers
+allsorts <- cbind(sorts[,,"desirable"], sorts[,,"probable"])
+paranres <- qmethod::q.nfactors(dataset = allsorts, cutoff = 10, siglevel = 0.05, iterations = 10000)
+paranres$screeplot + ggtitle("Kriterien zur Faktorgüte (Screeplot und Parallelanalyse)")
+paran::paran(x = allsorts, iterations = 10000, centile = 95)
+```
+
+```{r extraction}
+res <- NULL
+res$des <- qmethod::qmethod(dataset = sorts[,,"desirable"], nfactors = 2, rotation = "varimax", forced = FALSE, cor.method = "spearman", threshold = "none", allow.confounded = TRUE, quietly = TRUE, distribution = rep(x = c(min(sorts, na.rm = TRUE):max(sorts, na.rm = TRUE)), times = distro))
+res$prob <- qmethod::qmethod(dataset = sorts[,,"probable"], nfactors = 2, rotation = "varimax", forced = FALSE, cor.method = "spearman", threshold = "none", allow.confounded = TRUE, quietly = TRUE, distribution = rep(x = c(min(sorts, na.rm = TRUE):max(sorts, na.rm = TRUE)), times = distro))
+res$des <- qmethod::q.fnames(results = res$des, fnames = c("Idealtyp Rot (Wünschenswert)", "Idealtyp Blau (Wünschenswert)"))
+res$des <- qmethod::q.fcolors(results = res$des)
+res$prob <- qmethod::q.fnames(results = res$prob, fnames = c("Idealtyp Rot (Wahrscheinlich)", "Idealtyp Blau (Wahrscheinlich)"))
+res$prob <- qmethod::q.fcolors(results = res$prob)
+```
+
+```{r loas}
+plot(x = QLoas(loas = as.matrix(res$des$loa)), use_js = FALSE, by = "people", r2 = FALSE)
+plot(x = QLoas(loas = as.matrix(res$prob$loa)), use_js = FALSE, by = "people", r2 = FALSE)
+qmethod::q.loaplot(results = res$des)
+qmethod::q.loaplot(results = res$prob)
+```
+
+```{r comp}
+qmethod::q.compplot(results = res$des)
+qmethod::q.compplot(results = res$prob)
+```
+
+```{r scores}
+qmethod::q.scoreplot.ord(results = res$des, factor = 1, incl.qdc = FALSE, quietly = FALSE)
+qmethod::q.scoreplot.ord(results = res$des, factor = 2, incl.qdc = FALSE, quietly = FALSE)
+qmethod::q.scoreplot.ord(results = res$prob, factor = 1, incl.qdc = FALSE, quietly = FALSE)
+qmethod::q.scoreplot.ord(results = res$prob, factor = 2, incl.qdc = FALSE, quietly = FALSE)
 ```
diff --git a/setup.R b/setup.R
index 3505755..e1bbba2 100644
--- a/setup.R
+++ b/setup.R
@@ -10,8 +10,15 @@ library(grid)
 library(reshape2)
 library(readxl)
 library(stringr)
-# install.packages("../pensieve", repos = NULL, type = "source")
+library(plyr)
+library(xtable)
+library(pander)
+if (interactive()) {
+  install.packages("../pensieve", repos = NULL, type = "source", INSTALL_opts = c('--no-lock'))
+  install.packages("../qmethod", repos = NULL, type = "source", INSTALL_opts = c('--no-lock'))
+}
 library(pensieve)
+library(qmethod)
 
 # knitr setup ====
 #opts_knit$set(root.dir = normalizePath(getwd()))  # make sure the knitr path is correct