-
Notifications
You must be signed in to change notification settings - Fork 2
/
CES_ratings3_production.r
151 lines (122 loc) · 4.93 KB
/
CES_ratings3_production.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Produce posterior scores and ranks for CES 2021
library(rethinking)
library(cmdstanr)
# raw anonymized ratings
dat_raw <- read.csv( "dat_anon.csv" )
# prep data input
dat_input <- list(
N=nrow(dat_raw),
n_talks=length(unique(dat_raw$talk)),
M=length(unique(dat_raw$judge)),
L=max(dat_raw[,3:4])-1,
Q=2,
y = dat_raw[,3:4],
jid = dat_raw$judge,
tid = dat_raw$talk,
weights=c(1,1) )
# count number of times each judge appears
table(dat_input$jid)
# count number of times each talk appears (number of judges rating each talk)
table(dat_input$tid)
# run model
m_ces <- cstan( file="model1.stan" , data=dat_input , chains=4 , cores=4 , iter=4000 )
# check chains
x <- precis(m_ces,3)
plot( x$n_eff , x$Rhat4 , xlab="n_eff" , ylab="Rhat (v4)" , lwd=2 , col=col.alpha(2,0.8) )
# optional line to manually label outliers
# identify( x$n_eff , x$Rhat4 , labels=rownames(x) )
# extract scores
post <- extract.samples(m_ces)
score <- apply(post$score,2:3,mean)
total_score_est <- apply( post$total_score , 2, mean )
N <- dat_input$n_talks
# compute ranks
rank_post <- sapply( 1:length(post$lp__) , function(i) rank( -post$total_score[i,] ) )
rank_mean <- apply( rank_post , 1 , mean )
# posterior means of each feature
features <- apply(post$score,2:3,mean)
# report
report <- data.frame( talk=1:dat_input$n_talks , rank=NA , score=NA , feature1=NA , feature2=NA )
report$rank <- rank_mean
report$score <- total_score_est
report$feature1 <- features[,1]
report$feature2 <- features[,2]
# sort report by rank
report_o <- report[ order( rank_mean ) , ]
write.csv( report_o , file="report.csv" , row.names=FALSE )
# save RData - reload with load("CES2021_talks.RData")
save.image(file = "CES2021_talks.RData")
#################################################################
##### PLOTS #####
# total score, sorted by rank
blank(w=2)
total_score_est <- apply( post$total_score , 2, mean )
rank_est <- rank( -total_score_est )
rbPal <- colorRampPalette(c('black',2))
talks_col <- rbPal(10)[as.numeric(cut(total_score_est,breaks = 10))]
talks_col <- matrix( talks_col , ncol=2 )
ci <- apply(post$total_score,2,PCI)
plot( rank_est , total_score_est , ylim=range(ci) , col=talks_col )
for ( i in 1:ncol(ci) ) lines( rep(rank_est[i],2) , ci[,i] , lwd=0.8 , col=talks_col[i] )
# posterior rank distribution
blank()
score_naive <- sapply( 1:60 , function(i) mean( unlist(dat_input$y[dat_input$tid==i,]) ) )
ci <- apply(rank_post,1,PCI)
plot( score_naive , rank_mean , ylim=range(ci) , col=2 , xlab="naive average score" , ylab="posterior rank" )
for ( i in 1:ncol(ci) ) lines( rep(score_naive[i],2) , ci[,i] , lwd=0.8 , col=2 )
# features
blank()
post <- extract.samples(m_ces)
score <- apply(post$score,2:3,mean)
plot( score[,1:2] , xlab="posterior mean feature 1" , ylab="posterior mean feature 2" , lwd=2 , col=2 , cex=1.5 )
# text( score[,1] , score[,2] , labels=1:N , cex=0.8 )
mtext( "features of each talk" )
######################
# supplemental plots
# naive average score approach
score_naive <- sapply( 1:60 , function(i) mean( unlist(dat_input$y[dat_input$tid==i,]) ) )
plot( score_naive , total_score_est , xlab="naive score" , ylab="posterior mean score" , lwd=2 , col=2 )
plot( rank( -score_naive ) , rank_mean , xlab="naive rank" , ylab="posterior mean rank" , lwd=2 , col=2 )
# distribution of raw scores
simplehist(dat_input$y , xlab="score" , col=c(1,2) )
text( 2 , 50 , "feature 1" )
text( 2 , 46 , "feature 2" , col=2 )
# show judge variation
# distribution of raw scores for specific judge
simplehist( dat_input$y[ dat_input$jid==1 , ] , xlab="score" , col=c(1,2) )
text( 2 , 50 , "feature 1" )
text( 2 , 46 , "feature 2" , col=2 )
# average judge cut-points
dcuts <- apply( post$dcuts , 2:3 , mean )
# covert to logit scale cut-points
lcuts <- matrix( NA , nrow=2 , ncol=6 )
lcuts[,1] <- dcuts[,1] # these are already logit scale
for ( j in 2:6 ) {
lcuts[,j] <- lcuts[,j-1] + exp( dcuts[,j] )
}
mu_cuts <- lcuts
#plot
blank()
plot( NULL , xlim=c(1,7) , ylim=c(0,1) , xlab="value" , ylab="cumulative probability" )
lines( 1:7 , c( inv_logit(lcuts[1,]) , 1 ) , lwd=2 )
lines( 1:7 , c( inv_logit(lcuts[2,]) , 1 ) , lwd=2 , col=2 )
# sample judge cut-points
blank()
plot( NULL , xlim=c(1,7) , ylim=c(0,1) , xlab="value" , ylab="cumulative probability" )
for ( i in 1:16 ) {
lcuts <- apply( post$cuts_jid[,i,,] , 2:3 , mean )
lines( 1:7 , c( inv_logit(lcuts[1,]) , 1 ) , lwd=1 )
#lines( 1:7 , c( inv_logit(lcuts[2,]) , 1 ) , lwd=1 , col=2 )
}
# again by individual judge
blank(ex=3.2)
par(mfrow=c(4,4))
for ( i in 1:16 ) {
plot( NULL , xlim=c(1,7) , ylim=c(0,1) , xlab="" , ylab="" )
lcuts <- apply( post$cuts_jid[,i,,] , 2:3 , mean )
lines( 1:7 , c( inv_logit(lcuts[1,]) , 1 ) , lwd=2 )
lines( 1:7 , c( inv_logit(lcuts[2,]) , 1 ) , lwd=2 , col=2 )
mtext( concat("Rater ",i) )
lines( 1:7 , c( inv_logit(mu_cuts[1,]) , 1 ) , lwd=0.5 , lty=2 )
#lines( 1:7 , c( inv_logit(mu_cuts[2,]) , 1 ) , lwd=0.5 , lty=2 , col=2 )
}