-
Notifications
You must be signed in to change notification settings - Fork 0
/
bivariate_normal.R
375 lines (336 loc) · 12.7 KB
/
bivariate_normal.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
library(ggplot2)
library(ggthemes)
library(gridExtra)
library(MASS)
source("distributions.R")
source("util.R")
plot_prior <- function(priorFunc, scaling=1) {
epsilon <- 0.001
rho <- seq(-1 + epsilon, 1 - epsilon, by=0.001)
density <- scaling * priorFunc(rho)
df <- as.data.frame(cbind(rho, density))
gg <- ggplot(df, aes(x=rho, y=density)) +
theme_grey(base_size = 22) +
geom_line(size=1.2) +
xlab("Correlation") +
ylab("Density") +
ylim(0, 2)
gg
}
plot_PC_prior <- function(lambda_values=c(0.0818, 1.249, 5.360)) {
epsilon <- 0.001
rho <- seq(-1 + epsilon, 1 - epsilon, by=0.001)
if (length(lambda_values) != 3) {
return(FALSE)
}
density1 <- prior_PC(rho, lambda=lambda_values[1])
density2 <- prior_PC(rho, lambda=lambda_values[2])
density3 <- prior_PC(rho, lambda=lambda_values[3])
index <- rep(c(1, 2, 3), each=length(rho))
df <- as.data.frame(cbind(rho, c(density1, density2, density3), index))
colnames(df)[colnames(df) == "V2"] <- "densities"
legend_title1 <- expression(paste(" ", lambda, " = 0.0818"))
legend_title2 <- expression(paste(" ", lambda, " = 1.249"))
legend_title3 <- expression(paste(" ", lambda, " = 5.360"))
gg <- ggplot(df, aes(x=rho, y=densities, group=index)) +
theme_grey(base_size = 30) +
geom_line(size=1.1, aes(linetype = factor(index))) +
labs(
x = "Correlation",
y = "Density",
colour = "Priors"
) +
scale_linetype_manual(name="", labels = c(legend_title1, legend_title2, legend_title3),
values = c("solid", "twodash", "dotted")) +
theme(legend.position = "top") +
theme(legend.background = element_rect(fill="grey90",
size=0.5, linetype="solid")) +
theme(legend.key.size = unit(3,"line")) +
ylim(0, 3)
gg
}
plot_PC_prior_limit <- function(lambda_values=c(0.01, 0.001, 0.0001)) {
epsilon <- 0.0000001
rho <- seq(0.999, 1 - epsilon, by=epsilon)
if (length(lambda_values) != 3) {
return(FALSE)
}
density1 <- prior_PC(rho, lambda=lambda_values[1])
density2 <- prior_PC(rho, lambda=lambda_values[2])
density3 <- prior_PC(rho, lambda=lambda_values[3])
index <- rep(c(1, 2, 3), each=length(rho))
df <- as.data.frame(cbind(rho, c(density1, density2, density3), index))
colnames(df)[colnames(df) == "V2"] <- "densities"
legend_title1 <- expression(paste(" ", lambda, " = 0.01"))
legend_title2 <- expression(paste(" ", lambda, " = 0.001"))
legend_title3 <- expression(paste(" ", lambda, " = 0.0001"))
gg <- ggplot(df, aes(x=rho, y=densities, group=index)) +
theme_grey(base_size = 30) +
geom_line(size=1.1, aes(linetype = factor(index))) +
labs(
x = "Correlation",
y = "Density",
colour = "Priors"
) +
scale_linetype_manual(name="", labels = c(legend_title1, legend_title2, legend_title3),
values = c("solid", "twodash", "dotted")) +
theme(legend.position = "top") +
theme(legend.background = element_rect(fill="grey90",
size=0.5, linetype="solid")) +
theme(legend.key.size = unit(3,"line")) +
ylim(0, 6)
gg
}
plot_prior_reference <- function(numPoints, numReps, numSamples) {
df <- find_reference_prior_numerically(numPoints, numReps, numSamples)
df$J <- 2.45 * prior_J(df$rho)
gg <- ggplot(df) +
theme_grey(base_size = 30) +
geom_point(aes(x=rho, y=density), size=1.2) +
geom_line(aes(x=rho, y=J), size=1.2) +
xlab("Correlation") +
ylab("Density") +
ylim(0, 15)
gg
}
plot_priors <- function() {
epsilon <- 0.001
rho <- seq(-1 + epsilon, 1 - epsilon, by=0.001)
density_flat <- prior_flat(rho)
density_J <- prior_J(rho) / 2
density_PC1 <- prior_PC(rho, lambda=0.0818)
density_PC2 <- prior_PC(rho, lambda=1.249)
density_PC3 <- prior_PC(rho, lambda=5.360)
index <- rep(c(1, 2, 3, 4, 5), each=length(rho))
df <- as.data.frame(cbind(rho, c(density_flat, density_J, density_PC1, density_PC2, density_PC3), index))
colnames(df)[colnames(df) == "V2"] <- "densities"
legend_title1 <- expression(paste(" PC, ", lambda, " = 0.0818"))
legend_title2 <- expression(paste(" PC, ", lambda, " = 1.249"))
legend_title3 <- expression(paste(" PC, ", lambda, " = 5.360"))
gg <- ggplot(df, aes(x=rho, y=densities, group=index)) +
theme_grey(base_size = 26) +
geom_line(size=1.1, aes(linetype = factor(index))) +
labs(
x = "Correlation",
y = "Density",
colour = "Priors"
) +
scale_linetype_manual(name="", labels = c("Flat", "Jeffreys", legend_title1, legend_title2, legend_title3),
values = c("solid", "dashed", "dotdash", "twodash", "dotted")) +
theme(legend.position = "top") +
theme(legend.key.size = unit(4,"line")) +
ylim(0, 3)
gg
}
plot_posterior <- function(posteriorFunc, statistic, numData) {
rho <- seq(-1, 1, by=0.001)
if (length(statistic) > 2) {
density <- posteriorFunc(rho, statistic[1,1], statistic[1,2], numData) /
normalise_posterior(posteriorFunc, statistic[1, ], numData)
for (i in 2:length(statistic[, 1])) {
density <- density + posteriorFunc(rho, statistic[i, 1], statistic[i, 2], numData) /
normalise_posterior(posteriorFunc, statistic[i, ], numData)
}
density <- density / length(statistic[, 1])
}
else {
density <- posteriorFunc(rho, statistic[1], statistic[2], numData) /
normalise_posterior(posteriorFunc, statistic, numData)
}
df <- as.data.frame(cbind(rho, density))
gg <- ggplot(df, aes(x=rho, y=density)) +
theme_grey(base_size = 22) +
geom_line(size=1.2) +
xlab("Correlation") +
ylab("Density")
return(gg)
}
plot_posterior_PC <- function(statistic, numData, lambda_list) {
rho <- seq(-1, 1, by=0.001)
density <- array(NA, c(length(rho), 3))
for (j in 1:3) {
posteriorFunc <- function(x1, x2, x3, x4) posterior_PC(x1, x2, x3, x4, lambda=lambda_list[j])
density[, j] <- posteriorFunc(rho, statistic[1,1], statistic[1,2], numData) /
normalise_posterior(posteriorFunc, statistic[1, ], numData)
for (i in 2:length(statistic[, 1])) {
density[, j] <- density[, j] + posteriorFunc(rho, statistic[i, 1], statistic[i, 2], numData) /
normalise_posterior(posteriorFunc, statistic[i, ], numData)
}
}
density <- density / length(statistic[, 1])
index <- rep(c(0.0818, 1.249, 5.360), each=length(rho))
df <- as.data.frame(cbind(rho, c(density[, 1], density[, 2], density[, 3]), index))
colnames(df)[colnames(df) == "V2"] <- "densities"
gg <- ggplot(df, aes(x=rho, y=densities, group=index)) +
theme_grey(base_size = 22) +
geom_line(size=1.2, aes(linetype = factor(index))) +
xlab("Correlation") +
ylab("Density") +
scale_linetype_manual(name="PC priors", labels = c("legend_title1", "legend_title2", "legend_title3"),
values = c("solid", "twodash", "dotted")) +
theme(legend.position = c(.75, .8))
return(gg)
}
plot_posteriors_together <- function(gg_list) {
df1 <- gg_list[[1]]$data
df2 <- gg_list[[2]]$data
df3 <- gg_list[[3]]$data
df1$num_data <- rep(3, length(df1$rho))
df2$num_data <- rep(10, length(df1$rho))
df3$num_data <- rep(100, length(df1$rho))
df <- rbind(df1, df2, df3)
gg <- ggplot(data=df, aes(rho, density)) +
theme_grey(base_size = 30) +
geom_line(size=1.2) +
labs(
y="Density",
x="Correlation"
) +
#ylim(c(0.94, 0.96)) +
facet_wrap(~ num_data)
return(gg)
}
plot_posteriors_together_PC <- function(gg_list) {
df1 <- gg_list[[1]]$data
df2 <- gg_list[[2]]$data
df3 <- gg_list[[3]]$data
df1$num_data <- rep(3, length(df1$rho))
df2$num_data <- rep(10, length(df1$rho))
df3$num_data <- rep(100, length(df1$rho))
df <- rbind(df1, df2, df3)
legend_title1 <- expression(paste(" ", lambda, " = 0.0818"))
legend_title2 <- expression(paste(" ", lambda, " = 1.249"))
legend_title3 <- expression(paste(" ", lambda, " = 5.360"))
gg <- ggplot(data=df, aes(x=rho, y=densities, group=index)) +
theme_grey(base_size = 30) +
geom_line(size=1.2, aes(linetype = factor(index))) +
labs(
y="Density",
x="Correlation"
) +
scale_linetype_manual(name="", labels = c(legend_title1, legend_title2, legend_title3),
values = c("solid", "twodash", "dotted")) +
theme(legend.position = "top") +
theme(legend.background = element_rect(fill="grey90",
size=0.5, linetype="solid")) +
theme(legend.key.size = unit(3,"line")) +
facet_grid(. ~ num_data)
return(gg)
}
get_MLE <- function(statistic, numData) {
MLE <- optimize(function(x) likelihood(x, statistic[1], statistic[2], numData), interval=c(-1, 1), maximum=TRUE)
return(MLE$maximum)
}
get_MLE_distribution <- function(rho, numData, numIter) {
MLE_list <- rep(NA, numIter)
for (i in 1:numIter) {
statistic <- simulate_bivariate_normal(rho, numData)
MLE_list[i] <- get_MLE(statistic, numData)
}
return(MLE_list)
}
plot_MLE_distribution <- function(rho, numData, numIter) {
estimate_list <- data.frame(get_MLE_distribution(rho, numData, numIter))
colnames(estimate_list) <- c("value")
gg <- ggplot(estimate_list, aes(x=value)) +
geom_histogram(bins=50, aes(y=..density..)) +
xlab("Correlation") +
ylab("Density")
gg
}
get_empirical_with_known_variances <- function(rho, numData) {
data <- mvrnorm(numData, mu=c(0, 0), Sigma=matrix(c(1, rho, rho, 1), 2))
empirical <- sum(data[, 1] * data[, 2]) / numData
if (empirical > 1) return(1)
if (empirical < -1) return(-1)
return(empirical)
}
get_empirical_with_known_variances_distribution <- function(rho, numData, numIter) {
list <- rep(NA, numIter)
for (i in 1:numIter) {
list[i] <- get_empirical_with_known_variances(rho, numData)
}
return(list)
}
plot_empirical_with_known_variances_distribution <- function(rho, numData, numIter) {
estimate_list <- data.frame(get_empirical_with_known_variances_distribution(rho, numData, numIter))
colnames(estimate_list) <- c("value")
gg <- ggplot(estimate_list, aes(x=value)) +
geom_histogram(bins=50, aes(y=..density..)) +
xlab("Correlation") +
ylab("Density")
gg
}
get_empirical_with_known_means <- function(rho, numData) {
data <- mvrnorm(numData, mu=c(0, 0), Sigma=matrix(c(1, rho, rho, 1), 2))
empirical <- sum(data[, 1] * data[, 2]) / (sqrt(sum(data[, 1]^2) * sum(data[, 2]^2)))
return(empirical)
}
get_empirical_with_known_means_distribution <- function(rho, numData, numIter) {
list <- rep(NA, numIter)
for (i in 1:numIter) {
list[i] <- get_empirical_with_known_means(rho, numData)
}
return(list)
}
get_empirical <- function(rho, numData) {
data <- mvrnorm(numData, mu=c(0, 0), Sigma=matrix(c(1, rho, rho, 1), 2))
empirical <- sum((data[, 1]-mean(data[, 1])) * (data[, 2]-mean(data[, 2]))) /
(sqrt(sum((data[, 1]-mean(data[, 1]))^2) * sum((data[, 2]-mean(data[, 2]))^2)))
return(empirical)
}
get_empirical_distribution <- function(rho, numData, numIter) {
list <- rep(NA, numIter)
for (i in 1:numIter) {
list[i] <- get_empirical(rho, numData)
}
return(list)
}
plot_empirical_distribution <- function(rho, numData, numIter) {
estimate_list <- data.frame(get_empirical_distribution(rho, numData, numIter))
colnames(estimate_list) <- c("value")
gg <- ggplot(estimate_list, aes(x=value)) +
geom_histogram(bins=50, aes(y=..density..)) +
xlab("Correlation") +
ylab("Density")
gg
}
get_SLR_estimate <- function(rho, numData) {
data <- mvrnorm(numData, mu=c(0, 0), Sigma=matrix(c(1, rho, rho, 1), 2))
y <- data[, 1]
x <- data[, 2]
SLR_model <- lm(y ~ 0 + x)
return(as.numeric(SLR_model$coefficients))
}
get_SLR_estimate_distribution <- function(rho, numData, numIter) {
SLR_list <- rep(NA, numIter)
for (i in 1:numIter) {
SLR_list[i] <- get_SLR_estimate(rho, numData)
}
return(SLR_list)
}
plot_SLR_estimate_distribution <- function(rho, numData, numIter) {
estimate_list <- data.frame(get_SLR_estimate_distribution(rho, numData, numIter))
colnames(estimate_list) <- c("value")
gg <- ggplot(estimate_list, aes(x=value)) +
geom_histogram(bins=50, aes(y=..density..)) +
xlab("Correlation") +
ylab("Density")
gg
}
rho <- 0.5
numData <- c(3, 10, 100)
posteriorFunc <- posterior_flat
#posteriorFunc <- function(x1, x2, x3, x4) posterior_PC(x1, x2, x3, x4, lambda=0.0818)
gg <- list()
for (i in 1:length(numData)) {
#statistic <- simulate_bivariate_normal(rho, numData[i])
numPosteriors <- 1000
statistic <- array(numeric(), c(numPosteriors, 2))
for (j in 1:numPosteriors) {
statistic[j,] <- simulate_bivariate_normal(rho, numData[i])
}
#gg[[i]] <- eval(substitute(plot_posterior_PC(statistic, numData[i], c(0.0818, 1.249, 5.360)), list(i = i)))
gg[[i]] <- eval(substitute(plot_posterior(posteriorFunc, statistic, numData[i]), list(i = i)))
}
plot_posteriors_together(gg)