-
Notifications
You must be signed in to change notification settings - Fork 2
/
April2019.R
79 lines (54 loc) · 2.55 KB
/
April2019.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
####
#Companion code for April 2019 Biometrics Bits article on Bayes' theorem
#Brian Clough
####
library(brms)
library(ggplot2)
library(dplyr)
library(tidybayes)
#Simulate a simple dataset for one stand.
standMean <- 75
standSD <- 5
nPlots <- 100
bapaDraws <- rnorm(nPlots, mean = standMean, sd = standSD)
trainingData <- data.frame(plot_id = seq(1:nPlots), bapa = bapaDraws)
#Drop some no tally plots for training remote sensing models
tdNoZero <- trainingData %>%
filter(bapa > 0)
#Informed prior to simulate domain knowledge; sample_prior = 'only' uses only draws from the prior
#distribution. Note that in this example, we are setting the prior as the known mean and variance of
#the population we simulated above. Try setting it to some "wrong" priors and see what happens!
baPriors <- c(set_prior('normal(75, 5)', class = 'Intercept'))
dkOnlyFit <- brm('bapa ~ 1', data = tdNoZero, family = 'gaussian', prior = baPriors,
sample_prior = 'only', seed = 1234)
#A wide, flat prior allows only the data to influence predictions.
widePriors <- c(set_prior('normal(0, 100)', class = 'Intercept'))
invOnlyFit <- brm('bapa ~ 1', data = tdNoZero, family = 'gaussian', prior = widePriors,
seed = 1234)
#Fitted model to data using informed prior specifications.
bothFit <- brm('bapa ~ 1', data = tdNoZero, family = 'gaussian',
prior = baPriors, seed = 1234)
##Generate predictions and summarize.
dkOut <- data.frame(model = 'Experience only') %>%
add_predicted_draws(dkOnlyFit)
invOnlyOut <- data.frame(model = 'Data only') %>%
add_predicted_draws(invOnlyFit)
bothOut <- data.frame(model = 'Data & experience') %>%
add_predicted_draws(invOnlyFit)
plotData <- bind_rows(dkOut, invOnlyOut, bothOut) %>%
filter(.prediction > 0 & .prediction < 250)
plotData$model_f <- factor(plotData$model, levels=c('Experience only', 'Data only',
'Data & experience'))
fig2 <- ggplot(plotData, aes(x = .prediction)) + geom_histogram(col = 'dark green') + facet_wrap(~model_f) +
theme_bw() + xlab('Simulated basal area per acre') + ylab('Number of draws')
ggsave(fig2, file = '/tmp/figure_2.pdf')
statTab <- plotData %>%
group_by(model_f) %>%
summarise(
postMean = mean(.prediction),
postMedian = median(.prediction),
postSD = sd(.prediction),
postLower = quantile(.prediction, 0.05),
postUpper = quantile(.prediction, 0.9)
)