## process_results.R
## Process and combine exceedance data to use for analysis
library(dplyr)
library(tidyr)
library(reshape2)
library(scales)
library(fitdistrplus)
library(ggplot2)
## NOTE: In this codebase, s1 and s2 refer to the verification and ensemble correlation lengths (respectively)
# Collect data ------------------------------------------------------------
nam <- "data/simulation_results/exceed_dat_s"
s_1 <- seq(1,4,0.5)
tau <- seq(0,4,0.5)
N <- 5000
## store verification fte rank data from all simulations
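## dimensions: realization (N = 5000) x s_2 index (11) x tau index (9) x s_1 index (7)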
rank_arr <- array(dim = c(N, 11, 9, 7))
## Compute rank of an observation's mean in the distribution
## of the ensemble means
rank_obs <- function(means) {
  # means: observation and ensemble means, with the observation listed first
  if (length(unique(means)) == 1) {
    # all means identical: the rank is undefined, so drop this realization
    return(NA)
  }
  r <- rank(means, ties.method = "random")[[1]]
  return(r)
}
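## For illustration (hypothetical values), with the observation mean listed first:
##   rank_obs(c(0.7, 0.2, 0.9, 0.5))  # returns 3: 0.7 is the 3rd smallest of the 4 means
##   rank_obs(rep(1, 4))              # returns NA: all means identical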
for (ii in seq_along(s_1)) {
  print(paste("s1 = ", s_1[ii]))
  s_2 <- seq(0.5 * s_1[ii], 1.5 * s_1[ii], 0.1 * s_1[ii])
  ## init rank_cube array: realization x s_2 index x tau index
  rank_cube <- array(dim = c(N, 11, 9))
  ## read in the simulation data (loads arr_dat)
  load(paste(nam, s_1[ii], ".RData", sep = ""))
  ## loop over s_2
  for (jj in seq_along(s_2)) {
    ## loop over tau
    for (t in seq_along(tau)) {
      ## rank the observation within each realization
      rank_cube[, jj, t] <- apply(arr_dat[, , t, jj], 1, rank_obs)
    }
  }
  ## fill in the ranks array for this s_1
  rank_arr[, , , ii] <- rank_cube
  rm(arr_dat, rank_cube)
}
# Reformat data -----------------------------------------------------------
## flatten data
rank_tab <- melt(rank_arr, value.name = 'rank',
                 varnames = c('N', 's2_idx', 'tau_idx', 's1_idx')) %>%
  mutate(.,
         s1  = rescale(s1_idx, to = c(1, 4)),
         s2  = rescale(s2_idx, to = c(0.5, 1.5)) * s1,
         tau = rescale(tau_idx, to = c(0, 4))
  ) %>%
  dplyr::select(., s1, s2, tau, N, rank)
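## rank_tab now has one row per (s1, s2, tau, realization); rescale() maps the
## integer melt indices back onto the original parameter grids, with s2
## recovered as the ratio times s1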
## fit beta parameters to the rank histograms after disaggregation
set.seed(10)
spread_rank <- function(r) {
  return(runif(1, r - 1/24, r + 1/24))
}
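## For illustration (hypothetical value): a raw rank of 4 is rescaled to
## (4 - 0.5)/12 below, and spread_rank() then jitters it uniformly over
## (3/12, 4/12), i.e. across its full histogram bin of width 1/12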
cont_fit_tab <- rank_tab %>%
  drop_na() %>%
  mutate(rank = (rank - 0.5)/12, ratio = s2/s1) %>%
  mutate(rank = sapply(rank, spread_rank)) %>%
  group_by(s1, ratio, tau) %>%
  summarise(params = paste(fitdist(rank, 'beta')$estimate, collapse = " ")) %>%
  separate(params, c('a', 'b'), sep = " ") %>%
  mutate(a = as.numeric(a), b = as.numeric(b))
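## cont_fit_tab holds one fitted (a, b) beta-parameter pair per (s1, ratio, tau)
## combination, with the discrete ranks mapped into (0, 1) and jittered first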
## separate each ratio group into its own table with columns (ratio, tau, rank)
# this will be used for bootstrap resampling in fig 5
set.seed(10)
listed_ratio_ranks <- rank_tab %>%
  drop_na() %>%
  mutate(rank = (rank - 0.5)/12, ratio = s2/s1) %>%
  filter(s1 == 2, ratio %in% c(0.5, 0.9, 1.0, 1.1, 1.5)) %>%
  dplyr::select(c(ratio, tau, rank)) %>%
  mutate(rank = sapply(rank, spread_rank)) %>%
  group_split(ratio)
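## listed_ratio_ranks is a list of tibbles, one per retained ratio value (in
## ascending order), each with columns (ratio, tau, rank)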
# Save to data folder -----------------------------------------------------
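## rank_tab and cont_fit_tab are written as plain-text tables (read back with
## read.table); listed_ratio_ranks is saved as a native R data file via save()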
write.table(rank_tab, file='data/rank_tab.RData')
write.table(cont_fit_tab, file='data/cont_fit_tab.RData')
save(listed_ratio_ranks, file='data/listed_ratio_ranks.RData')