Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

true paramete-rwise SBC #12

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 75 additions & 66 deletions R/2_Theta_bar_Y/DtrMngSep/DMSep_SBC.R
Original file line number Diff line number Diff line change
@@ -1,80 +1,89 @@
devtools::install_github("hyunjimoon/SBC",ref="api-variant")
library(SBC)
library(mice)
library(cmdstanr)
library(dplyr)
library(posterior)
library(ConnMatTools)
source(file.path(getwd(), "R/1_Y_bar_y/impute/engine_fail_imputation.R"))
source("R/utils/functions.r")
# Three simulators for prior, data, posterior
# 1. prior simulator
true <- as_tibble(read.csv("./data/DMSep_validation_trueparam.csv"))
# 2. data simulator
# DGP with truth-point benchmarking
# Metadata
N = 3069
T = 31
S = 3
P = 4
p21 <-true$x[true$X== "p21"] #0.1 #
p31 <- true$x[true$X== "p31"] #0.2 #
rate <- rep(NA, S);
for (j in 1:S) rate[j] <- true$x[true$X== sprintf("rate[%s]", j)]
true_pars <-list(p21, p31, rate)
source("R/2_Theta_bar_Y/DtrMngSep/DMsep_5param_test.R")
modelName = "DMSep"
modelName = "DMSep_inhomo"
Dir <- set_get_Dir(modelName, "~/Dropbox/21S_paper/defense-reliability/R/2_Theta_bar_Y/DtrMngSep")
DMSep = cmdstanr::cmdstan_model(Dir$file)

mice_imp <- generateMice()
imputed_data<- complete(mice_imp, 1)
imputed_data_gp <- read.csv("data/y_pred_5var.csv")[,-1]
imputed_data$y_data <- unlist(c(imputed_data_gp))
n_state = 3
initial_state = 1
generate_state_matrix <- function(data, n){
state <- cut(data, breaks=quantile(data,c(0,1/3,2/3,1)), labels=1:n, include.lowest = TRUE)
state<-as.numeric(state)
matrix(state,nrow=31)
}
state_matrix <- generate_state_matrix(imputed_data$y_data, n_state)
states <- as.vector(t(state_matrix))

iter=2000
MSE_df<-data.frame(index=rep(0,iter),p=rep(0,iter),q=rep(0,iter),train_MSE=rep(0,iter),test_MSE=rep(0,iter))
rate_array<-data.frame(period=c(),index=c(),rate=c())
D_array <- array(0, dim=c(iter,4,3,3))
ship_ind_df<-matrix(0,nrow=iter,ncol=5)

sampling_res <- gpImputeDMSepFit(1:99)
saveRDS(sampling_res, "sample_genstate_inhomo.RDS")
true_prival <- subset_draws(as_draws_rvars(sampling_res), chain = 1, iteration = seq(1, 981, by = 20))
# Three ways for prior input: truth-point bencmarking, hyperparameter-based truth-dist, sample-based truth-dist,
# The first only simulate with one set of value of parameter values while the second and the third simulate with
# different set of prior values. The second is when the modeler only knows the hyperparmeter of prior values while
# the third is when the modeler have exact n_datasets pairs of prior value in hand.

generator <- function(){
function(iter, true_pars){
# Metadata
N = 3069
T = 31
S = 3
P = 4
obs2time = complete(generateMice(), 1)$age_ind
initial_state = 1
init_state_vec = rep(0, S);
init_state_vec[initial_state] = 1
latent_states = array(NA, dim=c(T,S))
# Hyperparameter -> receive as input
p21 = true_pars[[1]]
p31 = true_pars[[2]]
rate = true_pars[[3]]
##############
# truth-dist. with custom priorval
##############
custom_nprior_generator <- function(true_prival){
n_datasets <- niterations(true_prival)
generated <- list()
N = 3069
T = 31
S = 3
P = 4
obs2time = complete(generateMice(), 1)$age_ind
initial_state = 1
init_state_vec = rep(0, S);
init_state_vec[initial_state] = 1
latent_states = matrix(rep(NA, T*S),nrow = T, ncol = S)
gen_states = array(NA, N)
rate_vec <- rep(NA, 3)
for(iter in 1:n_datasets){
true_prival_i <- subset_draws(true_prival, iteration = iter)
rate_i <- draws_of(true_prival_i$rate)
p21_i <- draws_of(true_prival_i$p21)
p31_i <- draws_of(true_prival_i$p31)
# Maintenance
Mnt = array(c(1, 0, 0, p21, 1- p21, 0, p31, 1-p31,0), dim=c(S,S))
Mnt = array(c(1, 0, 0, p21_i, 1- p21_i, 0, p31_i, 1-p31_i,0), dim=c(S,S))
# Deterioration
tmp_p <- array(rep(0, S * S), dim=c(S,S))
tmp_p[1,1] = exp(-rate[1]- rate[2]);
tmp_p[2,1] = rate[1] * exp(-rate[3]) * (1-exp(-(rate[1]+ rate[2] - rate[3]))) / (rate[1]+ rate[2] - rate[3]);
tmp_p[3,1] = exp(-rate[3]);
Dtr <- array(c(tmp_p[1,1], tmp_p[2,1], 1- tmp_p[1,1], 0, tmp_p[3,1], 1 - tmp_p[3,1], 0,0,1), dim=c(S, S))
latent_states[1,] = Dtr %*% init_state_vec;
tmp_p[1,1] <- exp(- rate_i[,1]- rate_i[,2])
tmp_p[2,1] <- rate_i[,1] * exp(- rate_i[,3]) * (1-exp(-( rate_i[,1] + rate_i[,2] - rate_i[,3]))) / ( rate_i[,1] + rate_i[,2] - rate_i[,3])
tmp_p[3,1] <- exp(- rate_i[,3])
Dtr <- array(c(tmp_p[1,1], tmp_p[2,1], 1- tmp_p[1,1] - tmp_p[2,1], 0, tmp_p[3,1], 1 - tmp_p[3,1], 0,0,1), dim=c(S, S))
latent_states[1,] <- Dtr %*% init_state_vec
for (t in 2:T){
latent_states[t,] = (Dtr %*% Mnt) %*% latent_states[t-1,];
latent_states[t,] <- (Dtr %*% Mnt) %*% latent_states[t-1,]
}
list(
generated = lapply(seq(1:length(obs2time)), function(n){sample(c(1,2,3), 1, prob = latent_states[obs2time[n],])}),
parameters = list(
p21 = p21,
p31 = p31,
rate = rate
)
)
gen_states <- unlist(lapply(seq(1:length(obs2time)), function(n){sample(c(1,2,3), 1, prob = latent_states[obs2time[n],])}))
generated[[iter]] <- list(N = N, T = T, S = S, P = P, states = gen_states, obs2time = obs2time, initial_state = initial_state)
}
SBC_datasets(
parameters = posterior::as_draws_matrix(posterior::draws_rvars(p21 = true_prival$p21, p31 = true_prival$p31, rate = true_prival$rate)),
generated <- generated
)
}

# 3. posterior simulator: prior and data simulator embedded in stan with supplied data
# 3-1. prior and data simulator
modelName = "DMSep"
Dir <- set_get_Dir(modelName, "R/2_Theta_bar_Y/DtrMngSep")
DMSep = cmdstanr::cmdstan_model(Dir$file)

# 3-2. simulated y data template with init as real y
obs2time <- complete(generateMice(), 1)$age_ind # data template only
states <- rep(3, N) # data template
initial_state <- 1
stan_data <- list(N= N,T = max(obs2time), S = 3, P = 4, states=states, obs2time=obs2time, initial_state=initial_state)
SBC_N = 1
SBC_M = 500 # 40
workflow <- SBCWorkflow$new(DMSep, generator())
workflow$simulate(SBC_N, true_pars)
stan_data$states <- unlist(posterior::draws_of(posterior::subset_draws(workflow$simulated_y, variable="y")$y))
write.csv(stan_data$states, file = "sim_y.csv")
summary(stan_data$states)
sampling_res<-DMSep$sample(data = stan_data,iter_warmup =1000, iter_sampling = 1000, parallel_chains = 4)
datasets <- custom_nprior_generator(true_prival)
backend <- cmdstan_sample_SBC_backend(DMSep, iter_warmup = 500, iter_sampling = 200)
results_10 <- compute_results(datasets, backend, thin_ranks = 10)
plot_ecdf_diff(results_10)
69 changes: 34 additions & 35 deletions R/2_Theta_bar_Y/DtrMngSep/DMsep_5param_test.R
Original file line number Diff line number Diff line change
@@ -1,48 +1,47 @@
source(file.path(getwd(), "R/1_Y_bar_y/impute/mice_imputation.R"))
scriptDir <- getwd()
set.seed(210106)
library(rstan)
library(cmdstanr)
library(ggplot2)
library(scales)

#cmdstan: model_DMsep <-stan_m(file = (file.path(getwd(), "R/2_Theta_bar_Y/DtrMngSep/models/DMSep/DMSep.stan")))
model_DMsep <-stan_model(file = (file.path(getwd(), "R/2_Theta_bar_Y/DtrMngSep/models/DMSep/DMSep.stan")))

mice_imp <- generateMice()
imputed_data<- complete(mice_imp, 1)

imputed_data_gp <- read.csv("data/y_pred_5var.csv")[,-1]
imputed_data$y_data <- unlist(c(imputed_data_gp))
#################################### DM_sep
# original policy (wihtout pm)
n_state = 3
initial_state = 1

generate_state_matrix <- function(data, n){
#state <- cut(data, breaks=c(0, 80, 160, max(data)), labels=1:n, include.lowest = TRUE)
state <- cut(data, breaks=quantile(data,c(0,1/3,2/3,1)), labels=1:n, include.lowest = TRUE)
state<-as.numeric(state)
matrix(state,nrow=31)
source(file.path(getwd(), "R/1_Y_bar_y/impute/mice_imputation.R"))
set.seed(210106)
gpImputeDMSepFit <- function(train_ship_ind){
#cmdstan: model_DMsep <-stan_m(file = (file.path(getwd(), "R/2_Theta_bar_Y/DtrMngSep/models/DMSep/DMSep.stan")))
model_DMsep <-stan_model(file = (file.path(getwd(), "R/2_Theta_bar_Y/DtrMngSep/models/DMSep/DMSep.stan")))
mice_imp <- generateMice()
imputed_data<- complete(mice_imp, 1)
imputed_data_gp <- read.csv("data/y_pred_5var.csv")[,-1]
imputed_data$y_data <- unlist(c(imputed_data_gp))
#################################### DM_sep
# original policy (wihtout pm)
n_state = 3
initial_state = 1
generate_state_matrix <- function(data, n){
#state <- cut(data, breaks=c(0, 80, 160, max(data)), labels=1:n, include.lowest = TRUE)
state <- cut(data, breaks=quantile(data,c(0,1/3,2/3,1)), labels=1:n, include.lowest = TRUE)
state<-as.numeric(state)
matrix(state,nrow=31)
}
state_matrix <- generate_state_matrix(imputed_data$y_data, n_state)
states <- as.vector(t(state_matrix))
iter=2000
MSE_df<-data.frame(index=rep(0,iter),p=rep(0,iter),q=rep(0,iter),train_MSE=rep(0,iter),test_MSE=rep(0,iter))
rate_array<-data.frame(period=c(),index=c(),rate=c())
D_array <- array(0, dim=c(iter,4,3,3))
ship_ind_df<-matrix(0,nrow=iter,ncol=5)
train_ind=c(sapply(train_ship_ind,function(x) (x-1)*31+(1:31)))
stan_data <- list(N= length(train_ind),T = max(imputed_data$age_ind[train_ind]), S = 3, P = 4, states=states[train_ind], obs2time=imputed_data$age_ind[train_ind], initial_state=initial_state)
sampling_res<-sampling(model_DMsep,stan_data, iter = 2000, cores=4)
return (sampling_res)
}

state_matrix <- generate_state_matrix(imputed_data$y_data, n_state)
states <- as.vector(t(state_matrix))

iter=2000
MSE_df<-data.frame(index=rep(0,iter),p=rep(0,iter),q=rep(0,iter),train_MSE=rep(0,iter),test_MSE=rep(0,iter))
rate_array<-data.frame(period=c(),index=c(),rate=c())
D_array <- array(0, dim=c(iter,4,3,3))
ship_ind_df<-matrix(0,nrow=iter,ncol=5)

test_ship_ind= c(17,20,24,77,82) #sort(sample(1:99,5)) #c(17,20,24,77,82)
test_ship_ind= c(17,20,24,77,82) #sort(sample(1:99,5)) = c(17,20,24,77,82)
ship_ind_df[1,]=test_ship_ind
test_ind=c(sapply(test_ship_ind,function(x) (x-1)*31+(1:31)))

train_data <- states[-test_ind]
test_data <- states[test_ind]
stan_data <- list(N= length(train_data),T = max(imputed_data$age_ind[-test_ind]), S = 3, P = 4, states=train_data, obs2time=imputed_data$age_ind[-test_ind], initial_state=initial_state)
sampling_res<-sampling(model_DMsep,stan_data, iter = 2000, cores=4)
#sort(sample(1:99,5)) #c(17,20,24,77,82)
gpImputeDMSepFit(c(17,20,24,77,82))

res_df<-as.data.frame(sampling_res)
sample_mean<-apply(res_df,2,mean) #write.csv(sample_mean, "./data/DMSep_validation_trueparam.csv")

Expand Down
24 changes: 16 additions & 8 deletions R/2_Theta_bar_Y/DtrMngSep/models/DMSep/DMSep.stan
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,23 @@ transformed parameters {
matrix[S, S] Dtr; // Deterioration
matrix[S, S] Mnt; // Maintenance
simplex[S] latent_states[T];
matrix[S, S] tmp_p;
real<lower = 0> tmp_p11;
real<lower = 0> tmp_p21;
real<lower = 0> tmp_p31;
// Maintenance
// is left stoch. matrix, transpose of eq.14 from the paper
Mnt = [[1, p21, p31],
[0, 1-p21, 1-p31],
[0, 0, 0]];
// Deterioration by period
// is left stoch. matrix, transpose of eq.13 from the paper
tmp_p[1,1] = exp(-rate[1]- rate[2]);
tmp_p[2,1] = rate[1] * exp(-rate[3]) * (1-exp(-(rate[1]+ rate[2] - rate[3]))) / (rate[1]+ rate[2] - rate[3]);
tmp_p[3,1] = exp(-rate[3]);
Dtr = [[tmp_p[1,1], 0, 0],
[tmp_p[2,1], tmp_p[3,1], 0],
[1 - tmp_p[1,1] - tmp_p[2,1], 1 - tmp_p[3,1], 1]];
// Inhomogenous Dtr
tmp_p11 = exp(-rate[1]- rate[2]);
tmp_p21 = rate[1] * exp(-rate[3]) * (1-exp(-(rate[1]+ rate[2] - rate[3]))) / (rate[1]+ rate[2] - rate[3]);
tmp_p31 = exp(-rate[3]);
Dtr = [[tmp_p11, 0, 0],
[tmp_p21, tmp_p31, 0],
[1 - tmp_p11 - tmp_p21, 1 - tmp_p31, 1]];
// (In) or homogenous Dtr
latent_states[1] = Dtr * initial;
for (t in 2:T){
latent_states[t] = (Dtr * Mnt) *latent_states[t-1]; //matrix_power((Dtr * Mnt), (t-1)) * initial;
Expand All @@ -54,3 +56,9 @@ model {
states[n] ~ categorical(latent_states[obs2time[n]]);
}
}
generated quantities{
int<lower=0, upper =S> gen_states[N];
for (n in 1:N){
gen_states[n] = categorical_rng(latent_states[obs2time[n]]);
}
}
71 changes: 71 additions & 0 deletions R/2_Theta_bar_Y/DtrMngSep/models/DMSep_inhomo/DMSep_inhomo.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
data {
int<lower=0> N; // numbmer of obs
int<lower=0> T; // number of time bins = max age
int<lower=1>S; // number of states
int<lower=1> P; // number of periods for inhomogenous dtr.
int<lower=0, upper =S> states[N]; //observed state
int obs2time[N]; // map obs to age
int<lower=0, upper=S> initial_state;
}

transformed data {
vector[S] initial;
vector<lower=0>[S] alpha;
vector<lower=0>[S] beta;
initial = rep_vector(0, S);
initial[initial_state] = 1;
alpha = rep_vector(3,3);
beta = rep_vector(3,3);
}

parameters {
real<lower=0> rate[P, S];
real<lower=0, upper=1> p21;
real<lower=0, upper=1> p31;
//real<lower=0, upper=1> r;
//simplex[K] theta[K]; // transit probs
}

transformed parameters {
matrix[S, S] Dtr[P]; // Deterioration
matrix[S, S] Dtr_pow[T]; // Deterioration
matrix[S, S] Mnt; // Maintenance
simplex[S] latent_states[T];
real<lower = 0> tmp_p11;
real<lower = 0> tmp_p21;
real<lower = 0> tmp_p31;
// Maintenance
// is left stoch. matrix, transpose of eq.14 from the paper
Mnt = [[1, p21, p31],
[0, 1-p21, 1-p31],
[0, 0, 0]];
// Deterioration by period
// is left stoch. matrix, transpose of eq.13 from the paper
for(p in 1:P){
tmp_p11 = exp(-rate[p,1]- rate[p,2]);
tmp_p21 = rate[p,1] * exp(-rate[p,3]) * (1-exp(-(rate[p,1]+ rate[p,2] - rate[p,3]))) / (rate[p,1]+ rate[p,2] - rate[p,3]);
tmp_p31 = exp(-rate[p,3]);
Dtr[p] = [[tmp_p11, 0, 0],
[tmp_p21, tmp_p31, 0],
[1 - tmp_p11 - tmp_p21, 1 - tmp_p31, 1]];
}
// (In) or homogenous Dtr
latent_states[1] = Dtr[1] * initial;
for (t in 2:T){
if (t <= 8){latent_states[t] = Dtr[1] * Mnt * latent_states[t-1];}
else if (t <=20){latent_states[t] = Dtr[2] * Mnt * latent_states[t-1];}
else if (t <=26){latent_states[t] = Dtr[3] * Mnt * latent_states[t-1];}
else{latent_states[t] = Dtr[4] * Mnt * latent_states[t-1];}
}
}
model {
for (n in 1:N){
states[n] ~ categorical(latent_states[obs2time[n]]);
}
}
generated quantities{
int<lower=0, upper =S> gen_states[N];
for (n in 1:N){
gen_states[n] = categorical_rng(latent_states[obs2time[n]]);
}
}
Loading