hyunjimoon · hyunjimoon · Aug 7, 2021 · Aug 11, 2021 · Aug 11, 2021 · Aug 12, 2021
diff --git a/R/2_Theta_bar_Y/DtrMngSep/DMSep_SBC.R b/R/2_Theta_bar_Y/DtrMngSep/DMSep_SBC.R
@@ -1,80 +1,89 @@
+devtools::install_github("hyunjimoon/SBC",ref="api-variant")
 library(SBC)
 library(mice)
 library(cmdstanr)
 library(dplyr)
+library(posterior)
+library(ConnMatTools)
 source(file.path(getwd(), "R/1_Y_bar_y/impute/engine_fail_imputation.R"))
 source("R/utils/functions.r")
-# Three simulators for prior, data, posterior
-# 1. prior simulator
-true <- as_tibble(read.csv("./data/DMSep_validation_trueparam.csv"))
-# 2. data simulator
-# DGP with truth-point benchmarking
-# Metadata
-N = 3069
-T = 31
-S = 3
-P = 4
-p21 <-true$x[true$X== "p21"]  #0.1 #
-p31 <- true$x[true$X== "p31"] #0.2 #
-rate <- rep(NA, S);
-for (j in 1:S) rate[j] <- true$x[true$X== sprintf("rate[%s]", j)]
-true_pars <-list(p21, p31, rate)
+source("R/2_Theta_bar_Y/DtrMngSep/DMsep_5param_test.R")
+modelName = "DMSep"
+modelName = "DMSep_inhomo"
+Dir <- set_get_Dir(modelName,  "~/Dropbox/21S_paper/defense-reliability/R/2_Theta_bar_Y/DtrMngSep")
+DMSep = cmdstanr::cmdstan_model(Dir$file)
+
+mice_imp <- generateMice()
+imputed_data<- complete(mice_imp, 1)
+imputed_data_gp <- read.csv("data/y_pred_5var.csv")[,-1]
+imputed_data$y_data <- unlist(c(imputed_data_gp))
+n_state = 3
+initial_state = 1
+generate_state_matrix <- function(data, n){
+  state <- cut(data, breaks=quantile(data,c(0,1/3,2/3,1)), labels=1:n, include.lowest = TRUE)
+  state<-as.numeric(state)
+  matrix(state,nrow=31)
+}
+state_matrix <- generate_state_matrix(imputed_data$y_data, n_state)
+states <- as.vector(t(state_matrix))
+
+iter=2000
+MSE_df<-data.frame(index=rep(0,iter),p=rep(0,iter),q=rep(0,iter),train_MSE=rep(0,iter),test_MSE=rep(0,iter))
+rate_array<-data.frame(period=c(),index=c(),rate=c())
+D_array <- array(0, dim=c(iter,4,3,3))
+ship_ind_df<-matrix(0,nrow=iter,ncol=5)
+
+sampling_res <- gpImputeDMSepFit(1:99)
+saveRDS(sampling_res, "sample_genstate_inhomo.RDS")
+true_prival <- subset_draws(as_draws_rvars(sampling_res), chain = 1, iteration = seq(1, 981, by = 20))
+# Three ways for prior input: truth-point bencmarking, hyperparameter-based truth-dist, sample-based truth-dist,
+# The first only simulate with one set of value of parameter values while the second and the third simulate with
+# different set of prior values. The second is when the modeler only knows the hyperparmeter of prior values while
+# the third is when the modeler have exact n_datasets pairs of prior value in hand.
 
-generator <- function(){
-  function(iter, true_pars){
-    # Metadata
-    N = 3069
-    T = 31
-    S = 3
-    P = 4
-    obs2time = complete(generateMice(), 1)$age_ind
-    initial_state = 1
-    init_state_vec = rep(0, S);
-    init_state_vec[initial_state] = 1
-    latent_states = array(NA, dim=c(T,S))
-    # Hyperparameter -> receive as input
-    p21 = true_pars[[1]]
-    p31 = true_pars[[2]]
-    rate = true_pars[[3]]
+##############
+# truth-dist. with custom priorval
+##############
+custom_nprior_generator <- function(true_prival){
+  n_datasets <- niterations(true_prival)
+  generated <- list()
+  N = 3069
+  T = 31
+  S = 3
+  P = 4
+  obs2time = complete(generateMice(), 1)$age_ind
+  initial_state = 1
+  init_state_vec = rep(0, S);
+  init_state_vec[initial_state] = 1
+  latent_states = matrix(rep(NA, T*S),nrow = T, ncol = S)
+  gen_states = array(NA, N)
+  rate_vec <- rep(NA, 3)
+  for(iter in 1:n_datasets){
+    true_prival_i <- subset_draws(true_prival, iteration = iter)
+    rate_i <- draws_of(true_prival_i$rate)
+    p21_i <- draws_of(true_prival_i$p21)
+    p31_i <- draws_of(true_prival_i$p31)
     # Maintenance
-    Mnt = array(c(1, 0, 0, p21, 1- p21, 0, p31, 1-p31,0), dim=c(S,S))
+    Mnt = array(c(1, 0, 0, p21_i, 1- p21_i, 0, p31_i, 1-p31_i,0), dim=c(S,S))
     # Deterioration
     tmp_p <- array(rep(0, S * S), dim=c(S,S))
-    tmp_p[1,1] = exp(-rate[1]- rate[2]);
-    tmp_p[2,1] = rate[1] * exp(-rate[3]) * (1-exp(-(rate[1]+ rate[2] - rate[3]))) / (rate[1]+ rate[2] - rate[3]);
-    tmp_p[3,1] = exp(-rate[3]);
-    Dtr <- array(c(tmp_p[1,1], tmp_p[2,1], 1- tmp_p[1,1], 0, tmp_p[3,1], 1 - tmp_p[3,1], 0,0,1), dim=c(S, S))
-    latent_states[1,] = Dtr %*% init_state_vec;
+    tmp_p[1,1] <- exp(- rate_i[,1]-  rate_i[,2])
+    tmp_p[2,1] <-  rate_i[,1] * exp(- rate_i[,3]) * (1-exp(-( rate_i[,1] +  rate_i[,2] -  rate_i[,3]))) / ( rate_i[,1] +  rate_i[,2] -  rate_i[,3])
+    tmp_p[3,1] <- exp(- rate_i[,3])
+    Dtr <- array(c(tmp_p[1,1], tmp_p[2,1], 1- tmp_p[1,1] - tmp_p[2,1], 0, tmp_p[3,1], 1 - tmp_p[3,1], 0,0,1), dim=c(S, S))
+    latent_states[1,] <- Dtr %*% init_state_vec
     for (t in 2:T){
-      latent_states[t,] =  (Dtr %*% Mnt) %*% latent_states[t-1,];
+      latent_states[t,] <-  (Dtr %*% Mnt) %*% latent_states[t-1,]
     }
-  list(
-    generated = lapply(seq(1:length(obs2time)), function(n){sample(c(1,2,3), 1, prob = latent_states[obs2time[n],])}),
-    parameters = list(
-      p21 = p21,
-      p31 = p31,
-      rate = rate
-    )
-  )
+    gen_states <- unlist(lapply(seq(1:length(obs2time)), function(n){sample(c(1,2,3), 1, prob = latent_states[obs2time[n],])}))
+    generated[[iter]] <- list(N = N, T = T, S = S, P = P, states = gen_states, obs2time = obs2time, initial_state = initial_state)
   }
+  SBC_datasets(
+    parameters = posterior::as_draws_matrix(posterior::draws_rvars(p21 = true_prival$p21, p31 = true_prival$p31, rate = true_prival$rate)),
+    generated <- generated
+  )
 }
-
-# 3. posterior simulator: prior and data simulator embedded in stan with supplied data
-# 3-1. prior and data simulator
-modelName = "DMSep"
-Dir <- set_get_Dir(modelName,  "R/2_Theta_bar_Y/DtrMngSep")
-DMSep = cmdstanr::cmdstan_model(Dir$file)
-
-# 3-2. simulated y data template with init as real y
-obs2time <- complete(generateMice(), 1)$age_ind # data template only
-states <- rep(3, N) # data template
-initial_state <- 1
-stan_data <- list(N= N,T = max(obs2time), S = 3, P = 4, states=states, obs2time=obs2time, initial_state=initial_state)
-SBC_N = 1
-SBC_M = 500 # 40
-workflow <- SBCWorkflow$new(DMSep, generator())
-workflow$simulate(SBC_N, true_pars)
-stan_data$states <- unlist(posterior::draws_of(posterior::subset_draws(workflow$simulated_y, variable="y")$y))
-write.csv(stan_data$states, file = "sim_y.csv")
-summary(stan_data$states)
-sampling_res<-DMSep$sample(data = stan_data,iter_warmup =1000,  iter_sampling = 1000, parallel_chains = 4)
+datasets <- custom_nprior_generator(true_prival)
+backend <- cmdstan_sample_SBC_backend(DMSep, iter_warmup = 500, iter_sampling = 200)
+results_10 <- compute_results(datasets, backend, thin_ranks = 10)
+plot_ecdf_diff(results_10)
diff --git a/R/2_Theta_bar_Y/DtrMngSep/DMsep_5param_test.R b/R/2_Theta_bar_Y/DtrMngSep/DMsep_5param_test.R
@@ -1,48 +1,47 @@
-source(file.path(getwd(), "R/1_Y_bar_y/impute/mice_imputation.R"))
-scriptDir <- getwd()
-set.seed(210106)
 library(rstan)
 library(cmdstanr)
 library(ggplot2)
 library(scales)
-
-#cmdstan: model_DMsep <-stan_m(file = (file.path(getwd(), "R/2_Theta_bar_Y/DtrMngSep/models/DMSep/DMSep.stan")))
-model_DMsep <-stan_model(file = (file.path(getwd(), "R/2_Theta_bar_Y/DtrMngSep/models/DMSep/DMSep.stan")))
-
-mice_imp <- generateMice()
-imputed_data<- complete(mice_imp, 1)
-
-imputed_data_gp <- read.csv("data/y_pred_5var.csv")[,-1]
-imputed_data$y_data <- unlist(c(imputed_data_gp))
-#################################### DM_sep
-# original policy (wihtout pm)
-n_state = 3
-initial_state = 1
-
-generate_state_matrix <- function(data, n){
-  #state <- cut(data, breaks=c(0, 80, 160, max(data)), labels=1:n, include.lowest = TRUE)
-  state <- cut(data, breaks=quantile(data,c(0,1/3,2/3,1)), labels=1:n, include.lowest = TRUE)
-  state<-as.numeric(state)
-  matrix(state,nrow=31)
+source(file.path(getwd(), "R/1_Y_bar_y/impute/mice_imputation.R"))
+set.seed(210106)
+gpImputeDMSepFit <- function(train_ship_ind){
+  #cmdstan: model_DMsep <-stan_m(file = (file.path(getwd(), "R/2_Theta_bar_Y/DtrMngSep/models/DMSep/DMSep.stan")))
+  model_DMsep <-stan_model(file = (file.path(getwd(), "R/2_Theta_bar_Y/DtrMngSep/models/DMSep/DMSep.stan")))
+  mice_imp <- generateMice()
+  imputed_data<- complete(mice_imp, 1)
+  imputed_data_gp <- read.csv("data/y_pred_5var.csv")[,-1]
+  imputed_data$y_data <- unlist(c(imputed_data_gp))
+  #################################### DM_sep
+  # original policy (wihtout pm)
+  n_state = 3
+  initial_state = 1
+  generate_state_matrix <- function(data, n){
+    #state <- cut(data, breaks=c(0, 80, 160, max(data)), labels=1:n, include.lowest = TRUE)
+    state <- cut(data, breaks=quantile(data,c(0,1/3,2/3,1)), labels=1:n, include.lowest = TRUE)
+    state<-as.numeric(state)
+    matrix(state,nrow=31)
+  }
+  state_matrix <- generate_state_matrix(imputed_data$y_data, n_state)
+  states <- as.vector(t(state_matrix))
+  iter=2000
+  MSE_df<-data.frame(index=rep(0,iter),p=rep(0,iter),q=rep(0,iter),train_MSE=rep(0,iter),test_MSE=rep(0,iter))
+  rate_array<-data.frame(period=c(),index=c(),rate=c())
+  D_array <- array(0, dim=c(iter,4,3,3))
+  ship_ind_df<-matrix(0,nrow=iter,ncol=5)
+  train_ind=c(sapply(train_ship_ind,function(x) (x-1)*31+(1:31)))
+  stan_data <- list(N= length(train_ind),T = max(imputed_data$age_ind[train_ind]), S = 3, P = 4, states=states[train_ind], obs2time=imputed_data$age_ind[train_ind], initial_state=initial_state)
+  sampling_res<-sampling(model_DMsep,stan_data, iter = 2000, cores=4)
+  return (sampling_res)
 }
 
-state_matrix <- generate_state_matrix(imputed_data$y_data, n_state)
-states <- as.vector(t(state_matrix))
-
-iter=2000
-MSE_df<-data.frame(index=rep(0,iter),p=rep(0,iter),q=rep(0,iter),train_MSE=rep(0,iter),test_MSE=rep(0,iter))
-rate_array<-data.frame(period=c(),index=c(),rate=c())
-D_array <- array(0, dim=c(iter,4,3,3))
-ship_ind_df<-matrix(0,nrow=iter,ncol=5)
-
-test_ship_ind= c(17,20,24,77,82) #sort(sample(1:99,5)) #c(17,20,24,77,82)
+test_ship_ind= c(17,20,24,77,82) #sort(sample(1:99,5)) = c(17,20,24,77,82)
 ship_ind_df[1,]=test_ship_ind
 test_ind=c(sapply(test_ship_ind,function(x) (x-1)*31+(1:31)))
-
 train_data <- states[-test_ind]
 test_data <- states[test_ind]
-stan_data <- list(N= length(train_data),T = max(imputed_data$age_ind[-test_ind]), S = 3, P = 4, states=train_data, obs2time=imputed_data$age_ind[-test_ind], initial_state=initial_state)
-sampling_res<-sampling(model_DMsep,stan_data, iter = 2000, cores=4)
+#sort(sample(1:99,5)) #c(17,20,24,77,82)
+gpImputeDMSepFit(c(17,20,24,77,82))
+
 res_df<-as.data.frame(sampling_res)
 sample_mean<-apply(res_df,2,mean) #write.csv(sample_mean, "./data/DMSep_validation_trueparam.csv")
 

diff --git a/R/2_Theta_bar_Y/DtrMngSep/models/DMSep/DMSep.stan b/R/2_Theta_bar_Y/DtrMngSep/models/DMSep/DMSep.stan
@@ -28,21 +28,23 @@ transformed parameters {
   matrix[S, S] Dtr; // Deterioration
   matrix[S, S] Mnt; // Maintenance
   simplex[S] latent_states[T];
-  matrix[S, S] tmp_p;
+  real<lower = 0> tmp_p11;
+  real<lower = 0> tmp_p21;
+  real<lower = 0> tmp_p31;
   // Maintenance
   // is left stoch. matrix, transpose of eq.14 from the paper
   Mnt = [[1, p21, p31],
          [0, 1-p21, 1-p31],
          [0, 0, 0]];
   // Deterioration by period
   // is left stoch. matrix, transpose of eq.13 from the paper
-  tmp_p[1,1] = exp(-rate[1]- rate[2]);
-  tmp_p[2,1] = rate[1] * exp(-rate[3]) * (1-exp(-(rate[1]+ rate[2] - rate[3]))) / (rate[1]+ rate[2] - rate[3]);
-  tmp_p[3,1] = exp(-rate[3]);
-  Dtr = [[tmp_p[1,1], 0, 0],
-            [tmp_p[2,1], tmp_p[3,1], 0],
-            [1 - tmp_p[1,1] - tmp_p[2,1], 1 - tmp_p[3,1], 1]];
-  // Inhomogenous Dtr
+  tmp_p11 = exp(-rate[1]- rate[2]);
+  tmp_p21 = rate[1] * exp(-rate[3]) * (1-exp(-(rate[1]+ rate[2] - rate[3]))) / (rate[1]+ rate[2] - rate[3]);
+  tmp_p31 = exp(-rate[3]);
+  Dtr = [[tmp_p11, 0, 0],
+            [tmp_p21, tmp_p31, 0],
+            [1 - tmp_p11 - tmp_p21, 1 - tmp_p31, 1]];
+  // (In) or homogenous Dtr
   latent_states[1] = Dtr * initial;
   for (t in 2:T){
     latent_states[t] =  (Dtr * Mnt) *latent_states[t-1]; //matrix_power((Dtr * Mnt), (t-1)) * initial;
@@ -54,3 +56,9 @@ model {
     states[n] ~ categorical(latent_states[obs2time[n]]);
   }
 }
+generated quantities{
+int<lower=0, upper =S> gen_states[N];
+  for (n in 1:N){
+    gen_states[n] = categorical_rng(latent_states[obs2time[n]]);
+  }
+}
diff --git a/R/2_Theta_bar_Y/DtrMngSep/models/DMSep_inhomo/DMSep_inhomo.stan b/R/2_Theta_bar_Y/DtrMngSep/models/DMSep_inhomo/DMSep_inhomo.stan
@@ -0,0 +1,71 @@
+data {
+  int<lower=0> N; // numbmer of obs
+  int<lower=0> T; // number of time bins = max age
+  int<lower=1>S; // number of states
+  int<lower=1> P; // number of periods for inhomogenous dtr.
+  int<lower=0, upper =S> states[N]; //observed state
+  int obs2time[N]; // map obs to age
+  int<lower=0, upper=S> initial_state;
+}
+
+transformed data {
+  vector[S] initial;
+  vector<lower=0>[S] alpha;
+  vector<lower=0>[S] beta;
+  initial = rep_vector(0, S);
+  initial[initial_state] = 1;
+  alpha = rep_vector(3,3);
+  beta = rep_vector(3,3);
+}
+
+parameters {
+  real<lower=0> rate[P, S];
+  real<lower=0, upper=1> p21;
+  real<lower=0, upper=1> p31;
+  //real<lower=0, upper=1> r;
+  //simplex[K] theta[K];        // transit probs
+}
+
+transformed parameters {
+  matrix[S, S] Dtr[P]; // Deterioration
+  matrix[S, S] Dtr_pow[T]; // Deterioration
+  matrix[S, S] Mnt; // Maintenance
+  simplex[S] latent_states[T];
+  real<lower = 0> tmp_p11;
+  real<lower = 0> tmp_p21;
+  real<lower = 0> tmp_p31;
+    // Maintenance
+    // is left stoch. matrix, transpose of eq.14 from the paper
+    Mnt = [[1, p21, p31],
+           [0, 1-p21, 1-p31],
+           [0, 0, 0]];
+  // Deterioration by period
+  // is left stoch. matrix, transpose of eq.13 from the paper
+    for(p in 1:P){
+      tmp_p11 = exp(-rate[p,1]- rate[p,2]);
+      tmp_p21 = rate[p,1] * exp(-rate[p,3]) * (1-exp(-(rate[p,1]+ rate[p,2] - rate[p,3]))) / (rate[p,1]+ rate[p,2] - rate[p,3]);
+      tmp_p31 = exp(-rate[p,3]);
+      Dtr[p] = [[tmp_p11, 0, 0],
+                [tmp_p21, tmp_p31, 0],
+                [1 - tmp_p11 - tmp_p21, 1 - tmp_p31, 1]];
+  }
+    // (In) or homogenous Dtr
+    latent_states[1] = Dtr[1] * initial;
+    for (t in 2:T){
+      if (t <= 8){latent_states[t] = Dtr[1] * Mnt * latent_states[t-1];}
+      else if (t <=20){latent_states[t] = Dtr[2] * Mnt * latent_states[t-1];}
+      else if (t <=26){latent_states[t] = Dtr[3] * Mnt * latent_states[t-1];}
+      else{latent_states[t] = Dtr[4] * Mnt * latent_states[t-1];}
+    }
+}
+model {
+  for (n in 1:N){
+    states[n] ~ categorical(latent_states[obs2time[n]]);
+  }
+}
+generated quantities{
+int<lower=0, upper =S> gen_states[N];
+  for (n in 1:N){
+    gen_states[n] = categorical_rng(latent_states[obs2time[n]]);
+  }
+}