Compatibility with other R packages

In the examples below, external R packages are used for Bayesian estimation of mixture models, while BayesMultiMode is used for mode inference.

library(BayesMultiMode)
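
Every workflow below follows the same pattern: gather the posterior draws in a matrix with one column per mixture parameter, wrap them with bayes_mixture(), and pass the result to bayes_mode(). As a minimal sketch, where mcmc_matrix and y are placeholders for the draws and data produced in each example:

# mcmc_matrix: hypothetical matrix of posterior draws, one column per
# mixture parameter, e.g. eta1, eta2, mu1, mu2, sigma1, sigma2
bmix = bayes_mixture(mcmc = mcmc_matrix, data = y, dist = "normal")
bayesmode = bayes_mode(bmix)  # mode inference on each retained draw
plot(bayesmode)
summary(bayesmode)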

rjags

Estimation

set.seed(123)

library(rjags)

# Assuming you have your data in 'data_vector'
# Specify the model
model_string = "
  model {
    for (i in 1:N) {
      data_vector[i] ~ dnorm(mu[component[i]], tau[component[i]])
      component[i] ~ dcat(theta[])
    }

    for (j in 1:K) {
      mu[j] ~ dnorm(0, 0.01)
      tau[j] ~ dgamma(1, 1)
    }

    theta ~ ddirch(rep(1, K))
  }
"

K = 2
# Data for JAGS
y = c(rnorm(100,0,1),
      rnorm(100,5,1))

data_jags = list(data_vector = y, N = length(y), K = K)

# Initialize parameters
inits = function() {
  list(mu = rnorm(K, 0, 10),
       tau = rgamma(K, 1, 1),
       theta = rep(1 / K, K))
}

# Run the model
model = jags.model(textConnection(model_string), data = data_jags, inits = inits)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 200
##    Unobserved stochastic nodes: 205
##    Total graph size: 811
## 
## Initializing model
update(model, 1000)  # Burn-in
fit = coda.samples(model, variable.names = c("mu", "tau", "theta"), n.iter = 2000)

Create a BayesMixture object

fit_mat = posterior::as_draws_matrix(fit)
bmix = bayes_mixture(mcmc = fit_mat,
                     data = y,
                     burnin = 0, # the burnin has already been discarded
                     dist = "normal",
                     vars_to_keep = c("theta", "mu", "tau"),
                     vars_to_rename = c("eta" = "theta",
                                        "sigma" = "tau"))

# plot the mixture
plot(bmix)

# mode estimation
bayesmode = bayes_mode(bmix)

# plot mode inference
plot(bayesmode)

# Summary of mode inference
summary(bayesmode)
## Posterior probability of multimodality is 1 
## 
## Inference results on the number of modes:
##   p_nb_modes (matrix, dim 1x2): 
##      number of modes posterior probability
## [1,]               2                     1
## 
## Inference results on mode locations:
##   p_loc (matrix, dim 56x2): 
##      mode location posterior probability
## [1,] -3.000000e-01                0.0010
## [2,] -2.000000e-01                0.0000
## [3,] -1.000000e-01                0.0000
## [4,]  5.551115e-17                0.0000
## [5,]  1.000000e-01                0.0000
## [6,]  2.000000e-01                0.1675
## ... (50 more rows)

rstan

Estimation

set.seed(123)
library(rstan)

normal_mixture_model <- "
data {
  int<lower=0> N;         // number of data points
  real y[N];              // observed data
  int<lower=1> K;         // number of mixture components
}

parameters {
  simplex[K] theta;       // mixing proportions
  ordered[K] mu;          // means of the Gaussian components
  vector<lower=0>[K] sigma; // standard deviations of the components
}

model {
  vector[K] log_theta = log(theta); // cache log calculation
  
  // Priors
  mu ~ normal(0, 10);
  sigma ~ cauchy(0, 5);

  // Likelihood
  for (n in 1:N) {
    vector[K] log_lik;
    for (k in 1:K) {
      log_lik[k] = log_theta[k] + normal_lpdf(y[n] | mu[k], sigma[k]);
    }
    target += log_sum_exp(log_lik);
  }
}
"

y = c(rnorm(100,0,1),
      rnorm(100,5,1))

data_list <- list(
  N = length(y),
  y = y,
  K = 2  # Assuming a two-component mixture
)

fit <- stan(model_code = normal_mixture_model, data = data_list, iter = 2000, chains = 1)
## 
## SAMPLING FOR MODEL '07e8533f6a3188d2d50eb989867b63d7' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 4.1e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.41 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.254864 seconds (Warm-up)
## Chain 1:                0.196625 seconds (Sampling)
## Chain 1:                0.451489 seconds (Total)
## Chain 1:

Create a BayesMixture object

fit_mat = posterior::as_draws_matrix(fit)
bmix = bayes_mixture(mcmc = fit_mat,
                     data = y,
                     burnin = 0, # the burnin has already been discarded
                     dist = "normal",
                     vars_to_keep = c("theta", "mu", "sigma"),
                     vars_to_rename = c("eta" = "theta"))

# plot the mixture
plot(bmix)

# mode estimation
bayesmode = bayes_mode(bmix)

# plot mode inference
plot(bayesmode)

# Summary of mode inference
summary(bayesmode)
## Posterior probability of multimodality is 1 
## 
## Inference results on the number of modes:
##   p_nb_modes (matrix, dim 1x2): 
##      number of modes posterior probability
## [1,]               2                     1
## 
## Inference results on mode locations:
##   p_loc (matrix, dim 55x2): 
##      mode location posterior probability
## [1,]          -0.2                 0.007
## [2,]          -0.1                 0.097
## [3,]           0.0                 0.308
## [4,]           0.1                 0.000
## [5,]           0.2                 0.194
## [6,]           0.3                 0.031
## ... (49 more rows)

bayesmix

Estimation

set.seed(123)
library(bayesmix)

## taken from the examples of JAGSrun()
data("fish", package = "bayesmix")
y = unlist(fish)
prefix <- "fish"
variables <- c("mu","tau","eta")
k <- 3
modelFish <- BMMmodel(k = k, priors = list(kind = "independence",
                                           parameter = "priorsFish", hierarchical = "tau"))
controlFish <- JAGScontrol(variables = c(variables, "S"),
                           n.iter = 2000, burn.in = 1000)
z1 <- JAGSrun(fish, prefix, model = modelFish, initialValues = list(S0 = 2),
              control = controlFish, cleanup = TRUE, tmp = FALSE)
## Compiling model graph
##    Declaring variables
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 256
##    Unobserved stochastic nodes: 264
##    Total graph size: 1044
## 
## Initializing model
fit = z1$results

Create a BayesMixture object

# JAGSrun seems to report the variance (sigma2) rather than the standard deviation, so we need a custom pdf:
pdf_func <- function(x, pars) {
  dnorm(x, pars["mu"], sqrt(pars["sigma"]))
}
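
A quick sanity check of the custom density with illustrative values: passing a variance of 4 through the sigma element should reproduce a normal density with standard deviation 2.

pdf_func(1, c(mu = 0, sigma = 4))  # equals dnorm(1, 0, 2) because sqrt(4) = 2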

bmix = bayes_mixture(mcmc = fit,
                     data = y,
                     burnin = 1000,
                     pdf_func = pdf_func,
                     dist_type = "continuous",
                     loc = "mu",
                     vars_to_keep = c("eta", "mu", "sigma")) # vars_to_keep matches names while ignoring trailing indices

# plot the mixture
plot(bmix)

# mode estimation
bayesmode = bayes_mode(bmix)

# plot mode inference
plot(bayesmode)

# Summary of mode inference
summary(bayesmode)
## Posterior probability of multimodality is 0.027 
## 
## Inference results on the number of modes:
##   p_nb_modes (matrix, dim 3x2): 
##      number of modes posterior probability
## [1,]               1                 0.973
## [2,]               2                 0.026
## [3,]               3                 0.001
## 
## Inference results on mode locations:
##   p_loc (matrix, dim 73x2): 
##      mode location posterior probability
## [1,]           4.9                 0.002
## [2,]           5.0                 0.014
## [3,]           5.1                 0.000
## [4,]           5.2                 0.095
## [5,]           5.3                 0.000
## [6,]           5.4                 0.148
## ... (67 more rows)

BNPmix

Estimation

set.seed(123)
library(BNPmix)
library(dplyr)

y = c(rnorm(100,0,1),
      rnorm(100,5,1))

## estimation
fit = PYdensity(y,
                mcmc = list(niter = 2000,
                            nburn = 1000,
                            print_message = FALSE),
                output = list(out_param = TRUE))

Transforming the output into an MCMC matrix with one column per variable

fit_mat = list()

for (i in seq_along(fit$p)) {
  k = length(fit$p[[i]][, 1])
  
  draw = c(fit$p[[i]][, 1],
           fit$mean[[i]][, 1],
           sqrt(fit$sigma2[[i]][, 1]),
           i)
  
  names(draw)[1:k] = paste0("eta", 1:k)
  names(draw)[(k+1):(2*k)] = paste0("mu", 1:k)
  names(draw)[(2*k+1):(3*k)] = paste0("sigma", 1:k)
  names(draw)[3*k + 1] = "draw"
  
  fit_mat[[i]] = draw
}

fit_mat = as.matrix(bind_rows(fit_mat))
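
Because the number of occupied clusters in a Pitman-Yor mixture varies across draws, draws with fewer clusters lack some of the named entries; bind_rows aligns columns by name and fills the gaps with NA. A quick illustrative check:

colSums(is.na(fit_mat))  # number of draws in which each component was unoccupied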

Create a BayesMixture object

bmix = bayes_mixture(mcmc = fit_mat,
                     data = y,
                     burnin = 0, # the burnin has already been discarded
                     dist = "normal",
                     vars_to_keep = c("eta", "mu", "sigma"))

# plot the mixture
plot(bmix)

# mode estimation
bayesmode = bayes_mode(bmix)

# plot mode inference
plot(bayesmode)

# Summary of mode inference
summary(bayesmode)
## Posterior probability of multimodality is 1 
## 
## Inference results on the number of modes:
##   p_nb_modes (matrix, dim 2x2): 
##      number of modes posterior probability
## [1,]               2                 0.993
## [2,]               3                 0.007
## 
## Inference results on mode locations:
##   p_loc (matrix, dim 85x2): 
##      mode location posterior probability
## [1,] -3.000000e-01                 0.006
## [2,] -2.000000e-01                 0.000
## [3,] -1.000000e-01                 0.000
## [4,]  5.551115e-17                 0.000
## [5,]  1.000000e-01                 0.000
## [6,]  2.000000e-01                 0.192
## ... (79 more rows)

Mode estimation in mixtures estimated with maximum likelihood

In the examples below, external R packages are used to estimate mixtures with the EM algorithm, while BayesMultiMode is used for estimating and plotting modes.
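
Both examples follow the same pattern: collect the point estimates in a named vector, build a mixture object with mixture(), then locate its modes with mix_mode(). A minimal sketch, where pars stands in for the estimates:

# pars: named vector of estimates, e.g. eta1, eta2, mu1, mu2, sigma1, sigma2
mix = mixture(pars, dist = "normal", range = range(y))
modes = mix_mode(mix)
plot(modes)
summary(modes)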

mixtools

set.seed(123)
library(mixtools)

y = c(rnorm(100,0,1),
      rnorm(100,3.5,1.5))

fit = normalmixEM(y)
## number of iterations= 172
pars = c(eta = fit$lambda, mu = fit$mu, sigma = fit$sigma)

mix = mixture(pars, dist = "normal", range = c(min(y), max(y))) # create a new object of class Mixture
modes = mix_mode(mix) # estimate modes and create an object of class Mode

# mode inference
plot(modes)

summary(modes)
## Modes of a normal mixture with 2 components.
## - Number of modes found: 2
## - Mode estimation technique: fixed-point algorithm
## - Estimates of mode locations:
##   mode_estimates (numeric vector, dim 2): 
## [1] 3 0
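
For intuition, the fixed-point technique reported in the summary can be illustrated with a toy mean-shift-style update for Gaussian mixtures; this sketch is for illustration only and is not BayesMultiMode's implementation:

# Toy fixed-point mode search for a univariate Gaussian mixture.
fixed_point_mode = function(x0, eta, mu, sigma, tol = 1e-8, max_iter = 1000) {
  x = x0
  for (i in 1:max_iter) {
    w = eta * dnorm(x, mu, sigma) / sigma^2  # component weights at the current point
    x_new = sum(w * mu) / sum(w)             # weighted-mean update
    if (abs(x_new - x) < tol) break
    x = x_new
  }
  x
}

fixed_point_mode(min(y), fit$lambda, fit$mu, fit$sigma)  # climbs to the mode near 0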

mclust

set.seed(123)
library(mclust)

y = c(rnorm(100,0,1),
      rnorm(100,3.5,1.5))

fit = Mclust(y)

pars = c(eta = fit$parameters$pro,
         mu = fit$parameters$mean,
         sigma = sqrt(fit$parameters$variance$sigmasq))
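
Note that Mclust selects the model by BIC; for univariate data it may return the equal-variance model "E", in which case sigmasq is a single number rather than one value per component. A more defensive sketch that recycles it when needed:

sigmasq = fit$parameters$variance$sigmasq
if (length(sigmasq) == 1) sigmasq = rep(sigmasq, fit$G)  # recycle under model "E"
pars = c(eta = fit$parameters$pro,
         mu = fit$parameters$mean,
         sigma = sqrt(sigmasq))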

mix = mixture(pars, dist = "normal", range = c(min(y), max(y))) # creates a new object of class Mixture

summary(mix)
## Estimated mixture distribution.
## - Mixture type: continuous
## - Number of components: 2
## - Distribution family: normal
## - Number of distribution variables: 2
## - Names of variables: mu sigma
## - Parameter estimates:
##   pars (numeric vector, dim 6): 
##        eta1        eta2        mu.1        mu.2      sigma1      sigma2 
##  0.38684421  0.61315579 -0.03856849  2.82059287  0.83331314  1.75291009
# mode inference
modes = mix_mode(mix) # estimates modes and creates an object of class Mode

plot(modes)

summary(modes)
## Modes of a normal mixture with 2 components.
## - Number of modes found: 2
## - Mode estimation technique: fixed-point algorithm
## - Estimates of mode locations:
##   mode_estimates (numeric vector, dim 2): 
## [1] 0 3