From 4f2ec714eb84f5ba8af4e149ba4afce1c02539e2 Mon Sep 17 00:00:00 2001
From: Ian Grooms <ian.grooms@colorado.edu>
Date: Fri, 12 Jul 2024 12:09:20 -0600
Subject: [PATCH 01/14] Add KQCEF

First pass at adding the kernel density estimation based QCEF into
latest version of DART.
---
 .../assimilation/algorithm_info_mod.f90       |  15 +-
 .../modules/assimilation/assim_tools_mod.f90  | 169 ++-
 .../assimilation/distribution_params_mod.f90  |   4 +-
 .../assimilation/kde_distribution_mod.f90     | 996 ++++++++++++++++++
 .../assimilation/probit_transform_mod.f90     | 181 +++-
 .../modules/assimilation/rootfinding_mod.f90  | 267 +++++
 .../programs/test_kde_dist/test_kde_dist.f90  |   8 +
 .../programs/test_kde_dist/test_kde_dist.rst  |   1 +
 8 files changed, 1632 insertions(+), 9 deletions(-)
 create mode 100644 assimilation_code/modules/assimilation/kde_distribution_mod.f90
 create mode 100644 assimilation_code/modules/assimilation/rootfinding_mod.f90
 create mode 100644 assimilation_code/programs/test_kde_dist/test_kde_dist.f90
 create mode 100644 assimilation_code/programs/test_kde_dist/test_kde_dist.rst

diff --git a/assimilation_code/modules/assimilation/algorithm_info_mod.f90 b/assimilation_code/modules/assimilation/algorithm_info_mod.f90
index 42d5f0c218..1eb01207cd 100644
--- a/assimilation_code/modules/assimilation/algorithm_info_mod.f90
+++ b/assimilation_code/modules/assimilation/algorithm_info_mod.f90
@@ -22,7 +22,7 @@ module algorithm_info_mod
 
 use distribution_params_mod, only : NORMAL_DISTRIBUTION, BOUNDED_NORMAL_RH_DISTRIBUTION, &
    GAMMA_DISTRIBUTION, BETA_DISTRIBUTION, LOG_NORMAL_DISTRIBUTION, UNIFORM_DISTRIBUTION,  &
-   PARTICLE_FILTER_DISTRIBUTION
+   PARTICLE_FILTER_DISTRIBUTION, KDE_DISTRIBUTION
 
 implicit none
 private
@@ -40,12 +40,13 @@ module algorithm_info_mod
 integer, parameter :: OBS_PARTICLE       = 4
 integer, parameter :: UNBOUNDED_RHF      = 8
 integer, parameter :: GAMMA_FILTER       = 11
-integer, parameter :: BOUNDED_NORMAL_RHF = 101 
+integer, parameter :: BOUNDED_NORMAL_RHF = 101
+integer, parameter :: KERNEL_QCEF        = 102
 
 public :: obs_error_info, probit_dist_info, obs_inc_info, &
           init_algorithm_info_mod, end_algorithm_info_mod, &
           EAKF, ENKF, BOUNDED_NORMAL_RHF, UNBOUNDED_RHF, &
-          GAMMA_FILTER, KERNEL, OBS_PARTICLE
+          GAMMA_FILTER, KERNEL, OBS_PARTICLE, KERNEL_QCEF
 
 ! type definitions for the QCF table
 type obs_error_info_type
@@ -223,6 +224,8 @@ subroutine read_qceff_table(qceff_table_filename)
          qceff_table_data(row)%probit_inflation%dist_type = LOG_NORMAL_DISTRIBUTION
       case ('UNIFORM_DISTRIBUTION')
          qceff_table_data(row)%probit_inflation%dist_type = UNIFORM_DISTRIBUTION
+      case ('KDE_DISTRIBUTION')
+         qceff_table_data(row)%probit_inflation%dist_type = KDE_DISTRIBUTION
       !!!case ('PARTICLE_FILTER_DISTRIBUTION')
          !!!qceff_table_data(row)%probit_inflation%dist_type = PARTICLE_FILTER_DISTRIBUTION
       case default
@@ -246,6 +249,8 @@ subroutine read_qceff_table(qceff_table_filename)
          qceff_table_data(row)%probit_state%dist_type = LOG_NORMAL_DISTRIBUTION
       case ('UNIFORM_DISTRIBUTION')
          qceff_table_data(row)%probit_state%dist_type = UNIFORM_DISTRIBUTION
+      case ('KDE_DISTRIBUTION')
+         qceff_table_data(row)%probit_inflation%dist_type = KDE_DISTRIBUTION
       !!!case ('PARTICLE_FILTER_DISTRIBUTION')
          !!!qceff_table_data(row)%probit_state%dist_type = PARTICLE_FILTER_DISTRIBUTION
       case default
@@ -268,6 +273,8 @@ subroutine read_qceff_table(qceff_table_filename)
          qceff_table_data(row)%probit_extended_state%dist_type = LOG_NORMAL_DISTRIBUTION
       case ('UNIFORM_DISTRIBUTION')
          qceff_table_data(row)%probit_extended_state%dist_type = UNIFORM_DISTRIBUTION
+      case ('KDE_DISTRIBUTION')
+         qceff_table_data(row)%probit_inflation%dist_type = KDE_DISTRIBUTION
       !!!case ('PARTICLE_FILTER_DISTRIBUTION')
          !!!qceff_table_data(row)%probit_extended_state%dist_type = PARTICLE_FILTER_DISTRIBUTION
       case default
@@ -290,6 +297,8 @@ subroutine read_qceff_table(qceff_table_filename)
          qceff_table_data(row)%obs_inc_info%filter_kind = GAMMA_FILTER
       case ('BOUNDED_NORMAL_RHF')
          qceff_table_data(row)%obs_inc_info%filter_kind = BOUNDED_NORMAL_RHF
+      case ('KERNEL_QCEF')
+         qceff_table_data(row)%obs_inc_info%filter_kind = KERNEL_QCEF
       case default
          write(errstring, *) 'Invalid filter kind: ', trim(filter_kind_string(row))
          call error_handler(E_ERR, 'read_qceff_table:', errstring, source)
diff --git a/assimilation_code/modules/assimilation/assim_tools_mod.f90 b/assimilation_code/modules/assimilation/assim_tools_mod.f90
index d1b6e2bbfc..6054c0b9b9 100644
--- a/assimilation_code/modules/assimilation/assim_tools_mod.f90
+++ b/assimilation_code/modules/assimilation/assim_tools_mod.f90
@@ -78,13 +78,16 @@ module assim_tools_mod
 
 use algorithm_info_mod, only : probit_dist_info, obs_inc_info, EAKF, ENKF, &
                                BOUNDED_NORMAL_RHF, UNBOUNDED_RHF, GAMMA_FILTER, &
-                               KERNEL, OBS_PARTICLE
+                               KERNEL, OBS_PARTICLE, KERNEL_QCEF
 
 use gamma_distribution_mod, only : gamma_cdf, inv_gamma_cdf, gamma_mn_var_to_shape_scale, &
                                    gamma_gamma_prod
 
 use bnrh_distribution_mod, only   :  inv_bnrh_cdf, bnrh_cdf, inv_bnrh_cdf_like
 
+use kde_distribution_mod, only : kde_cdf_params, inv_kde_cdf_params, obs_dist_types,      &
+                                 pack_kde_params, likelihood_function, separate_ensemble
+
 use distribution_params_mod, only : distribution_params_type, deallocate_distribution_params
                                
 
@@ -887,7 +890,7 @@ end subroutine filter_assim
 !-------------------------------------------------------------
 
 subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_kind, obs_inc, &
-   inflate, my_cov_inflate, my_cov_inflate_sd, net_a)
+   inflate, my_cov_inflate, my_cov_inflate_sd, net_a, obs_dist_type)
 
 ! Given the ensemble prior for an observation, the observation, and
 ! the observation error variance, computes increments and adjusts
@@ -900,6 +903,7 @@ subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_kind, obs_inc, &
 type(adaptive_inflate_type), intent(inout) :: inflate
 real(r8),                    intent(inout) :: my_cov_inflate, my_cov_inflate_sd
 real(r8),                    intent(out)   :: net_a
+integer, optional,           intent(in)    :: obs_dist_type
 
 real(r8) :: ens(ens_size), inflate_inc(ens_size)
 real(r8) :: prior_mean, prior_var, new_val(ens_size)
@@ -1025,8 +1029,28 @@ subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_kind, obs_inc, &
       !!!endif
 
    !--------------------------------------------------------------------------
+
+   else if(filter_kind == KERNEL_QCEF) then
+      ! obs_dist_type is an integer whose value is provided by kde_distribution_mod
+      ! Options are uninformative, normal, binomial, gamma,
+      ! inv_gamma, lognormal, and truncated_normal. Details can be found in the
+      ! definition of the likelihood function in kde_distribution_mod.f90. Different
+      ! types of observations have different interpretations for `obs_var'.
+      if (.not. present(obs_dist_type)) then
+!          call error_handler(E_ERR,'obs_increment', &
+!                  'For kde filter, must specify obs_dist_type', source)
+         ! IG: Hack. Assume that the obs dist type is truncated normal with the same
+         ! bounds as the observed variable.
+         call obs_increment_kde(ens, ens_size, obs, obs_var, &
+            obs_dist_types%truncated_normal, bounded_below, &
+            bounded_above, lower_bound, upper_bound, obs_inc)
+      else
+         call obs_increment_kde(ens, ens_size, obs, obs_var, obs_dist_type, &
+            bounded_below, bounded_above, lower_bound, upper_bound, obs_inc)
+      end if
+
    else
-      call error_handler(E_ERR,'obs_increment', &
+       call error_handler(E_ERR,'obs_increment', &
               'Illegal value of filter_kind', source)
    endif
 endif
@@ -1167,7 +1191,144 @@ subroutine obs_increment_bounded_norm_rhf(ens, ens_like, ens_size, prior_var, &
 end subroutine obs_increment_bounded_norm_rhf
 
 
+subroutine obs_increment_kde(ens, ens_size, y, obs_param, obs_dist_type, &
+   bounded_below, bounded_above, lower_bound, upper_bound, obs_inc)
+   integer,  intent(in)  :: ens_size
+   real(r8), intent(in)  :: ens(ens_size)
+   real(r8), intent(in)  :: y
+   real(r8), intent(in)  :: obs_param
+   integer,  intent(in)  :: obs_dist_type
+   logical,  intent(in)  :: bounded_below, bounded_above
+   real(r8), intent(in)  :: lower_bound,   upper_bound
+   real(r8), intent(out) :: obs_inc(ens_size)
+
+   ! Applies a QCEF based on kernel density estimation & quadrature
+
+   real(r8) :: q
+   real(r8) :: p_lower_prior, p_int_prior, p_upper_prior
+   real(r8) :: p_lower_post, p_int_post, p_upper_post
+   real(r8) :: like_ens_mean ! ensemble mean of the likelihood
+   real(r8) :: ens_interior(ens_size)
+   real(r8) :: unif
+   real(r8) :: d(ens_size), d_max
+   type(distribution_params_type) :: params_interior_prior
+   type(distribution_params_type) :: params_interior_posterior
+   integer  :: ens_size_interior
+   integer  :: i, count_lower, count_upper
+
+   ! If this is first time through, need to initialize the random sequence.
+   ! This will reproduce exactly for multiple runs with the same task count,
+   ! but WILL NOT reproduce for a different number of MPI tasks.
+   ! To make it independent of the number of MPI tasks, it would need to
+   ! use the global ensemble number or something else that remains constant
+   ! as the processor count changes.  this is not currently an argument to
+   ! this function and so we are not trying to make it task-count invariant.
+   if(first_inc_ran_call) then
+      call random_seed()
+      call random_number(q)
+      i = int(q * 1000)
+      call init_random_seq(inc_ran_seq, my_task_id() + i)
+      first_inc_ran_call = .false.
+   endif
+
+   ! If all ensemble members are identical, then there is no update
+   d(:) = abs( ens(:) - ens(1) )
+   d_max = maxval(d)
+   if(d_max .le. 0.0_r8) then
+      obs_inc(:) = 0._r8
+      return
+   endif
+
+   ! Get mixture component probabilities for the prior, and the interior ensemble, and its params
+   call separate_ensemble(ens, ens_size, bounded_below, bounded_above, &
+      lower_bound, upper_bound, ens_interior, ens_size_interior, &
+      p_lower_prior, p_int_prior, p_upper_prior)
+   if (ens_size_interior .gt. 1) then
+      d(1:ens_size_interior) = abs( ens_interior(1:ens_size_interior) - ens_interior(1) )
+      d_max = maxval(d(1:ens_size_interior))
+      call pack_kde_params(ens_size_interior, bounded_below, bounded_above, lower_bound, upper_bound, &
+                           ens_interior, y, obs_param, obs_dist_types%uninformative, &
+                           params_interior_prior)
+      call pack_kde_params(ens_size_interior, bounded_below, bounded_above, lower_bound, upper_bound, &
+                           ens_interior, y, obs_param, obs_dist_type, &
+                           params_interior_posterior)
+   else
+      d_max = 0._r8
+   endif
+
+   ! Get mixture component probabilities for the posterior
+   if (bounded_below) then
+      p_lower_post = p_lower_prior * likelihood_function(lower_bound, y, obs_param, obs_dist_type)
+   else
+      p_lower_post  = 0._r8
+   end if
+   if (bounded_above) then
+      p_upper_post = p_upper_prior * likelihood_function(upper_bound, y, obs_param, obs_dist_type)
+   else
+      p_upper_post  = 0._r8
+   end if
+   p_int_post = 0._r8
+   do i=1,ens_size_interior
+      p_int_post = p_int_post + likelihood_function(ens_interior(i), y, obs_param, obs_dist_type)
+   end do
+   p_int_post = p_int_prior * p_int_post / real(ens_size_interior, r8)
+   ! Get prior ensemble mean of likelihood
+   like_ens_mean = p_lower_post + p_int_post + p_upper_post
+   ! If likelihood underflow, assume flat likelihood, so no increments
+   if(like_ens_mean .le. 0.0_r8) then
+      obs_inc(:) = 0.0_r8
+      return
+   else ! Finish getting mixture component probabilities for the posterior
+      p_lower_post = p_lower_post / like_ens_mean
+      p_upper_post = p_upper_post / like_ens_mean
+      p_int_post   = 1._r8 - p_lower_post - p_upper_post
+   end if
+
+   ! Update ensemble members -> get observation increments
+
+   count_lower = 0
+   count_upper = 0
+   unif = random_uniform(inc_ran_seq) / real(ens_size, r8)
+   do i=1,ens_size
+      ! Map each ensemble member to a quantile using the prior cdf. Members
+      ! on the boundary get random quantiles.
+      if (bounded_below .and. (ens(i) .le. lower_bound)) then
+         ! Rather than draw a random for each member on the boundary,
+         ! I draw one random (above) then add 1/Nb to each subsequent value
+         q = unif + real(count_lower, r8) / real(ens_size, r8)
+         count_lower = count_lower + 1
+         write(*,*) 'lower boundary quantile ', q
+       elseif (bounded_above .and.  (ens(i) .ge. upper_bound)) then
+         ! As above, I draw one random then add 1/Nb to each subsequent value
+         q = 1._r8 - (unif + real(count_upper, r8) / real(ens_size, r8))
+         count_upper = count_upper + 1
+         write(*,*) 'upper boundary quantile ', q
+       elseif ((ens_size_interior .eq. 1) .or. (d_max .le. 0._r8)) then
+         ! Can't use kde with only one ensemble member (or identical ensemble members), so assign a random quantile
+         q = (p_int_prior * random_uniform(inc_ran_seq)) + p_lower_prior
+      else ! Use the interior cdf obtained using kde.
+         q = kde_cdf_params(ens(i), params_interior_prior)
+         q = (p_int_prior * q) + p_lower_prior
+      end if
+      ! Invert the posterior kde cdf to get the updated ensemble member, then
+      ! subtract to get the increment.
+      if (q .le. p_lower_post) then ! Posterior value on the lower boundary
+         obs_inc(i) = lower_bound - ens(i)
+      elseif (q .ge. (1._r8 - p_upper_post)) then ! Posterior value on the upper boundary
+         obs_inc(i) = upper_bound - ens(i)
+      elseif ((ens_size_interior .eq. 1) .or. (d_max .le. 0._r8)) then
+         ! posterior value in the interior, but there's only one prior ensemble member/value
+         ! in the interior, so we just assign the posterior ensemble member equal to the prior one
+         obs_inc(i) = ens_interior(1) - ens(i)
+      else ! posterior value in the interior, can use kde
+         ! Rescale q to be between 0 and 1 before inverting interior cdf
+         q = (q - p_lower_post) / p_int_post
+         obs_inc(i) = inv_kde_cdf_params(q, params_interior_posterior)
+         obs_inc(i) = obs_inc(i) - ens(i)
+      end if
+   end do
 
+end subroutine obs_increment_kde
 
 ! Computes a normal or truncated normal (above and/or below) likelihood.
 function get_truncated_normal_like(x, obs, obs_var, &
@@ -1420,6 +1581,8 @@ subroutine obs_increment_kernel(ens, ens_size, obs, obs_var, obs_inc)
 endif
 
 ! Generate a uniform random number and a Gaussian for each new member
+!! IG: This could be done with a single random number and in a way that
+!! preserves the order of the ensemble members
 do i = 1, ens_size
    unif = random_uniform(inc_ran_seq)
    ! Figure out which kernel it's in
diff --git a/assimilation_code/modules/assimilation/distribution_params_mod.f90 b/assimilation_code/modules/assimilation/distribution_params_mod.f90
index 7951a7c7bd..43849f7fbd 100644
--- a/assimilation_code/modules/assimilation/distribution_params_mod.f90
+++ b/assimilation_code/modules/assimilation/distribution_params_mod.f90
@@ -25,10 +25,12 @@ module distribution_params_mod
 integer, parameter :: LOG_NORMAL_DISTRIBUTION        = 5
 integer, parameter :: UNIFORM_DISTRIBUTION           = 6
 integer, parameter :: PARTICLE_FILTER_DISTRIBUTION   = 7
+integer, parameter :: KDE_DISTRIBUTION            = 8
 
 public :: distribution_params_type, deallocate_distribution_params, &
    NORMAL_DISTRIBUTION, BOUNDED_NORMAL_RH_DISTRIBUTION, GAMMA_DISTRIBUTION, BETA_DISTRIBUTION, &
-      LOG_NORMAL_DISTRIBUTION, UNIFORM_DISTRIBUTION, PARTICLE_FILTER_DISTRIBUTION
+      LOG_NORMAL_DISTRIBUTION, UNIFORM_DISTRIBUTION, PARTICLE_FILTER_DISTRIBUTION, &
+      KDE_DISTRIBUTION
 
 contains
 
diff --git a/assimilation_code/modules/assimilation/kde_distribution_mod.f90 b/assimilation_code/modules/assimilation/kde_distribution_mod.f90
new file mode 100644
index 0000000000..0b1ef1a042
--- /dev/null
+++ b/assimilation_code/modules/assimilation/kde_distribution_mod.f90
@@ -0,0 +1,996 @@
+module kde_distribution_mod
+
+use types_mod,               only : r8, missing_r8
+
+use utilities_mod,           only : E_ERR, E_MSG, error_handler
+
+use sort_mod,                only : sort
+
+use distribution_params_mod, only : distribution_params_type, deallocate_distribution_params, &
+                                    KDE_DISTRIBUTION
+
+use rootfinding_mod,         only : inv_cdf
+
+use normal_distribution_mod, only : normal_cdf
+
+implicit none
+private
+
+public :: kde_cdf, kde_cdf_params, inv_kde_cdf, inv_kde_cdf_params,       &
+          test_kde, obs_dist_types, likelihood_function, pack_kde_params, &
+          separate_ensemble
+
+type available_obs_dist_types
+   integer  :: uninformative, normal, binomial, gamma, &
+               inv_gamma, lognormal, truncated_normal
+end type
+
+type(available_obs_dist_types), parameter :: obs_dist_types = &
+available_obs_dist_types(uninformative=999, normal=998, binomial=997, &
+   gamma=996, inv_gamma=995, lognormal=994, truncated_normal=993)
+character(len=512)          :: errstring
+character(len=*), parameter :: source = 'kde_distribution_mod.f90'
+
+contains
+
+!---------------------------------------------------------------------------
+
+function likelihood_function(x, y, obs_param, obs_dist_type, &
+        bounded_above, bounded_below, upper_bound, lower_bound) result(l)
+   real(r8)             :: l  ! likelihood value
+   real(r8), intent(in) :: x  ! state value
+   real(r8), intent(in) :: y  ! obs value
+   real(r8), intent(in) :: obs_param  ! meaning depends on obs_dist_type
+   integer,  intent(in) :: obs_dist_type ! obs distribution type
+   logical, optional, intent(in)  :: bounded_above, bounded_below
+   real(r8), optional, intent(in) :: upper_bound, lower_bound
+
+   ! Evaluates the likelihood pdf(y | x) for various kinds of observation
+   ! distributions. The value returned is not equal to the observation
+   ! pdf evaluated at y, because normalization constants that don't depend
+   ! on x are omitted.
+
+   real(r8) :: gamma_shape, gamma_scale
+   real(r8) :: inv_gamma_shape, inv_gamma_scale
+   real(r8) :: cdf(2)
+   logical  :: bounded_above_val, bounded_below_val
+
+   l = 1._r8 ! Initialize
+
+   select case (obs_dist_type)
+      case (obs_dist_types%uninformative)
+         ! Uninformative observations have a likelihood equal to one.
+         l = 1._r8
+      case (obs_dist_types%normal)
+         ! For a normal obs distribution, like_param is the obs error variance
+         if (obs_param <= 0._r8) then
+            write(errstring, *) 'obs error sd <= 0', obs_param
+            call error_handler(E_MSG, 'kde_distribution_mod:likelihood', errstring, source)
+         else
+            l = exp( -0.5_r8 * (x - y)**2 / obs_param )
+         end if
+      case (obs_dist_types%binomial)
+         ! For a binomial obs distribution 0<=x<=1 is the probability of
+         ! observing y successes of obs_param total trials
+         if (y < 0._r8) then
+            write(errstring, *) 'y value is negative with a binomial obs model ', y
+            call error_handler(E_MSG, 'kde_distribution_mod:likelihood', errstring, source)
+         elseif (y > obs_param) then
+            write(errstring, *) 'successes greater than total trials with a binomial obs model ', y, obs_param
+            call error_handler(E_MSG, 'kde_distribution_mod:likelihood', errstring, source)
+         elseif ((x < 0._r8) .or. (x > 1._r8)) then
+            write(errstring, *) 'x outside [0,1] with a binomial obs model ', x
+            call error_handler(E_MSG, 'kde_distribution_mod:likelihood', errstring, source)
+         else
+            l = (x**y) * (1._r8 - x)**(obs_param - y)
+         end if
+      case (obs_dist_types%gamma)
+         ! For a gamma obs distribution, the mean is x and the variance is obs_param * x^2, i.e.
+         ! the obs error sd is sqrt(obs_param) times  the true value. If the error sd is p% of x,
+         ! set obs_param = (p/100._r8)**2.
+         ! For a gamma obs distribution, the likelihood is inverse gamma.
+         if (x < 0._r8) then
+            write(errstring, *) 'x value is negative with a gamma obs model ', x
+            call error_handler(E_MSG, 'kde_distribution_mod:likelihood', errstring, source)
+         elseif (y <= 0._r8) then
+            write(errstring, *) 'y value is non-positive with a gamma obs model ', y
+            call error_handler(E_MSG, 'kde_distribution_mod:likelihood', errstring, source)
+         elseif (obs_param <= 0._r8) then
+            write(errstring, *) 'obs variance is non-positive with a gamma obs model ', obs_param
+            call error_handler(E_MSG, 'kde_distribution_mod:likelihood', errstring, source)
+         else
+            gamma_shape = 1._r8 / obs_param ! k
+            gamma_scale = x * obs_param     ! theta
+            if (x .eq. 0._r8) then
+               l = 0._r8 ! Technically x must be > 0, but just setting l=0 is convenient.
+            else
+               l = (y / gamma_scale)**gamma_shape * exp(-y / gamma_scale)
+            end if
+         end if
+      case (obs_dist_types%inv_gamma)
+         ! For an inverse gamma obs distribution, the mean is x and the variance is obs_param * x^2,
+         ! i.e. the obs error sd is sqrt(obs_param) times  the true value. If the error sd is p% of x,
+         ! set obs_param = (p/100._r8)**2.
+         ! For an inverse gamma obs distribution, the likelihood is gamma.
+         if (x < 0._r8) then
+            write(errstring, *) 'x value is negative with an inverse gamma obs model ', x
+            call error_handler(E_MSG, 'kde_distribution_mod:likelihood', errstring, source)
+         elseif (y <= 0._r8) then
+            write(errstring, *) 'y value is non-positive with an inverse gamma obs model ', y
+            call error_handler(E_MSG, 'kde_distribution_mod:likelihood', errstring, source)
+         elseif (obs_param <= 0._r8) then
+            write(errstring, *) 'obs variance is non-positive with an inverse gamma obs model ', obs_param
+            call error_handler(E_MSG, 'kde_distribution_mod:likelihood', errstring, source)
+         else
+            inv_gamma_shape = (1._r8 / obs_param) + 2._r8   ! alpha
+            inv_gamma_scale = x * (inv_gamma_shape - 1._r8) ! beta
+            if (x .eq. 0._r8) then
+               l = 0._r8 ! Technically x must be > 0, but just setting l=0 is convenient.
+            else
+               l = (inv_gamma_scale**inv_gamma_shape) * exp( - inv_gamma_scale / y )
+            end if
+         end if
+      case (obs_dist_types%lognormal)
+         ! For a lognormal obs distribution, ln(y) is normal with mean x and variance obs_param.
+         ! The likelihood is normal.
+         if (y <= 0._r8) then
+            write(errstring, *) 'y value is non-positive with a lognormal obs model', y
+            call error_handler(E_MSG, 'kde_distribution_mod:likelihood', errstring, source)
+         elseif (obs_param <= 0._r8) then
+            write(errstring, *) 'obs error sd <= 0', obs_param
+            call error_handler(E_MSG, 'kde_distribution_mod:likelihood', errstring, source)
+         else
+            l = exp( -0.5_r8 * (x - log(y))**2 / obs_param )
+         end if
+      case (obs_dist_types%truncated_normal)
+         ! Code based on the version in assim_tools_mod.f90
+         ! This implicitly assumes that the bounds on the y distribution are the same
+         ! as the bounds on x, which makes sense in observation space. The factor of
+         ! sigma * sqrt(2 * pi) is ignored.
+         ! A zero observation error variance is a degenerate case
+         if(obs_param <= 0.0_r8) then
+            if(x == y) then
+               l = 1.0_r8
+            else
+               l = 0.0_r8
+            endif
+            return
+         endif
+
+         cdf = [0._r8, 1._r8]
+         if (present(bounded_above)) then
+            bounded_above_val = bounded_above
+         else
+            bounded_above_val = .false.
+         end if
+
+         if (present(bounded_below)) then
+            bounded_below_val = bounded_below
+         else
+            bounded_below_val = .false.
+         end if
+
+         if (bounded_below_val) cdf(1) = normal_cdf(lower_bound, x, sqrt(obs_param))
+         if (bounded_above_val) cdf(2) = normal_cdf(upper_bound, x, sqrt(obs_param))
+
+         l = exp( -0.5_r8 * (x - y)**2 / obs_param ) / (cdf(2) - cdf(1))
+      case DEFAULT
+         write(errstring, *) 'likelihood called with unrecognized obs_dist_type ', obs_dist_type
+         call error_handler(E_MSG, 'kde_distribution_mod:likelihood_function', errstring, source)
+   end select
+
+end function likelihood_function
+
+!---------------------------------------------------------------------------
+
+elemental function epanechnikov_kernel(x) ! The fact that this is elemental is not (yet) used
+   real(r8) :: epanechnikov_kernel
+   real(r8), intent(in) :: x
+   real(r8), parameter  :: norm_const = 3._r8 / 4._r8
+   epanechnikov_kernel = norm_const * max(0._r8, 1._r8 - x**2)
+
+end function epanechnikov_kernel
+
+!---------------------------------------------------------------------------
+
+elemental function epanechnikov_cdf(x) ! The fact that this is elemental is not (yet) used
+   real(r8) :: epanechnikov_cdf
+   real(r8), intent(in) :: x
+   real(r8), parameter  :: norm_const = 1._r8 / 4._r8
+   if (x <= -1._r8) then
+      epanechnikov_cdf = 0._r8
+   elseif (x <= 1._r8) then
+      epanechnikov_cdf = min(1._r8, max(0._r8, norm_const * (2._r8 + 3._r8 * x - x**3)))
+   else
+      epanechnikov_cdf = 1._r8
+   end if
+
+end function epanechnikov_cdf
+
+!---------------------------------------------------------------------------
+
+elemental subroutine boundary_correction(x, lx, mx)
+   real(r8), intent(in)  :: x
+   real(r8), intent(out) :: lx, mx
+
+   ! Boundary correction for kde, from Jones (1993) and Jones & Foster (1996)
+   ! The fact that this is elemental is not (yet) used
+
+   real(r8) :: denom
+
+   denom = ((1._r8 + x)**4) * (19._r8 + 3._r8 * x * (x - 6._r8))
+   lx =-64._r8 * (-2._r8 + x * (4._r8 + 3._r8 * x * (x - 2._r8))) / denom
+   mx = 240._r8 * (x - 1._r8)**2 / denom
+
+end subroutine boundary_correction
+
+!---------------------------------------------------------------------------
+
+function kde_pdf(x, p)
+   real(r8)                                   :: kde_pdf
+   real(r8),                       intent(in) :: x
+   type(distribution_params_type), intent(in) :: p
+
+   ! Returns the kernel estimate of the pdf evaluated at x.
+
+   integer  :: i
+   real(r8) :: u_lower, lx_lower, mx_lower
+   real(r8) :: u_upper, lx_upper, mx_upper
+
+   kde_pdf = 0._r8 ! Initialize
+   if (p%bounded_below .and. (x <= p%lower_bound)) return
+   if (p%bounded_above .and. (x >= p%upper_bound)) return
+   do i=1, p%ens_size ! Reduction loop - parallelizable
+      u_lower = 0._r8; lx_lower = 1._r8 ; mx_lower = 0._r8
+      u_upper = 0._r8; lx_upper = 1._r8 ; mx_upper = 0._r8
+      if (p%bounded_below) then ! Bounded below
+         u_lower = min( 1._r8, max( 0._r8, (x - p%lower_bound) / p%more_params(i) ) ) ! p%more_params(i) holds kernel width for ensemble member i
+         call boundary_correction(u_lower, lx_lower, mx_lower)
+      end if
+      if (p%bounded_above) then ! Bounded above
+         u_upper = min( 1._r8, max( 0._r8, (p%upper_bound - x) / p%more_params(i) ) )
+         call boundary_correction(u_upper, lx_upper, mx_upper)
+      end if
+      u_lower = (x - p%ens(i)) / p%more_params(i) ! Not u_lower any more, just (x-x_i)/h_i
+      kde_pdf = kde_pdf + (1._r8 / p%more_params(i)) * &
+                          (lx_lower + mx_lower * u_lower) * &
+                          (lx_upper - mx_upper * u_lower) * &
+                          epanechnikov_kernel( u_lower )
+   end do
+   kde_pdf = max(0._r8, kde_pdf) / (p%ens_size * p%more_params(p%ens_size + 1)) ! p%more_params(ens_size + 1) normalizes the pdf
+
+end function kde_pdf
+
+!-----------------------------------------------------------------------
+
+subroutine get_kde_bandwidths(ens_size, ens, bandwidths)
+   integer,         intent(in)   :: ens_size
+   real(r8),        intent(in)   :: ens(ens_size)
+   real(r8),        intent(out)  :: bandwidths(ens_size)
+
+   real(r8)                      :: ens_mean, ens_sd, h0, g, d_max
+   real(r8), dimension(ens_size) :: f_tilde, d, lambda
+   integer                       :: i, j, k
+
+
+   d(:) = abs( ens(:) - ens(1) )
+   d_max = maxval(d(:))
+   if (d_max <= 0._r8) then
+      errstring = 'Bandwidth = 0 '
+      call error_handler(E_ERR, 'get_kde_bandwidths', errstring, source)
+   end if
+   ens_mean = sum(ens) / ens_size
+   ens_sd   = sqrt( sum( (ens - ens_mean)**2 ) / (ens_size - 1._r8) )
+   h0 = 2._r8 * ens_sd / (ens_size**0.2_r8) ! This would be the kernel width if the widths were not adaptive.
+                                            ! It would be better to use min(sd, iqr/1.34) but don't want to compute iqr
+   k = floor( sqrt( real(ens_size) ) ) ! distance to kth nearest neighbor used to set bandwidth
+   do i=1,ens_size
+      d(:) = sort( abs( ens(:) - ens(i) ) )
+      d_max = maxval(d(:))
+      where (d < 1.e-3_r8 * d_max) ! set small distances to zero
+         d = 0._r8
+      end where
+      j = 1
+      do while ((d(k+j) <= 0._r8) .and. (k+j < ens_size))
+         j = j + 1
+      end do
+      f_tilde(i) = 0.5_r8 * real(k+j-1, r8) / (real(ens_size, r8) * d(k+j)) ! Initial density estimate
+   end do
+   f_tilde(:) = f_tilde(:) / maxval(f_tilde(:)) ! Avoids overflow in the next line
+   g = product( f_tilde )**( 1._r8 / real(ens_size, r8) )
+   lambda(:) = sqrt( g / f_tilde(:) )
+   bandwidths(:) = h0 * lambda(:)
+   if (maxval(bandwidths(:)) <= 0._r8) then
+      write(*,*) 'Bandwidth breakdown'
+      write(*,*) h0, g, lambda(:), f_tilde(:), ens(:)
+   end if
+
+end subroutine get_kde_bandwidths
+
+!---------------------------------------------------------------------------
+
+function gq5(left, right, p) result(q)
+   real(r8)                                   :: q
+   real(r8),                       intent(in) :: left, right
+   type(distribution_params_type), intent(in) :: p
+
+   ! Uses 5-point Gauss quadrature to approximate \int_left^right l(s; y) p(s) ds
+   !  where p(x) is the prior pdf and l(x; y) is the likelihood. This only works
+   !  correctly if left >= lower_bound and right <= upper_bound. The result is
+   ! **Not Normalized**.
+
+   real(r8) :: y
+   real(r8) :: obs_param ! See likelihood function for interpretation
+   integer  :: obs_dist_type  ! See likelihood function for interpretation
+   real(r8) :: xi ! quadrature point
+   real(r8), save :: chi(5) = [-sqrt(5._r8 + 2._r8 * sqrt(10._r8 / 7._r8)) / 3._r8, &
+                               -sqrt(5._r8 - 2._r8 * sqrt(10._r8 / 7._r8)) / 3._r8, &
+                                0._r8, &
+                                sqrt(5._r8 - 2._r8 * sqrt(10._r8 / 7._r8)) / 3._r8, &
+                                sqrt(5._r8 + 2._r8 * sqrt(10._r8 / 7._r8)) / 3._r8] ! Gauss quadrature points
+   real(r8), save :: w(5)   = [(322._r8 - 13._r8 * sqrt(70._r8)) / 900._r8, &
+                               (322._r8 + 13._r8 * sqrt(70._r8)) / 900._r8, &
+                               128._r8/225._r8, &
+                               (322._r8 + 13._r8 * sqrt(70._r8)) / 900._r8, &
+                               (322._r8 - 13._r8 * sqrt(70._r8)) / 900._r8] ! GQ weights
+   real(r8) :: l ! value of the likelihood function
+   integer  :: k
+
+   ! Unpack obs info from param struct
+   y         = p%more_params(p%ens_size + 2)
+   obs_param = p%more_params(p%ens_size + 3)
+   obs_dist_type  = nint(p%more_params(p%ens_size + 4))
+
+   q = 0._r8
+   do k=1,5
+      xi = 0.5_r8 * ((right - left) * chi(k) + left + right)
+      if (obs_dist_type .eq. obs_dist_types%truncated_normal) then
+         l = likelihood_function(xi, y, obs_param, obs_dist_type, &
+            bounded_above=p%bounded_above, bounded_below=p%bounded_below, &
+            upper_bound=p%upper_bound, lower_bound=p%lower_bound)
+      else
+         l = likelihood_function(xi, y, obs_param, obs_dist_type)
+      end if
+      q  = q + 0.5_r8 * (right - left) * w(k) * kde_pdf(xi, p) * l
+   end do
+
+end function gq5
+
+!---------------------------------------------------------------------------
+
+function integrate_pdf(x, p) result(q)
+   real(r8)                                   :: q
+   real(r8),                       intent(in) :: x
+   type(distribution_params_type), intent(in) :: p
+
+   ! Uses quadrature to approximate \int_{-\infty}^x l(x; y) p(s) ds where
+   ! p(s) is the prior pdf and l(x; y) is the likelihood. The interval is
+   ! broken up into sub-intervals whose boundaries are either one of the bounds,
+   ! or the edge of support of one of the kernels, or the value of x. On each
+   ! sub-interval the integral is approximated using Gauss-Legendre quadrature
+   ! with 5 points. When the likelihood is flat and the boundaries are far
+   ! from the ensemble, the result is exact up to roundoff error.
+
+   real(r8) :: y
+   real(r8) :: obs_param ! See likelihood function for interpretation
+   integer  :: obs_dist_type  ! See likelihood function for interpretation
+   real(r8) :: edges(2*p%ens_size)
+   real(r8) :: left, right ! edges of current sub-interval, quadrature point
+   integer  :: i
+
+   ! Unpack obs info from param struct
+   y         = p%more_params(p%ens_size + 2)
+   obs_param = p%more_params(p%ens_size + 3)
+   obs_dist_type  = nint(p%more_params(p%ens_size + 4))
+
+   edges(1:p%ens_size)                = p%ens(1:p%ens_size) - p%more_params(1:p%ens_size)
+   edges(p%ens_size + 1:2*p%ens_size) = p%ens(1:p%ens_size) + p%more_params(1:p%ens_size)
+   edges(:) = sort(edges(:)) ! If bandwidths were constant we would not need to sort
+
+   ! If x is outside the support of the pdf then we can skip the quadrature.
+   left = edges(1)
+   if (p%bounded_below) left = max(left, p%lower_bound)
+   if (x <= left) then
+      q = 0._r8
+      return
+   end if
+
+   ! Important to use x > upper_bound here because I use
+   ! x = upper_bound to compute the normalization constant.
+   if ((p%bounded_above) .and. (x > p%upper_bound)) then
+      q = 1._r8
+      return
+   end if
+
+   ! If we haven't returned yet, then there is at least one subinterval.
+   i = 1
+   right = min(x, edges(2)) ! left was computed above
+   q = gq5(left, right, p)
+   do while ((x > right) .and. (i+1 < 2*p%ens_size))
+      i     = i + 1
+      left  = right
+      right = min(x, edges(i+1))
+      q     = q + gq5(left, right, p)
+   end do
+   ! Note that it is possible to have maxval(edges) < x < upper_bound,
+   ! but that last sub-interval from maxval(edges) to x has zero integral,
+   ! so it can be safely skipped.
+
+end function integrate_pdf
+
+!-----------------------------------------------------------------------
+
+subroutine pack_kde_params(ens_size, bounded_below, bounded_above, lower_bound, upper_bound, &
+   ens, y, obs_param, obs_dist_type, p)
+   integer,                        intent(in)  :: ens_size
+   logical,                        intent(in)  :: bounded_below,        bounded_above
+   real(r8),                       intent(in)  :: lower_bound,          upper_bound
+   real(r8),                       intent(in)  :: ens(ens_size)
+   real(r8),                       intent(in)  :: y
+   real(r8),                       intent(in)  :: obs_param
+   integer,                        intent(in)  :: obs_dist_type
+   type(distribution_params_type), intent(out) :: p
+
+   real(r8) :: bandwidths(ens_size)
+   real(r8) :: edge, edges(2*ens_size + 2)
+   integer  :: i
+
+   ! Set the fixed storage parameters in the distribution_params_type
+   p%ens_size = ens_size
+   p%distribution_type = KDE_DISTRIBUTION
+
+   ! Allocate space needed for the parameters
+   allocate(p%ens(1:ens_size), source=0._r8)
+   allocate(p%more_params(1:5*ens_size + 8), source=0._r8)
+
+   ! p%more_params(1:ens_size) are the kernel bandwidths
+   ! p%more_params(ens_size + 1) is the normalization constant for the pdf
+   ! p%more_params(ens_size + 2) is the observation value y; not used for prior
+   ! p%more_params(ens_size + 3) is the observation parameter (see likelihood for details); not used for prior
+   ! p%more_params(ens_size + 4) is the observation error distribution type. For prior set to uninformative
+   ! p%more_params(ens_size + 5:3*ens_size + 6) are the edges.
+   ! p%more_params(3*ens_size+7:5*ens_size+8) are the cdf evaluated at the edges.
+
+   ! Save the ensemble values, sorted now so that they don't have to be re-sorted for the first guess
+   p%ens(:) = sort(ens(:))
+
+   ! Store the kernel bandwidths in the more_params array
+   ! Important to make sure that p%ens and p%more_params are sorted in same order by passing p%ens rather than ens
+   call get_kde_bandwidths(ens_size, p%ens, bandwidths)
+   p%more_params(1:ens_size) = bandwidths(:)
+
+   ! Pack obs information
+   p%more_params(ens_size + 2) = y
+   p%more_params(ens_size + 3) = obs_param
+   p%more_params(ens_size + 4) = real(obs_dist_type, r8) ! This is not ideal because it involves a type conversion from int to real
+
+  ! Get the edges of the subintervals on which the pdf is smooth
+   edges(1:ens_size)            = p%ens(:) - bandwidths(:)
+   edges(ens_size+1:2*ens_size) = p%ens(:) + bandwidths(:)
+   if (bounded_below) then
+      edges(2*ens_size+1) = lower_bound
+   else
+      edges(2*ens_size+1) = minval(edges(1:ens_size))
+   end if
+   if (bounded_above) then
+      edges(2*ens_size+2) = upper_bound
+   else
+      edges(2*ens_size+2) = maxval(edges(ens_size+1:2*ens_size))
+   end if
+   edges(:) = sort(edges(:))
+   p%more_params(ens_size+5:3*ens_size+6) = edges(:)
+
+   ! If the ensemble is sufficiently far from the boundary, that we can use the unbounded code, which is cheaper.
+   edge = minval(p%ens(:) - 2*bandwidths(:))
+   if (bounded_below .and. (edge > lower_bound)) then
+      p%bounded_below = .false.
+      p%lower_bound = minval(p%ens(:) - bandwidths(:))
+   else
+      p%bounded_below = bounded_below
+      p%lower_bound = lower_bound
+   end if
+
+   ! If the ensemble is sufficiently far from the boundary, that we can use the unbounded code, which is cheaper.
+   edge = maxval(p%ens(:) + 2*bandwidths(:))
+   if (bounded_above .and. (edge < upper_bound)) then
+      p%bounded_above = .false.
+      p%upper_bound = maxval(p%ens(:) + bandwidths(:))
+   else
+      p%bounded_above = bounded_above
+      p%upper_bound = upper_bound
+   end if
+
+   ! Integrate across all subintervals
+   p%more_params(ens_size+1) = 1._r8 ! Temporary value
+   p%more_params(3*ens_size+7) = 0._r8 ! 0 at left edge
+   do i=2,2*ens_size+2
+      if ((p%bounded_below .and. (edges(i) <= p%lower_bound)) .or. &
+          (edges(i) <= edges(i-1)) .or. &
+          (p%bounded_above .and. (edges(i) > p%upper_bound))) then
+         ! The subinterval [edges(i-1), edges(i)] is empty or outside the bounds
+         p%more_params(3*ens_size+6+i) = p%more_params(3*ens_size+6+i-1) + 0._r8
+      else
+         p%more_params(3*ens_size+6+i) = p%more_params(3*ens_size+6+i-1) + gq5(edges(i-1), edges(i), p)
+      end if
+   end do
+   p%more_params(ens_size+1) = maxval(p%more_params(3*ens_size+7:5*ens_size+8))
+   p%more_params(3*ens_size+7:5*ens_size+8) = p%more_params(3*ens_size+7:5*ens_size+8) / p%more_params(ens_size+1)
+
+end subroutine pack_kde_params
+
+!-----------------------------------------------------------------------
+
+function kde_cdf_params(x, p) result(quantile)
+   real(r8),                       intent(in) :: x
+   type(distribution_params_type), intent(in) :: p
+   real(r8)                                   :: quantile
+
+   ! Returns the cumulative distribution of the kernel density estimate
+   ! at the value x.
+
+   integer  :: i
+   logical  :: use_analytical_cdf
+   ! It is a waste of memory to unpack p%more_params, but it aids readability
+   real(r8) :: bandwidths(p%ens_size)
+   real(r8) :: y, obs_param
+   integer  :: obs_dist_type
+   real(r8), dimension(2*p%ens_size+2) :: edges, cdfs
+
+   bandwidths(:) = p%more_params(1:p%ens_size)
+   y             = p%more_params(p%ens_size + 2)
+   obs_param     = p%more_params(p%ens_size + 3)
+   obs_dist_type = nint(p%more_params(p%ens_size + 4))
+   edges(:)      = p%more_params(  p%ens_size+5:3*p%ens_size+6)
+   cdfs(:)       = p%more_params(3*p%ens_size+7:5*p%ens_size+8)
+
+   quantile = 0._r8
+   ! If the likelihood is uninformative and the distribution is unbounded, we can evaluate
+   ! the cdf analytically (instead of using quadrature) to save computation.
+   use_analytical_cdf = ( (.not.p%bounded_below) .and. (.not.p%bounded_above) .and. &
+                          (obs_dist_type .eq. obs_dist_types%uninformative) )
+   if (use_analytical_cdf) then
+      do i=1,p%ens_size
+         quantile = quantile + epanechnikov_cdf( (x - p%ens(i)) / bandwidths(i) ) / p%ens_size
+      end do
+   else ! Compute cdf using quadrature
+      !quantile  = min(1._r8, max(0._r8, integrate_pdf(x, p)))
+      if (x < edges(1)) then
+         return
+      end if
+      do i=2,2*p%ens_size+2
+         if ((edges(i-1) <= x) .and. (x <= edges(i))) then
+            quantile = min(1._r8, max(0._r8, cdfs(i-1) + gq5(edges(i-1), x, p)))
+            return
+         else
+            cycle
+         end if
+      end do
+      quantile = 1._r8
+   end if
+
+end function kde_cdf_params
+
+!---------------------------------------------------------------------------
+
+function kde_cdf(x, ens, ens_size, bounded_below, bounded_above, &
+   lower_bound, upper_bound, y, obs_param, obs_dist_type) result(quantile)
+   real(r8),                       intent(in)  :: x
+   integer,                        intent(in)  :: ens_size
+   real(r8),                       intent(in)  :: ens(ens_size)
+   logical,                        intent(in)  :: bounded_below,        bounded_above
+   real(r8),                       intent(in)  :: lower_bound,          upper_bound
+   real(r8),                       intent(in)  :: y
+   real(r8),                       intent(in)  :: obs_param
+   integer,                        intent(in)  :: obs_dist_type
+   real(r8)                                    :: quantile
+
+   ! Returns the cumulative distribution of the kernel density estimate
+   ! at the value x.
+
+   type(distribution_params_type)    :: p
+
+   call pack_kde_params(ens_size, bounded_below, bounded_above, lower_bound, upper_bound, &
+      ens, y, obs_param, obs_dist_type, p)
+   quantile = kde_cdf_params(x,p)
+
+end function kde_cdf
+
+!---------------------------------------------------------------------------
+
+function inv_kde_cdf(quantile, ens, ens_size, bounded_below, bounded_above, &
+   lower_bound, upper_bound, y, obs_param, obs_dist_type) result(x)
+   real(r8),                       intent(in)  :: quantile
+   integer,                        intent(in)  :: ens_size
+   real(r8),                       intent(in)  :: ens(ens_size)
+   logical,                        intent(in)  :: bounded_below,        bounded_above
+   real(r8),                       intent(in)  :: lower_bound,          upper_bound
+   real(r8),                       intent(in)  :: y
+   real(r8),                       intent(in)  :: obs_param
+   integer,                        intent(in)  :: obs_dist_type
+   real(r8)                                    :: x
+
+   ! Returns the value x such that cdf(x) = quantile.
+
+   type(distribution_params_type) :: p
+
+   call pack_kde_params(ens_size, bounded_below, bounded_above, lower_bound, upper_bound, &
+      ens, y, obs_param, obs_dist_type, p)
+
+   x = inv_cdf(quantile, kde_cdf_params, inv_kde_first_guess_params, p)
+
+end function inv_kde_cdf
+
+!---------------------------------------------------------------------------
+
+function inv_kde_cdf_params(quantile, p) result(x)
+   real(r8)                                   :: x
+   real(r8),                       intent(in) :: quantile
+   type(distribution_params_type), intent(in) :: p
+
+   ! Returns the value x such that cdf(x) = quantile.
+
+   x = inv_cdf(quantile, kde_cdf_params, inv_kde_first_guess_params, p)
+
+end function inv_kde_cdf_params
+
+!---------------------------------------------------------------------------
+
+function inv_kde_first_guess_params(quantile, p) result(x)
+   real(r8)                                   :: x
+   real(r8),                       intent(in) :: quantile
+   type(distribution_params_type), intent(in) :: p
+   real(r8) :: edge ! edge of support of the cdf
+
+   ! This first-guess subroutine evaluates the cdf at the ensemble members,
+   ! then finds a pair of ensemble members whose quantiles bracket the
+   ! target quantile, then sets the first guess to a convex combination of
+   ! these two ensemble members. If the target quantile is outside the
+   ! ensemble, the first guess is a combination of the nearest ensemble
+   ! member and the edge of the support of the pdf. If the
+   ! target quantile is 0 or 1, the appropriate bound is returned.
+
+   integer  :: i
+   real(r8) :: dq
+   real(r8), dimension(2*p%ens_size+2) :: edges, cdfs
+
+   edges(:)      = p%more_params(  p%ens_size+5:3*p%ens_size+6)
+   cdfs(:)       = p%more_params(3*p%ens_size+7:5*p%ens_size+8)
+
+   if (quantile <= 0._r8) then
+      x = minval(edges(:))
+      if (p%bounded_below) then
+         x = max(x, p%lower_bound)
+      end if
+      return
+   end if
+
+   if (quantile >= 1._r8) then
+      x = maxval(edges(:))
+      if (p%bounded_above) then
+         x = min(x, p%upper_bound)
+      end if
+      return
+   end if
+
+   do i=2,2*p%ens_size+2
+      if (quantile <= cdfs(i)) then
+         dq = cdfs(i) - cdfs(i-1)
+         x = (cdfs(i  ) - quantile) * edges(i-1) / dq &
+           + (quantile - cdfs(i-1)) * edges(i  ) / dq
+         return
+      end if
+   end do
+
+end function inv_kde_first_guess_params
+
+!-----------------------------------------------------------------------
+
+subroutine separate_ensemble(ens, ens_size, bounded_below, bounded_above, &
+   lower_bound, upper_bound, ens_interior, ens_size_interior, &
+   q_lower, q_int, q_upper)
+
+   ! Extracts the ensemble members that are not on the boundaries, and
+   ! also returns the fraction of ensemble members on each boundary and in
+   ! the interior.
+
+   integer,   intent(in) :: ens_size
+   real(r8),  intent(in) :: ens(ens_size)
+   logical,   intent(in) :: bounded_below, bounded_above
+   real(r8),  intent(in) :: lower_bound,   upper_bound
+   real(r8), intent(out) :: ens_interior(ens_size)
+   integer,  intent(out) :: ens_size_interior
+   real(r8), intent(out) :: q_lower, q_int, q_upper
+
+   ! local variables
+   integer  :: i, j
+   logical  :: is_interior(ens_size)
+   real(r8), parameter :: eps = 0._r8 !epsilon(1._r8)
+
+   ! Initialize
+   ens_interior(:) = missing_r8
+
+   q_lower = 0._r8
+   q_int   = 0._r8
+   q_upper = 0._r8
+   is_interior(:) = .true.
+   j = 0
+   do i=1,ens_size
+      if (bounded_below) then
+         if (ens(i) <= lower_bound + eps) then
+            q_lower        = q_lower + 1._r8
+            is_interior(i) = .false.
+         end if
+      end if
+      if (bounded_above) then
+         if (ens(i) >= upper_bound - eps) then
+            q_upper        = q_upper + 1._r8
+            is_interior(i) = .false.
+         end if
+      end if
+      if (is_interior(i)) then
+         j = j + 1
+         ens_interior(j) = ens(i)
+      end if
+   end do
+   ens_size_interior = ens_size - nint(q_lower + q_upper)
+   if (j .ne. ens_size_interior) then
+      errstring = 'Total number of interior ensemble members incorrect '
+      call error_handler(E_ERR, 'separate_ensemble', errstring, source)
+   end if
+   q_lower = q_lower / real(ens_size, r8)
+   q_upper = q_upper / real(ens_size, r8)
+   q_int   = 1._r8 - (q_lower + q_upper)
+
+end subroutine separate_ensemble
+
+!-----------------------------------------------------------------------
+
+subroutine test_kde
+   ! This routine provides limited tests of the numerics in this module. It tests
+   ! the boundary correction function, the likelihood, the bandwidth selection,
+   ! and the cdf inversion. It uses an ensemble [-1, 1]. It tests the bandwidth
+   ! selection and the cdf inversion with zero, one, or two bounds at [-2, 2].
+   ! Failing these tests suggests a serious problem. Passing them does not indicate
+   ! that there are acceptable results for all possible inputs.
+
+   integer,             parameter :: ens_size = 2
+   real(r8),  dimension(ens_size) :: ensemble, bandwidths, target_bandwidths
+   type(distribution_params_type) :: p
+   real(r8)                       :: x, y, inv, max_diff, lx, mx, like, obs_param
+   integer                        :: i, obs_dist_type
+
+   ! Test the boundary correction code against a Mathematica implementation
+   x = 0.5_r8
+   call boundary_correction(x, lx, mx)
+   write(*, *) '----------------------------'
+   write(*, *) 'test boundary correction'
+   write(*, *) 'abs difference in lx is ', abs(1.322997416020672_r8 - lx)
+   write(*, *) 'abs difference in mx is ', abs(1.102497846683893_r8 - mx)
+   write(*, *) 'abs differences should be less than 1e-15'
+
+   ! Test uninformative likelihood
+   max_diff = -1.0_r8
+   y = 1._r8
+   obs_param = 1._r8
+   obs_dist_type = obs_dist_types%uninformative
+   do i = 0, 1000
+      x = (real(i, r8) - 500.0_r8) / 500.0_r8
+      like = likelihood_function(x, y, obs_param, obs_dist_type)
+      max_diff = max(abs(1._r8 - like), max_diff)
+   end do
+   write(*, *) '----------------------------'
+   write(*, *) 'Uninformative likelihood test'
+   write(*, *) 'max difference in likelihood is ', max_diff
+   write(*, *) 'max difference should be less than 1e-15'
+
+   ! Test normal likelihood
+   x = 0.5_r8
+   y = 0._r8
+   obs_param = 1._r8
+   obs_dist_type = obs_dist_types%normal
+   like = likelihood_function(x, y, obs_param, obs_dist_type)
+   write(*, *) '----------------------------'
+   write(*, *) 'Normal likelihood test'
+   write(*, *) 'abs difference in likelihood is ', abs(0.8824969025845955_r8 - like)
+   write(*, *) 'abs difference should be less than 1e-15'
+
+   ! Test binomial obs distribution (beta likelihood)
+   x = 0.25_r8
+   y = 3._r8
+   obs_param = 5._r8
+   obs_dist_type = obs_dist_types%binomial
+   like = likelihood_function(x, y, obs_param, obs_dist_type)
+   write(*, *) '----------------------------'
+   write(*, *) 'Binomial obs distribution test'
+   write(*, *) 'abs difference in likelihood is ', abs(0.0087890625_r8 - like)
+   write(*, *) 'abs difference should be less than 1e-15'
+
+   ! Test gamma obs distribution (inverse gamma likelihood)
+   y = 1._r8
+   x = 2._r8
+   obs_param = 3._r8
+   obs_dist_type = obs_dist_types%gamma
+   like = likelihood_function(x, y, obs_param, obs_dist_type)
+   write(*, *) '----------------------------'
+   write(*, *) 'Gamma obs distribution test'
+   write(*, *) 'abs difference in likelihood is ', abs(0.4658368455179406_r8 - like)
+   write(*, *) 'abs difference should be less than 1e-15'
+
+   ! Test inverse gamma obs distribution (gamma likelihood)
+   y = 3._r8
+   x = 2._r8
+   obs_param = 4._r8
+   obs_dist_type = obs_dist_types%inv_gamma
+   like = likelihood_function(x, y, obs_param, obs_dist_type)
+   write(*, *) '----------------------------'
+   write(*, *) 'Inverse gamma obs distribution test'
+   write(*, *) 'abs difference in likelihood is ', abs(3.415489474106968_r8 - like)
+   write(*, *) 'abs difference should be less than 1e-15'
+
+   ! Test lognormal obs distribution (normal likelihood)
+   y = 1._r8
+   x = 2._r8
+   obs_param = 4._r8
+   obs_dist_type = obs_dist_types%lognormal
+   like = likelihood_function(x, y, obs_param, obs_dist_type)
+   write(*, *) '----------------------------'
+   write(*, *) 'Lognormal obs distribution test'
+   write(*, *) 'abs difference in likelihood is ', abs(0.6065306597126334_r8 - like)
+   write(*, *) 'abs difference should be less than 1e-15'
+
+   ! Test truncated-normal obs distribution
+   y = 1._r8
+   x = 2._r8
+   obs_param = 4._r8
+   obs_dist_type = obs_dist_types%truncated_normal
+   like = likelihood_function(x, y, obs_param, obs_dist_type, &
+         bounded_above=.true.,bounded_below=.true.,upper_bound=4._r8,lower_bound=0._r8)
+   write(*, *) '----------------------------'
+   write(*, *) 'Truncated-normal obs distribution test, doubly-bounded'
+   write(*, *) 'abs difference in likelihood is ', abs(1.292676850528392_r8 - like)
+   write(*, *) 'abs difference should be less than 1e-15'
+   like = likelihood_function(x, y, obs_param, obs_dist_type, &
+         bounded_above=.false.,bounded_below=.true.,upper_bound=4._r8,lower_bound=0._r8)
+   write(*, *) '----------------------------'
+   write(*, *) 'Truncated-normal obs distribution test, bounded below'
+   write(*, *) 'abs difference in likelihood is ', abs(1.048912359301403_r8 - like)
+   write(*, *) 'abs difference should be less than 1e-15'
+   like = likelihood_function(x, y, obs_param, obs_dist_type, &
+         bounded_above=.true.,bounded_below=.false.,upper_bound=4._r8,lower_bound=0._r8)
+   write(*, *) '----------------------------'
+   write(*, *) 'Truncated-normal obs distribution test, bounded above'
+   write(*, *) 'abs difference in likelihood is ', abs(1.048912359301403_r8 - like)
+   write(*, *) 'abs difference should be less than 1e-15'
+
+   ! Test bandwidth selection
+   target_bandwidths(:) = 2.462288826689833_r8 * [1._r8, 1._r8]
+
+   ensemble(:) = [-1._r8, 1._r8]
+
+   call get_kde_bandwidths(ens_size, ensemble, bandwidths)
+
+   ! Compare computed bandwidths to exact
+   write(*, *) '----------------------------'
+   write(*, *) 'kde bandwidths test: Absolute value of differences should be less than 1e-15'
+   do i = 1, ens_size
+      write(*, *) i, abs(bandwidths(i) - target_bandwidths(i))
+   end do
+
+   ! Test the inversion of the cdf over the support of the pdf, unbounded
+   write(*, *) '----------------------------'
+   write(*, *) 'Unbounded cdf/icdf test'
+   call pack_kde_params(ens_size, .false., .false., 0._r8, 0._r8, &
+      ensemble, 0._r8, 1._r8, obs_dist_types%uninformative, p)
+
+   max_diff = -1.0_r8
+   do i = 0, 1000
+      x = (real(i - 500, r8) / 500.0_r8) * (1._r8 + target_bandwidths(1))
+      y = kde_cdf_params(x, p)
+      inv = inv_kde_cdf_params(y, p)
+      ! write(*,*) 'Quantile: ', y, 'Exact x: ', x, 'Inverted x: ', inv, 'Error: ', abs(inv - x)
+      max_diff = max(abs(x-inv), max_diff)
+   end do
+
+   write(*, *) 'max difference in inversion is ', max_diff
+   write(*, *) 'Errors should be small, max around 1E-8'
+   ! The accuracy degrades near the boundary. The slope of the cdf goes to zero on the boundary
+   ! But at least the second derivative is nonzero. So near the boundary it behaves like a
+   ! double root, and the accuracy of the rootfinding degrades.
+
+   call deallocate_distribution_params(p)
+
+   ! Test the inversion of the cdf over the entire support of the pdf, bounded below
+   write(*, *) '----------------------------'
+   write(*, *) 'cdf/icdf test bounded below'
+   call pack_kde_params(ens_size, .true., .false., -2._r8, 0._r8, &
+      ensemble, 0._r8, 1._r8, obs_dist_types%uninformative, p)
+
+   max_diff = -1.0_r8
+   do i = 0, 1000
+      x = (real(i - 500, r8) / 500.0_r8) * (1._r8 + target_bandwidths(1))
+      if (x <= -2._r8) then
+         cycle
+      else
+         y = kde_cdf_params(x, p)
+         inv = inv_kde_cdf_params(y, p)
+         ! write(*,*) 'Quantile: ', y, 'Exact x: ', x, 'Inverted x: ', inv, 'Error: ', abs(inv - x)
+         max_diff = max(abs(x-inv), max_diff)
+      end if
+   end do
+
+   write(*, *) 'max difference in inversion is ', max_diff
+   write(*, *) 'Errors should be small, max below 1E-8'
+
+   call deallocate_distribution_params(p)
+
+
+   ! Test the inversion of the cdf over the entire support of the pdf, bounded above
+   write(*, *) '----------------------------'
+   write(*, *) 'cdf/icdf test bounded above'
+   call pack_kde_params(ens_size, .false., .true., 0._r8, 2._r8, &
+      ensemble, 0._r8, 1._r8, obs_dist_types%uninformative, p)
+
+   max_diff = -1.0_r8
+   do i = 0, 1000
+      x = ((real(i, r8) - 500.0_r8) / 500.0_r8) * (1._r8 + target_bandwidths(1))
+      if (x >= 2._r8) then
+         cycle
+      else
+         y = kde_cdf_params(x, p)
+         inv = inv_kde_cdf_params(y, p)
+         ! write(*,*) 'Quantile: ', y, 'Exact x: ', x, 'Inverted x: ', inv, 'Error: ', abs(inv - x)
+         max_diff = max(abs(x-inv), max_diff)
+      end if
+   end do
+
+   write(*, *) 'max difference in inversion is ', max_diff
+   write(*, *) 'Errors should be small, max below 1E-8'
+
+   call deallocate_distribution_params(p)
+
+   ! Test the inversion of the cdf over the entire support of the pdf, doubly bounded
+   write(*, *) '----------------------------'
+   write(*, *) 'cdf/icdf test doubly-bounded'
+   call pack_kde_params(ens_size, .true., .true., -2._r8, 2._r8, &
+      ensemble, 0._r8, 1._r8, obs_dist_types%uninformative, p)
+
+   max_diff = -1.0_r8
+   do i = 0, 1000
+      x = ((real(i, r8) - 500.0_r8) / 500.0_r8) * (1._r8 + target_bandwidths(1))
+      if ((x <= -2._r8) .or. (x >= 2._r8)) then
+         cycle
+      else
+         y = kde_cdf_params(x, p)
+         inv = inv_kde_cdf_params(y, p)
+         ! write(*,*) 'Quantile: ', y, 'Exact x: ', x, 'Inverted x: ', inv, 'Error: ', abs(inv - x)
+         max_diff = max(abs(x-inv), max_diff)
+      end if
+   end do
+
+   write(*, *) 'max difference in inversion is ', max_diff
+   write(*, *) 'Errors should be small, max below 1E-8'
+
+   call deallocate_distribution_params(p)
+
+   ! Test the quadrature: Construct a case with bounds, but where the bounds are
+   ! far enough from the data that they are not used. In this case the kernel
+   ! density estimate should integrate exactly to one.
+   call pack_kde_params(ens_size, .true., .true., -2._r8, 2._r8, &
+      ensemble, 0._r8, 1._r8, obs_dist_types%uninformative, p)
+   p%lower_bound = -20._r8
+   p%upper_bound =  20._r8
+   p%more_params(ens_size + 1) = 1._r8
+
+   y = integrate_pdf(0._r8, p)
+   write(*, *) '----------------------------'
+   write(*, *) 'test quadrature'
+   write(*, *) 'abs difference is ', abs(0.5_r8 - y)
+   write(*, *) 'abs difference should be less than 1e-15'
+
+   call deallocate_distribution_params(p)
+
+end subroutine test_kde
+
+!------------------------------------------------------------------------
+
+end module kde_distribution_mod
diff --git a/assimilation_code/modules/assimilation/probit_transform_mod.f90 b/assimilation_code/modules/assimilation/probit_transform_mod.f90
index f0ea534a27..03cf0401b3 100644
--- a/assimilation_code/modules/assimilation/probit_transform_mod.f90
+++ b/assimilation_code/modules/assimilation/probit_transform_mod.f90
@@ -18,7 +18,7 @@ module probit_transform_mod
                                     NORMAL_DISTRIBUTION, BOUNDED_NORMAL_RH_DISTRIBUTION, &
                                     GAMMA_DISTRIBUTION, BETA_DISTRIBUTION,               &
                                     LOG_NORMAL_DISTRIBUTION, UNIFORM_DISTRIBUTION,       &
-                                    PARTICLE_FILTER_DISTRIBUTION
+                                    PARTICLE_FILTER_DISTRIBUTION, KDE_DISTRIBUTION
 
 use normal_distribution_mod, only : normal_cdf, inv_std_normal_cdf
 
@@ -31,6 +31,9 @@ module probit_transform_mod
 use bnrh_distribution_mod,    only : bnrh_cdf_initialized_vector, bnrh_cdf_params, &
                                      inv_bnrh_cdf_params, get_bnrh_sd
 
+use kde_distribution_mod,  only : kde_cdf_params, inv_kde_cdf_params, pack_kde_params, &
+                                  obs_dist_types, separate_ensemble
+
 implicit none
 private
 
@@ -183,7 +186,9 @@ subroutine transform_to_probit(ens_size, state_ens_in, distribution_type, p, &
    endif
 !----------------------------------------------------------------------------------
 
-
+elseif(p%distribution_type == KDE_DISTRIBUTION) then
+   call to_probit_kde(ens_size, state_ens, p, probit_ens, &
+      use_input_p, bounded_below, bounded_above, lower_bound, upper_bound)
 !!!elseif(p%distribution_type == PARTICLE_FILTER_DISTRIBUTION) then
    !!!call to_probit_particle(ens_size, state_ens, p, probit_ens, use_input_p, &
        !!!bounded_below, bounded_above, lower_bound, upper_bound)
@@ -446,6 +451,101 @@ end subroutine to_probit_bounded_normal_rh
 !!!
 !------------------------------------------------------------------------
 
+subroutine to_probit_kde(ens_size, state_ens, p, probit_ens, use_input_p, &
+   bounded_below, bounded_above, lower_bound, upper_bound)
+
+   ! Transforms the values in state_ens. The transform is either defined
+   ! by state_ens (when use_input_p is false), or by p%ens (when use_input_p
+   ! is true). Handles the case where ensemble members are on the boundary.
+
+   integer,                           intent(in) :: ens_size
+   real(r8),                          intent(in) :: state_ens(ens_size)
+   type(distribution_params_type), intent(inout) :: p
+   real(r8),                         intent(out) :: probit_ens(ens_size)
+   logical,                           intent(in) :: use_input_p
+   logical,                           intent(in) :: bounded_below, bounded_above
+   real(r8),                          intent(in) :: lower_bound,   upper_bound
+
+   ! local variables
+   real(r8) :: quantile
+   integer  :: i
+   real(r8) :: y = 0._r8         ! Dummy value, not used
+   real(r8) :: obs_param = 1._r8 ! Dummy value, not used
+   real(r8) :: ens(ens_size)     ! Ensemble that defines the transform
+   real(r8) :: ens_interior(ens_size) ! Ensemble members that are not on the boundaries
+   integer  :: ens_size_interior ! Number of ensemble members that are not on the boundaries
+   type(distribution_params_type) :: p_interior
+   real(r8) :: d(ens_size), d_max
+   real(r8) :: p_lower, p_int, p_upper
+   real(r8), parameter :: eps = 0._r8 !epsilon(1._r8)
+
+   ! Get the ensemble that defines the transform
+   if(use_input_p) then
+      ens(:) = p%ens(:)
+   else
+      ! The input ensemble (state_ens) defines the transform.
+      ens(:) = state_ens(:)
+   endif
+
+   ! If all ensemble members that define the transform are identical, then we can't
+   ! really define the transform, so we simply set all probit values to 0, pack the
+   ! ensemble members into p, and return.
+   d(:) = abs( ens(:) - ens(1) )
+   d_max = maxval(d)
+   if(d_max .le. 0.0_r8) then
+      probit_ens(:) = 0._r8
+      if(.not. use_input_p) then
+         allocate(p%ens(1:ens_size))
+         p%ens(:) = ens(:)
+      endif
+      return
+   endif
+
+   ! If we reach this point then the ensemble members that define the transform are
+   ! not all identical, and if we are not using the input p then we need to define it.
+   if (.not. use_input_p) then
+      call pack_kde_params(ens_size, bounded_below, bounded_above, lower_bound, &
+         upper_bound, ens, y, obs_param, obs_dist_types%uninformative, p)
+   endif
+
+   ! Get mixture component probabilities and interior ensemble
+   call separate_ensemble(ens, ens_size, bounded_below, bounded_above, &
+      lower_bound, upper_bound, ens_interior, ens_size_interior, &
+      p_lower, p_int, p_upper)
+
+   ! Get parameters of the kde distribution for the interior
+   if (ens_size_interior .gt. 1) then
+      d(1:ens_size_interior) = abs( ens_interior(1:ens_size_interior) - ens_interior(1) )
+      d_max = maxval(d(1:ens_size_interior))
+      if (d_max .gt. 0._r8) then
+         call pack_kde_params(ens_size_interior, bounded_below, bounded_above, lower_bound, &
+            upper_bound, ens_interior(1:ens_size_interior), y, obs_param, &
+            obs_dist_types%uninformative, p_interior)
+      endif
+   else
+      d_max = 0._r8
+   endif
+   do i=1,ens_size
+      ! Map each state_ens member to a quantile using the prior cdf. Members
+      ! on the boundary map to p_lower and p_upper.
+      if (bounded_below .and. (state_ens(i) .le. lower_bound + eps)) then
+         quantile = p_lower
+      elseif (bounded_above .and. (state_ens(i) .ge. upper_bound - eps)) then
+         quantile = 1._r8 - p_upper
+      elseif ((ens_size_interior .le. 1) .or. (d_max .le. 0._r8)) then
+         ! Can't use kde with only one ensemble member, so assign to middle of interior range
+         quantile = (p_int * 0.5_r8) + p_lower
+      else ! Use the interior cdf obtained using kde.
+         quantile = kde_cdf_params(state_ens(i), p_interior)
+         quantile = (p_int * quantile) + p_lower
+      endif
+      ! Transform to probit/logit space
+      probit_ens(i) = probit_or_logit_transform(quantile)
+   end do
+
+end subroutine to_probit_kde
+
+
 subroutine transform_all_from_probit(ens_size, num_vars, probit_ens, p, state_ens)
 
 integer, intent(in)                  :: ens_size
@@ -493,6 +593,8 @@ subroutine transform_from_probit(ens_size, probit_ens, p, state_ens)
    call from_probit_bounded_normal_rh(ens_size, probit_ens, p, state_ens)
 !!!elseif(p%distribution_type == PARTICLE_FILTER_DISTRIBUTION) then
    !!!call from_probit_particle(ens_size, probit_ens, p, state_ens)
+elseif(p%distribution_type == KDE_DISTRIBUTION) then
+   call from_probit_kde(ens_size, probit_ens, p, state_ens)
 else
    write(errstring, *) 'Illegal distribution type', p%distribution_type
    call error_handler(E_ERR, 'transform_from_probit', errstring, source)
@@ -643,6 +745,81 @@ end subroutine from_probit_bounded_normal_rh
 
 !------------------------------------------------------------------------
 
+subroutine from_probit_kde(ens_size, probit_ens, p, state_ens)
+
+   ! Handles the case where ensemble members are on the boundary.
+
+   integer,                           intent(in) :: ens_size
+   real(r8),                          intent(in) :: probit_ens(ens_size)
+   type(distribution_params_type), intent(inout) :: p
+   real(r8),                         intent(out) :: state_ens(ens_size)
+
+   ! local variables
+   integer :: i
+   real(r8) :: quantile
+   real(r8) :: ens(ens_size)     ! Ensemble that defines the transform
+   real(r8) :: ens_interior(ens_size) ! Ensemble members that are not on the boundaries
+   integer  :: ens_size_interior ! Number of ensemble members that are not on the boundaries
+   type(distribution_params_type) :: p_interior
+   real(r8) :: p_lower, p_int, p_upper
+   real(r8) :: y, obs_param
+   real(r8) :: d(ens_size), d_max
+
+   ! The ensemble that defines the transform is contained in p, so unpack it
+   ens(:) = p%ens(:)
+
+   ! If all ensemble members are identical, then there is no update
+   d(:) = abs( ens(:) - ens(1) )
+   d_max = maxval(d)
+   if(d_max .le. 0.0_r8) then
+      state_ens(:) = ens(1)
+      return
+   endif
+   ! Get mixture components on the boundaries and interior ensemble
+   call separate_ensemble(ens, ens_size, p%bounded_below, p%bounded_above, &
+      p%lower_bound, p%upper_bound, ens_interior, ens_size_interior, &
+      p_lower, p_int, p_upper)
+
+   ! Get parameters of the kde distribution for the interior
+   if (ens_size_interior .gt. 1) then
+      d(1:ens_size_interior) = abs( ens_interior(1:ens_size_interior) - ens_interior(1) )
+      d_max = maxval(d(1:ens_size_interior))
+      if (d_max .gt. 0._r8) then
+         ! Unpack obs info from param struct
+         y         = p%more_params(p%ens_size + 2)
+         obs_param = p%more_params(p%ens_size + 3)
+         call pack_kde_params(ens_size_interior, p%bounded_below, p%bounded_above, p%lower_bound, &
+            p%upper_bound, ens_interior(1:ens_size_interior), y, obs_param, &
+            obs_dist_types%uninformative, p_interior)
+      endif
+   else
+      d_max = 0._r8
+   endif
+
+   ! Transform each probit ensemble member back to physical space
+   do i = 1, ens_size
+      ! First, invert the probit/logit to get quantiles
+      quantile = inv_probit_or_logit_transform(probit_ens(i))
+      ! Next invert the mixture CDF to get the physical space ensemble
+      if (p%bounded_below .and. (quantile .le. p_lower)) then
+         state_ens(i) = p%lower_bound
+      elseif (p%bounded_above .and. (quantile .ge. 1._r8-p_upper)) then
+         state_ens(i) = p%upper_bound
+      elseif ((ens_size_interior .eq. 1) .or. (d_max .le. 0._r8)) then
+         ! If there is only one interior ensemble member in the prior then any
+         ! ensemble member that moves into the interior just gets mapped to the
+         ! one interior value from the prior.
+         state_ens(i) = ens_interior(1)
+      else
+         quantile = (quantile - p_lower) / p_int
+         state_ens(i) = inv_kde_cdf_params(quantile, p_interior)
+      endif
+   end do
+
+end subroutine from_probit_kde
+
+!------------------------------------------------------------------------
+
 function probit_or_logit_transform(quantile)
 
 real(r8)             :: probit_or_logit_transform
diff --git a/assimilation_code/modules/assimilation/rootfinding_mod.f90 b/assimilation_code/modules/assimilation/rootfinding_mod.f90
new file mode 100644
index 0000000000..226637bb68
--- /dev/null
+++ b/assimilation_code/modules/assimilation/rootfinding_mod.f90
@@ -0,0 +1,267 @@
+module rootfinding_mod
+
+use types_mod,               only : r8
+
+use utilities_mod,           only : E_ERR, E_MSG, error_handler
+
+use distribution_params_mod, only : distribution_params_type, deallocate_distribution_params
+
+implicit none
+private
+
+public :: inv_cdf
+
+character(len=512)          :: errstring
+character(len=*), parameter :: source = 'rootfinding_mod.f90'
+
+contains
+
+!------------------------------------------------------------------------
+
+function inv_cdf(quantile_in, cdf, first_guess, p) result(x)
+
+   interface
+      function cdf(x, p)
+         use types_mod, only : r8
+         use distribution_params_mod, only : distribution_params_type
+         real(r8)                                   :: cdf
+         real(r8), intent(in)                       :: x
+         type(distribution_params_type), intent(in) :: p
+      end function
+   end interface
+
+   interface
+      function first_guess(quantile, p)
+         use types_mod, only : r8
+         use distribution_params_mod, only : distribution_params_type
+         real(r8)                                   :: first_guess
+         real(r8), intent(in)                       :: quantile
+         type(distribution_params_type), intent(in) :: p
+      end function
+   end interface
+
+   real(r8)                                   :: x
+   real(r8), intent(in)                       :: quantile_in
+   type(distribution_params_type), intent(in) :: p
+
+   ! Given a quantile q, finds the value of x for which cdf(x) is approximately q
+   ! Starts with Newton's method using an approximate gradient until either convergence
+   ! or overshoot. If it overshoots then it switches to the bisection-like ITP method
+   ! from Oliveira & Takahashi, ACM Trans Math Soft, 2020. Here f(x) = cdf(x) - quantile
+
+   ! Local variables:
+   integer,  parameter :: max_iterations = 50 ! Limit on the total number of iterations.
+   real(r8), parameter :: min_quantile = 0.0_r8,  max_quantile = 0.999999999999999_r8
+   real(r8), parameter :: tol = epsilon(1._r8)**(0.75_r8) ! Absolute on q, relative on x
+   real(r8) :: quantile, q_err
+   real(r8) :: delta_x, delta_f
+   real(r8) :: x_guess, q_guess, x0, x1, f0, f1
+   real(r8) :: a, b, fa, fb
+   integer  :: iter
+
+   real(r8) :: lower_bound,   upper_bound
+   logical  :: bounded_below, bounded_above
+
+   ! Extract the required information from the p type
+   bounded_below = p%bounded_below;   bounded_above = p%bounded_above
+   lower_bound   = p%lower_bound;     upper_bound   = p%upper_bound
+
+   quantile = quantile_in
+
+   ! Do a test for illegal values on the quantile
+   if(quantile <  0.0_r8 .or. quantile > 1.0_r8) then
+      write(errstring, *) 'Illegal Quantile input', quantile
+      call error_handler(E_ERR, 'inv_cdf', errstring, source)
+   endif
+
+   ! If the distribution is bounded, quantiles at the limits have values at the bounds
+   if(bounded_below .and. quantile == 0.0_r8) then
+      x = lower_bound
+      return
+   endif
+   if(bounded_above .and. quantile == 1.0_r8) then
+      x = upper_bound
+      return
+   endif
+
+   ! If input quantiles are outside the numerically supported range, move them to the extremes
+   quantile = min(quantile, max_quantile)
+   ! code tests stably for many distributions with min_quantile of 0.0, could remove this
+   quantile = max(quantile, min_quantile)
+
+   ! Get first guess from functional approximation
+   x_guess = first_guess(quantile, p)
+
+   ! Evaluate the cdf
+   q_guess = cdf(x_guess, p)
+
+   ! Check if first guess is close enough
+   q_err = quantile - q_guess
+   if (abs(q_err) .le. tol) then
+      x = x_guess
+      return
+   end if
+
+   ! If bounded below and target quantile is between 0 and q_guess, go to ITP
+   if (bounded_below .and. (quantile .lt. q_guess)) then
+      a  = lower_bound
+      b  = x_guess
+      fa = 0._r8 - quantile
+      fb = q_guess - quantile
+      x  = inv_cdf_ITP(cdf, quantile, a, b, fa, fb, max_iterations, p)
+      return
+   end if
+
+   ! If bounded above and target quantile is between q_guess and 1, go to ITP
+   if (bounded_above .and. (q_guess .lt. quantile)) then
+      a  = x_guess
+      b  = upper_bound
+      fa = q_guess
+      fb = 1._r8
+      x  = inv_cdf_ITP(cdf, quantile, a, b, fa, fb, max_iterations, p)
+      return
+   end if
+
+   ! If we reach this line, then we can't yet bracket the true value of x, so we start
+   ! doing steps of the secant method, either until convergence or until we bracket
+   ! the root. If we bracket the root then we finish using the ITP method.
+   x0 = x_guess
+   f0 = q_guess - quantile
+   delta_x = max(1e-2_r8, 1e-2_r8 * abs(x_guess))
+   if (f0 .gt. 0._r8) then
+      delta_x = -delta_x
+   end if
+   do iter=1,7
+      x1 = x_guess + delta_x
+      if(bounded_below .and. (x1 .lt. lower_bound)) x1 = lower_bound
+      if(bounded_above .and. (x1 .gt. upper_bound)) x1 = upper_bound
+      f1 = cdf(x1, p) - quantile
+      delta_f = abs(f1 - f0)
+      if (delta_f .le. 0._r8) then
+         delta_x = 2._r8 * delta_x
+      else
+         exit
+      end if
+   end do
+   do iter = 1, max_iterations
+      ! If f0 * f1 < 0 then x0 and x1 bracket the root, so go to ITP
+      if (f0 * f1 .lt. 0._r8 ) then
+         a  = min(x0, x1)
+         b  = max(x0, x1)
+         fa = min(f0, f1)
+         fb = max(f0, f1)
+         x = inv_cdf_ITP(cdf, quantile, a, b, fa, fb, max_iterations - iter + 1, p)
+         return
+      else ! Do a secant step
+         delta_f = f1 - f0
+         if (abs(delta_f) .eq. 0._r8) then ! Stop. Failed step: Flat CDF
+            write(errstring, *)  'Failed to converge; flat cdf for quantile ', quantile
+            write(*,*) p%bounded_below, p%bounded_above
+            write(*,*) 'More params:', p%more_params(:)
+            write(*,*) 'iter, x0, x1, f0, f1:', iter, x0, x1, f0, f1
+            write(*,*) 'ens:', p%ens(:)
+            call error_handler(E_MSG, 'rootfinding_mod:inv_cdf', errstring, source)
+            x = x1
+            return
+         else
+            x_guess = x1 - f1 * delta_x / delta_f
+            if (bounded_above .and. (x_guess .gt. upper_bound)) x_guess = upper_bound
+            if (bounded_below .and. (x_guess .lt. lower_bound)) x_guess = lower_bound
+            x0 = x1
+            f0 = f1
+            x1 = x_guess
+            delta_x = x1 - x0
+            f1 = cdf(x1, p) - quantile
+            x = x1
+         end if
+      end if
+
+      ! Finished secant step. Check for convergence.
+      if (abs(delta_x) .le. tol * max(abs(x0), abs(x1)) .or. (abs(f1) .le. tol)) then
+         return
+      endif
+   end do
+
+   ! For now, have switched a failed convergence to return the latest guess
+   ! This has implications for stability of probit algorithms that require further study
+   ! Not currently happening for any of the test cases on gfortran
+   write(errstring, *)  'Failed to converge for quantile ', quantile
+   call error_handler(E_MSG, 'inv_cdf', errstring, source)
+   !!!call error_handler(E_ERR, 'inv_cdf', errstring, source)
+
+end function inv_cdf
+
+!-----------------------------------------------------------------------
+
+function inv_cdf_ITP(cdf, quantile, a, b, fa, fb, max_iterations, p) result(x)
+   interface
+      function cdf(x, p)
+         use types_mod, only : r8
+         use distribution_params_mod, only : distribution_params_type
+         real(r8)                                   :: cdf
+         real(r8), intent(in)                       :: x
+         type(distribution_params_type), intent(in) :: p
+      end function
+   end interface
+
+   real(r8),                       intent(in) :: quantile
+   real(r8),                    intent(inout) :: a, b, fa, fb
+   integer,                        intent(in) :: max_iterations
+   type(distribution_params_type), intent(in) :: p
+   real(r8)                                   :: x
+
+   ! Given a quantile q, finds the value of x for which cdf(x) is approximately q
+   ! Uses the bisection-like ITP method from Oliveira & Takahashi, ACM Trans Math
+   ! Soft, 2020. Here f(x) = cdf(x) - quantile. Assumes f(a) < 0 and f(b) > 0 on
+   ! input.
+
+   ! Local variables:
+   integer,  parameter :: n0 = 1._r8
+   real(r8), parameter :: kappa2 = 2._r8
+   real(r8), parameter :: phi = 0.5_r8 * (1._r8 + sqrt(5._r8)) ! Golden ratio
+   real(r8), parameter :: tol = epsilon(1._r8)**(0.75_r8) ! abs on f, rel on x
+   real(r8) :: kappa1
+   real(r8) :: delta
+   real(r8) :: x_t, x_f, x_half, f_ITP
+   real(r8) :: eps, r
+   integer  :: i, n_max, n_half
+
+   kappa1 = 0.2_r8 / (b - a)
+   eps = tol * max(abs(a), abs(b))
+   ! Max number of iterations is either (i) input max, or (ii) number of bisection
+   ! iterations (plus n0) needed to achieve the desired relative tolerance on x.
+   n_half = max(0, ceiling(log(0.5_r8 * (b - a) / eps) / log(2._r8)))
+   n_max = min(max_iterations, n0 + n_half)
+
+   do i=1,n_max
+      x_half = 0.5_r8 * (a + b) ! Bisection guess
+      x_f    = (b* fa - a * fb) / (fa - fb) ! Secant/Regula-Falsi guess
+      delta  = kappa1 * abs(b - a)**kappa2
+      if (delta .le. abs(x_half - x_f)) then
+         x_t = x_f + sign(delta, x_half - x_f)
+      else
+         x_t = x_half
+      end if
+      r = max(0._r8, eps * 2**(n_half - i) - 0.5_r8 * (b - a))
+      if (abs(x_t - x_half) .le. r) then
+         x = x_t
+      else
+         x = x_half - sign(r, x_half - x_f)
+      end if
+      f_ITP = cdf(x, p) - quantile
+      if (abs(f_ITP) .le. tol) then
+         return
+      elseif (f_ITP .gt. 0._r8) then
+         b  = x
+         fb = f_ITP
+      else
+         a  = x
+         fa = f_ITP
+      end if
+   end do
+
+end function inv_cdf_ITP
+
+!-----------------------------------------------------------------------
+
+end module rootfinding_mod
diff --git a/assimilation_code/programs/test_kde_dist/test_kde_dist.f90 b/assimilation_code/programs/test_kde_dist/test_kde_dist.f90
new file mode 100644
index 0000000000..b190390e1a
--- /dev/null
+++ b/assimilation_code/programs/test_kde_dist/test_kde_dist.f90
@@ -0,0 +1,8 @@
+program test_kde_mod
+   use  utilities_mod, only : initialize_utilities, finalize_utilities
+   use kde_distribution_mod, only : test_kde
+   call initialize_utilities()
+   call test_kde()
+   call finalize_utilities()
+
+end program test_kde_mod
\ No newline at end of file
diff --git a/assimilation_code/programs/test_kde_dist/test_kde_dist.rst b/assimilation_code/programs/test_kde_dist/test_kde_dist.rst
new file mode 100644
index 0000000000..47401fe34a
--- /dev/null
+++ b/assimilation_code/programs/test_kde_dist/test_kde_dist.rst
@@ -0,0 +1 @@
+documentation should be here

From d95b6a4127ece64d1e16688cf7e6544d84b6acd1 Mon Sep 17 00:00:00 2001
From: Ian Grooms <ian.grooms@colorado.edu>
Date: Fri, 12 Jul 2024 15:00:39 -0600
Subject: [PATCH 02/14] Change names

Fixes how the word 'quantile' is used in the code, along with
variable names. The output of the inverse cdf (aka the quantile
function) is a quantile. The input to the quantile function is
a probability.
---
 .../modules/assimilation/assim_tools_mod.f90  |  37 ++++---
 .../assimilation/probit_transform_mod.f90     |  30 ++---
 .../modules/assimilation/rootfinding_mod.f90  | 104 +++++++++---------
 3 files changed, 86 insertions(+), 85 deletions(-)

diff --git a/assimilation_code/modules/assimilation/assim_tools_mod.f90 b/assimilation_code/modules/assimilation/assim_tools_mod.f90
index 6054c0b9b9..af20abf925 100644
--- a/assimilation_code/modules/assimilation/assim_tools_mod.f90
+++ b/assimilation_code/modules/assimilation/assim_tools_mod.f90
@@ -1204,7 +1204,7 @@ subroutine obs_increment_kde(ens, ens_size, y, obs_param, obs_dist_type, &
 
    ! Applies a QCEF based on kernel density estimation & quadrature
 
-   real(r8) :: q
+   real(r8) :: u
    real(r8) :: p_lower_prior, p_int_prior, p_upper_prior
    real(r8) :: p_lower_post, p_int_post, p_upper_post
    real(r8) :: like_ens_mean ! ensemble mean of the likelihood
@@ -1225,8 +1225,8 @@ subroutine obs_increment_kde(ens, ens_size, y, obs_param, obs_dist_type, &
    ! this function and so we are not trying to make it task-count invariant.
    if(first_inc_ran_call) then
       call random_seed()
-      call random_number(q)
-      i = int(q * 1000)
+      call random_number(unif)
+      i = int(unif * 1000)
       call init_random_seq(inc_ran_seq, my_task_id() + i)
       first_inc_ran_call = .false.
    endif
@@ -1290,40 +1290,41 @@ subroutine obs_increment_kde(ens, ens_size, y, obs_param, obs_dist_type, &
    count_upper = 0
    unif = random_uniform(inc_ran_seq) / real(ens_size, r8)
    do i=1,ens_size
-      ! Map each ensemble member to a quantile using the prior cdf. Members
-      ! on the boundary get random quantiles.
+      ! Map each ensemble member to a probability u in [0,1] using the prior cdf. Members
+      ! on the boundary get random probabilities.
       if (bounded_below .and. (ens(i) .le. lower_bound)) then
          ! Rather than draw a random for each member on the boundary,
          ! I draw one random (above) then add 1/Nb to each subsequent value
-         q = unif + real(count_lower, r8) / real(ens_size, r8)
+         u = unif + real(count_lower, r8) / real(ens_size, r8)
          count_lower = count_lower + 1
-         write(*,*) 'lower boundary quantile ', q
+         write(*,*) 'lower boundary probability ', u
        elseif (bounded_above .and.  (ens(i) .ge. upper_bound)) then
          ! As above, I draw one random then add 1/Nb to each subsequent value
-         q = 1._r8 - (unif + real(count_upper, r8) / real(ens_size, r8))
+         u = 1._r8 - (unif + real(count_upper, r8) / real(ens_size, r8))
          count_upper = count_upper + 1
-         write(*,*) 'upper boundary quantile ', q
+         write(*,*) 'upper boundary probability ', u
        elseif ((ens_size_interior .eq. 1) .or. (d_max .le. 0._r8)) then
-         ! Can't use kde with only one ensemble member (or identical ensemble members), so assign a random quantile
-         q = (p_int_prior * random_uniform(inc_ran_seq)) + p_lower_prior
+         ! Can't use kde with only one ensemble member (or identical ensemble members), so assign a
+         ! random probability
+         u = (p_int_prior * random_uniform(inc_ran_seq)) + p_lower_prior
       else ! Use the interior cdf obtained using kde.
-         q = kde_cdf_params(ens(i), params_interior_prior)
-         q = (p_int_prior * q) + p_lower_prior
+         u = kde_cdf_params(ens(i), params_interior_prior)
+         u = (p_int_prior * u) + p_lower_prior
       end if
       ! Invert the posterior kde cdf to get the updated ensemble member, then
       ! subtract to get the increment.
-      if (q .le. p_lower_post) then ! Posterior value on the lower boundary
+      if (u .le. p_lower_post) then ! Posterior value on the lower boundary
          obs_inc(i) = lower_bound - ens(i)
-      elseif (q .ge. (1._r8 - p_upper_post)) then ! Posterior value on the upper boundary
+      elseif (u .ge. (1._r8 - p_upper_post)) then ! Posterior value on the upper boundary
          obs_inc(i) = upper_bound - ens(i)
       elseif ((ens_size_interior .eq. 1) .or. (d_max .le. 0._r8)) then
          ! posterior value in the interior, but there's only one prior ensemble member/value
          ! in the interior, so we just assign the posterior ensemble member equal to the prior one
          obs_inc(i) = ens_interior(1) - ens(i)
       else ! posterior value in the interior, can use kde
-         ! Rescale q to be between 0 and 1 before inverting interior cdf
-         q = (q - p_lower_post) / p_int_post
-         obs_inc(i) = inv_kde_cdf_params(q, params_interior_posterior)
+         ! Rescale u to be between 0 and 1 before inverting interior cdf
+         u = (u - p_lower_post) / p_int_post
+         obs_inc(i) = inv_kde_cdf_params(u, params_interior_posterior)
          obs_inc(i) = obs_inc(i) - ens(i)
       end if
    end do
diff --git a/assimilation_code/modules/assimilation/probit_transform_mod.f90 b/assimilation_code/modules/assimilation/probit_transform_mod.f90
index 03cf0401b3..c574e11680 100644
--- a/assimilation_code/modules/assimilation/probit_transform_mod.f90
+++ b/assimilation_code/modules/assimilation/probit_transform_mod.f90
@@ -467,7 +467,7 @@ subroutine to_probit_kde(ens_size, state_ens, p, probit_ens, use_input_p, &
    real(r8),                          intent(in) :: lower_bound,   upper_bound
 
    ! local variables
-   real(r8) :: quantile
+   real(r8) :: u
    integer  :: i
    real(r8) :: y = 0._r8         ! Dummy value, not used
    real(r8) :: obs_param = 1._r8 ! Dummy value, not used
@@ -526,21 +526,21 @@ subroutine to_probit_kde(ens_size, state_ens, p, probit_ens, use_input_p, &
       d_max = 0._r8
    endif
    do i=1,ens_size
-      ! Map each state_ens member to a quantile using the prior cdf. Members
+      ! Map each state_ens member to a probability u using the prior cdf. Members
       ! on the boundary map to p_lower and p_upper.
       if (bounded_below .and. (state_ens(i) .le. lower_bound + eps)) then
-         quantile = p_lower
+         u = p_lower
       elseif (bounded_above .and. (state_ens(i) .ge. upper_bound - eps)) then
-         quantile = 1._r8 - p_upper
+         u = 1._r8 - p_upper
       elseif ((ens_size_interior .le. 1) .or. (d_max .le. 0._r8)) then
          ! Can't use kde with only one ensemble member, so assign to middle of interior range
-         quantile = (p_int * 0.5_r8) + p_lower
+         u = (p_int * 0.5_r8) + p_lower
       else ! Use the interior cdf obtained using kde.
-         quantile = kde_cdf_params(state_ens(i), p_interior)
-         quantile = (p_int * quantile) + p_lower
+         u = kde_cdf_params(state_ens(i), p_interior)
+         u = (p_int * u) + p_lower
       endif
       ! Transform to probit/logit space
-      probit_ens(i) = probit_or_logit_transform(quantile)
+      probit_ens(i) = probit_or_logit_transform(u)
    end do
 
 end subroutine to_probit_kde
@@ -756,7 +756,7 @@ subroutine from_probit_kde(ens_size, probit_ens, p, state_ens)
 
    ! local variables
    integer :: i
-   real(r8) :: quantile
+   real(r8) :: u
    real(r8) :: ens(ens_size)     ! Ensemble that defines the transform
    real(r8) :: ens_interior(ens_size) ! Ensemble members that are not on the boundaries
    integer  :: ens_size_interior ! Number of ensemble members that are not on the boundaries
@@ -798,12 +798,12 @@ subroutine from_probit_kde(ens_size, probit_ens, p, state_ens)
 
    ! Transform each probit ensemble member back to physical space
    do i = 1, ens_size
-      ! First, invert the probit/logit to get quantiles
-      quantile = inv_probit_or_logit_transform(probit_ens(i))
+      ! First, invert the probit/logit to get probabilities u
+      u = inv_probit_or_logit_transform(probit_ens(i))
       ! Next invert the mixture CDF to get the physical space ensemble
-      if (p%bounded_below .and. (quantile .le. p_lower)) then
+      if (p%bounded_below .and. (u .le. p_lower)) then
          state_ens(i) = p%lower_bound
-      elseif (p%bounded_above .and. (quantile .ge. 1._r8-p_upper)) then
+      elseif (p%bounded_above .and. (u .ge. 1._r8-p_upper)) then
          state_ens(i) = p%upper_bound
       elseif ((ens_size_interior .eq. 1) .or. (d_max .le. 0._r8)) then
          ! If there is only one interior ensemble member in the prior then any
@@ -811,8 +811,8 @@ subroutine from_probit_kde(ens_size, probit_ens, p, state_ens)
          ! one interior value from the prior.
          state_ens(i) = ens_interior(1)
       else
-         quantile = (quantile - p_lower) / p_int
-         state_ens(i) = inv_kde_cdf_params(quantile, p_interior)
+         u = (u - p_lower) / p_int
+         state_ens(i) = inv_kde_cdf_params(u, p_interior)
       endif
    end do
 
diff --git a/assimilation_code/modules/assimilation/rootfinding_mod.f90 b/assimilation_code/modules/assimilation/rootfinding_mod.f90
index 226637bb68..715dece906 100644
--- a/assimilation_code/modules/assimilation/rootfinding_mod.f90
+++ b/assimilation_code/modules/assimilation/rootfinding_mod.f90
@@ -18,7 +18,7 @@ module rootfinding_mod
 
 !------------------------------------------------------------------------
 
-function inv_cdf(quantile_in, cdf, first_guess, p) result(x)
+function inv_cdf(cdf_in, cdf, first_guess, p) result(quantile)
 
    interface
       function cdf(x, p)
@@ -31,31 +31,32 @@ function cdf(x, p)
    end interface
 
    interface
-      function first_guess(quantile, p)
+      function first_guess(u, p)
          use types_mod, only : r8
          use distribution_params_mod, only : distribution_params_type
          real(r8)                                   :: first_guess
-         real(r8), intent(in)                       :: quantile
+         real(r8), intent(in)                       :: u
          type(distribution_params_type), intent(in) :: p
       end function
    end interface
 
-   real(r8)                                   :: x
-   real(r8), intent(in)                       :: quantile_in
+   real(r8)                                   :: quantile
+   real(r8), intent(in)                       :: cdf_in
    type(distribution_params_type), intent(in) :: p
 
-   ! Given a quantile q, finds the value of x for which cdf(x) is approximately q
-   ! Starts with Newton's method using an approximate gradient until either convergence
+   ! Given a probability cdf_in, finds the value of x (i.e. the quantile) for which cdf(x)
+   ! is approximately cdf_in = u.
+   ! Starts with Newton's method using an approximate derivative until either convergence
    ! or overshoot. If it overshoots then it switches to the bisection-like ITP method
-   ! from Oliveira & Takahashi, ACM Trans Math Soft, 2020. Here f(x) = cdf(x) - quantile
+   ! from Oliveira & Takahashi, ACM Trans Math Soft, 2020. Here f(x) = cdf(x) - cdf_in
 
    ! Local variables:
    integer,  parameter :: max_iterations = 50 ! Limit on the total number of iterations.
-   real(r8), parameter :: min_quantile = 0.0_r8,  max_quantile = 0.999999999999999_r8
+   real(r8), parameter :: min_probability = 0.0_r8,  max_probability = 0.999999999999999_r8
    real(r8), parameter :: tol = epsilon(1._r8)**(0.75_r8) ! Absolute on q, relative on x
-   real(r8) :: quantile, q_err
+   real(r8) :: u, u_err
    real(r8) :: delta_x, delta_f
-   real(r8) :: x_guess, q_guess, x0, x1, f0, f1
+   real(r8) :: x_guess, u_guess, x0, x1, f0, f1
    real(r8) :: a, b, fa, fb
    integer  :: iter
 
@@ -66,59 +67,59 @@ function first_guess(quantile, p)
    bounded_below = p%bounded_below;   bounded_above = p%bounded_above
    lower_bound   = p%lower_bound;     upper_bound   = p%upper_bound
 
-   quantile = quantile_in
+   u = cdf_in
 
-   ! Do a test for illegal values on the quantile
-   if(quantile <  0.0_r8 .or. quantile > 1.0_r8) then
-      write(errstring, *) 'Illegal Quantile input', quantile
+   ! Do a test for illegal values on the probability u
+   if(u <  0.0_r8 .or. u > 1.0_r8) then
+      write(errstring, *) 'Illegal cdf input', u
       call error_handler(E_ERR, 'inv_cdf', errstring, source)
    endif
 
-   ! If the distribution is bounded, quantiles at the limits have values at the bounds
-   if(bounded_below .and. quantile == 0.0_r8) then
-      x = lower_bound
+   ! If the distribution is bounded, probabilities of 0 and 1 have values at the bounds
+   if(bounded_below .and. u == 0.0_r8) then
+      quantile = lower_bound
       return
    endif
-   if(bounded_above .and. quantile == 1.0_r8) then
-      x = upper_bound
+   if(bounded_above .and. u == 1.0_r8) then
+      quantile = upper_bound
       return
    endif
 
-   ! If input quantiles are outside the numerically supported range, move them to the extremes
-   quantile = min(quantile, max_quantile)
-   ! code tests stably for many distributions with min_quantile of 0.0, could remove this
-   quantile = max(quantile, min_quantile)
+   ! If input probabilities are outside the numerically supported range, move them to the extremes
+   u = min(u, max_probability)
+   ! code tests stably for many distributions with min_probability of 0.0, could remove this
+   u = max(u, min_probability)
 
    ! Get first guess from functional approximation
-   x_guess = first_guess(quantile, p)
+   x_guess = first_guess(u, p)
 
    ! Evaluate the cdf
-   q_guess = cdf(x_guess, p)
+   u_guess = cdf(x_guess, p)
 
    ! Check if first guess is close enough
-   q_err = quantile - q_guess
-   if (abs(q_err) .le. tol) then
-      x = x_guess
+   u_err = u - u_guess
+   if (abs(u_err) .le. tol) then
+      quantile = x_guess
       return
    end if
 
-   ! If bounded below and target quantile is between 0 and q_guess, go to ITP
-   if (bounded_below .and. (quantile .lt. q_guess)) then
+   ! If bounded below and target probability is between 0 and u_guess, go to ITP
+   if (bounded_below .and. (u .lt. u_guess)) then
       a  = lower_bound
       b  = x_guess
-      fa = 0._r8 - quantile
-      fb = q_guess - quantile
-      x  = inv_cdf_ITP(cdf, quantile, a, b, fa, fb, max_iterations, p)
+      fa = 0._r8 - u
+      fb = u_guess - u
+      quantile = inv_cdf_ITP(cdf, u, a, b, fa, fb, max_iterations, p)
       return
    end if
 
-   ! If bounded above and target quantile is between q_guess and 1, go to ITP
-   if (bounded_above .and. (q_guess .lt. quantile)) then
+   ! If bounded above and target probability is between u_guess and 1, go to ITP
+   if (bounded_above .and. (u_guess .lt. u)) then
       a  = x_guess
       b  = upper_bound
-      fa = q_guess
+      fa = u_guess
       fb = 1._r8
-      x  = inv_cdf_ITP(cdf, quantile, a, b, fa, fb, max_iterations, p)
+      quantile = inv_cdf_ITP(cdf, u, a, b, fa, fb, max_iterations, p)
       return
    end if
 
@@ -126,7 +127,7 @@ function first_guess(quantile, p)
    ! doing steps of the secant method, either until convergence or until we bracket
    ! the root. If we bracket the root then we finish using the ITP method.
    x0 = x_guess
-   f0 = q_guess - quantile
+   f0 = u_guess - u
    delta_x = max(1e-2_r8, 1e-2_r8 * abs(x_guess))
    if (f0 .gt. 0._r8) then
       delta_x = -delta_x
@@ -135,7 +136,7 @@ function first_guess(quantile, p)
       x1 = x_guess + delta_x
       if(bounded_below .and. (x1 .lt. lower_bound)) x1 = lower_bound
       if(bounded_above .and. (x1 .gt. upper_bound)) x1 = upper_bound
-      f1 = cdf(x1, p) - quantile
+      f1 = cdf(x1, p) - u
       delta_f = abs(f1 - f0)
       if (delta_f .le. 0._r8) then
          delta_x = 2._r8 * delta_x
@@ -150,18 +151,18 @@ function first_guess(quantile, p)
          b  = max(x0, x1)
          fa = min(f0, f1)
          fb = max(f0, f1)
-         x = inv_cdf_ITP(cdf, quantile, a, b, fa, fb, max_iterations - iter + 1, p)
+         quantile = inv_cdf_ITP(cdf, u, a, b, fa, fb, max_iterations - iter + 1, p)
          return
       else ! Do a secant step
          delta_f = f1 - f0
          if (abs(delta_f) .eq. 0._r8) then ! Stop. Failed step: Flat CDF
-            write(errstring, *)  'Failed to converge; flat cdf for quantile ', quantile
+            write(errstring, *)  'Failed to converge; flat cdf'
             write(*,*) p%bounded_below, p%bounded_above
             write(*,*) 'More params:', p%more_params(:)
             write(*,*) 'iter, x0, x1, f0, f1:', iter, x0, x1, f0, f1
             write(*,*) 'ens:', p%ens(:)
             call error_handler(E_MSG, 'rootfinding_mod:inv_cdf', errstring, source)
-            x = x1
+            quantile = x1
             return
          else
             x_guess = x1 - f1 * delta_x / delta_f
@@ -171,8 +172,8 @@ function first_guess(quantile, p)
             f0 = f1
             x1 = x_guess
             delta_x = x1 - x0
-            f1 = cdf(x1, p) - quantile
-            x = x1
+            f1 = cdf(x1, p) - u
+            quantile = x1
          end if
       end if
 
@@ -185,7 +186,7 @@ function first_guess(quantile, p)
    ! For now, have switched a failed convergence to return the latest guess
    ! This has implications for stability of probit algorithms that require further study
    ! Not currently happening for any of the test cases on gfortran
-   write(errstring, *)  'Failed to converge for quantile ', quantile
+   write(errstring, *)  'Failed to converge for probability ', u
    call error_handler(E_MSG, 'inv_cdf', errstring, source)
    !!!call error_handler(E_ERR, 'inv_cdf', errstring, source)
 
@@ -193,7 +194,7 @@ end function inv_cdf
 
 !-----------------------------------------------------------------------
 
-function inv_cdf_ITP(cdf, quantile, a, b, fa, fb, max_iterations, p) result(x)
+function inv_cdf_ITP(cdf, u, a, b, fa, fb, max_iterations, p) result(x)
    interface
       function cdf(x, p)
          use types_mod, only : r8
@@ -204,16 +205,15 @@ function cdf(x, p)
       end function
    end interface
 
-   real(r8),                       intent(in) :: quantile
+   real(r8),                       intent(in) :: u
    real(r8),                    intent(inout) :: a, b, fa, fb
    integer,                        intent(in) :: max_iterations
    type(distribution_params_type), intent(in) :: p
    real(r8)                                   :: x
 
-   ! Given a quantile q, finds the value of x for which cdf(x) is approximately q
+   ! Given a probability u, finds the value of x for which cdf(x) is approximately u
    ! Uses the bisection-like ITP method from Oliveira & Takahashi, ACM Trans Math
-   ! Soft, 2020. Here f(x) = cdf(x) - quantile. Assumes f(a) < 0 and f(b) > 0 on
-   ! input.
+   ! Soft, 2020. Here f(x) = cdf(x) - u. Assumes f(a) < 0 and f(b) > 0 on input.
 
    ! Local variables:
    integer,  parameter :: n0 = 1._r8
@@ -248,7 +248,7 @@ function cdf(x, p)
       else
          x = x_half - sign(r, x_half - x_f)
       end if
-      f_ITP = cdf(x, p) - quantile
+      f_ITP = cdf(x, p) - u
       if (abs(f_ITP) .le. tol) then
          return
       elseif (f_ITP .gt. 0._r8) then

From 11619e2cc22d5bb48fe3b3bf0c494d0006073779 Mon Sep 17 00:00:00 2001
From: Ian Grooms <ian.grooms@colorado.edu>
Date: Fri, 19 Jul 2024 15:47:39 -0600
Subject: [PATCH 03/14] Move KDE test

---
 .../test_kde_dist/test_kde_dist.f90           |  4 ++
 .../test_kde_dist/test_kde_dist.rst           |  0
 developer_tests/test_kde_dist/work/input.nml  |  5 +++
 .../test_kde_dist/work/quickbuild.sh          | 37 +++++++++++++++++++
 4 files changed, 46 insertions(+)
 rename {assimilation_code/programs => developer_tests}/test_kde_dist/test_kde_dist.f90 (55%)
 rename {assimilation_code/programs => developer_tests}/test_kde_dist/test_kde_dist.rst (100%)
 create mode 100644 developer_tests/test_kde_dist/work/input.nml
 create mode 100755 developer_tests/test_kde_dist/work/quickbuild.sh

diff --git a/assimilation_code/programs/test_kde_dist/test_kde_dist.f90 b/developer_tests/test_kde_dist/test_kde_dist.f90
similarity index 55%
rename from assimilation_code/programs/test_kde_dist/test_kde_dist.f90
rename to developer_tests/test_kde_dist/test_kde_dist.f90
index b190390e1a..ae071996cc 100644
--- a/assimilation_code/programs/test_kde_dist/test_kde_dist.f90
+++ b/developer_tests/test_kde_dist/test_kde_dist.f90
@@ -1,3 +1,7 @@
+! DART software - Copyright UCAR. This open source software is provided
+! by UCAR, "as is", without charge, subject to all terms of use at
+! http://www.image.ucar.edu/DAReS/DART/DART_download
+
 program test_kde_mod
    use  utilities_mod, only : initialize_utilities, finalize_utilities
    use kde_distribution_mod, only : test_kde
diff --git a/assimilation_code/programs/test_kde_dist/test_kde_dist.rst b/developer_tests/test_kde_dist/test_kde_dist.rst
similarity index 100%
rename from assimilation_code/programs/test_kde_dist/test_kde_dist.rst
rename to developer_tests/test_kde_dist/test_kde_dist.rst
diff --git a/developer_tests/test_kde_dist/work/input.nml b/developer_tests/test_kde_dist/work/input.nml
new file mode 100644
index 0000000000..0d3e6e2380
--- /dev/null
+++ b/developer_tests/test_kde_dist/work/input.nml
@@ -0,0 +1,5 @@
+&utilities_nml
+   TERMLEVEL = 1,
+   module_details = .false.
+   logfilename = 'dart_log.out'  
+   /
diff --git a/developer_tests/test_kde_dist/work/quickbuild.sh b/developer_tests/test_kde_dist/work/quickbuild.sh
new file mode 100755
index 0000000000..1a31852f1b
--- /dev/null
+++ b/developer_tests/test_kde_dist/work/quickbuild.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# DART software - Copyright UCAR. This open source software is provided
+# by UCAR, "as is", without charge, subject to all terms of use at
+# http://www.image.ucar.edu/DAReS/DART/DART_download
+
+main() {
+
+
+export DART=$(git rev-parse --show-toplevel)
+source "$DART"/build_templates/buildfunctions.sh
+
+MODEL="none"
+EXTRA="$DART"/models/template/threed_model_mod.f90
+dev_test=1
+LOCATION="threed_sphere"
+TEST="test_kde_dist"
+
+serial_programs=(
+test_kde_dist
+)
+
+# quickbuild arguments
+arguments "$@"
+
+# clean the directory
+\rm -f -- *.o *.mod Makefile .cppdefs
+
+# build 
+buildit
+
+# clean up
+\rm -f -- *.o *.mod
+
+}
+
+main "$@"

From 61ac931ef1b7f20fa599bfae02dffd61830bc8c7 Mon Sep 17 00:00:00 2001
From: Ian Grooms <ian.grooms@colorado.edu>
Date: Fri, 19 Jul 2024 17:06:21 -0600
Subject: [PATCH 04/14] Move obs_increment_kde

This commit moves the obs_increment_kde subroutine out of assim_tools
and into the kde distribution module. It also changes whitespace,
adds the copyright header, and fixes minor bugs in the probit mod.
---
 .../assimilation/algorithm_info_mod.f90       |   4 +-
 .../modules/assimilation/assim_tools_mod.f90  | 167 +----------------
 .../assimilation/distribution_params_mod.f90  |   2 +-
 .../assimilation/kde_distribution_mod.f90     | 172 +++++++++++++++++-
 .../modules/assimilation/rootfinding_mod.f90  |   8 +
 5 files changed, 187 insertions(+), 166 deletions(-)

diff --git a/assimilation_code/modules/assimilation/algorithm_info_mod.f90 b/assimilation_code/modules/assimilation/algorithm_info_mod.f90
index 1eb01207cd..ad095a205b 100644
--- a/assimilation_code/modules/assimilation/algorithm_info_mod.f90
+++ b/assimilation_code/modules/assimilation/algorithm_info_mod.f90
@@ -250,7 +250,7 @@ subroutine read_qceff_table(qceff_table_filename)
       case ('UNIFORM_DISTRIBUTION')
          qceff_table_data(row)%probit_state%dist_type = UNIFORM_DISTRIBUTION
       case ('KDE_DISTRIBUTION')
-         qceff_table_data(row)%probit_inflation%dist_type = KDE_DISTRIBUTION
+         qceff_table_data(row)%probit_state%dist_type = KDE_DISTRIBUTION
       !!!case ('PARTICLE_FILTER_DISTRIBUTION')
          !!!qceff_table_data(row)%probit_state%dist_type = PARTICLE_FILTER_DISTRIBUTION
       case default
@@ -274,7 +274,7 @@ subroutine read_qceff_table(qceff_table_filename)
       case ('UNIFORM_DISTRIBUTION')
          qceff_table_data(row)%probit_extended_state%dist_type = UNIFORM_DISTRIBUTION
       case ('KDE_DISTRIBUTION')
-         qceff_table_data(row)%probit_inflation%dist_type = KDE_DISTRIBUTION
+         qceff_table_data(row)%probit_extended_state%dist_type = KDE_DISTRIBUTION
       !!!case ('PARTICLE_FILTER_DISTRIBUTION')
          !!!qceff_table_data(row)%probit_extended_state%dist_type = PARTICLE_FILTER_DISTRIBUTION
       case default
diff --git a/assimilation_code/modules/assimilation/assim_tools_mod.f90 b/assimilation_code/modules/assimilation/assim_tools_mod.f90
index af20abf925..7c25290a45 100644
--- a/assimilation_code/modules/assimilation/assim_tools_mod.f90
+++ b/assimilation_code/modules/assimilation/assim_tools_mod.f90
@@ -86,7 +86,8 @@ module assim_tools_mod
 use bnrh_distribution_mod, only   :  inv_bnrh_cdf, bnrh_cdf, inv_bnrh_cdf_like
 
 use kde_distribution_mod, only : kde_cdf_params, inv_kde_cdf_params, obs_dist_types,      &
-                                 pack_kde_params, likelihood_function, separate_ensemble
+                                 pack_kde_params, likelihood_function, separate_ensemble, &
+                                 obs_increment_kde
 
 use distribution_params_mod, only : distribution_params_type, deallocate_distribution_params
                                
@@ -890,7 +891,7 @@ end subroutine filter_assim
 !-------------------------------------------------------------
 
 subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_kind, obs_inc, &
-   inflate, my_cov_inflate, my_cov_inflate_sd, net_a, obs_dist_type)
+   inflate, my_cov_inflate, my_cov_inflate_sd, net_a)
 
 ! Given the ensemble prior for an observation, the observation, and
 ! the observation error variance, computes increments and adjusts
@@ -903,7 +904,6 @@ subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_kind, obs_inc, &
 type(adaptive_inflate_type), intent(inout) :: inflate
 real(r8),                    intent(inout) :: my_cov_inflate, my_cov_inflate_sd
 real(r8),                    intent(out)   :: net_a
-integer, optional,           intent(in)    :: obs_dist_type
 
 real(r8) :: ens(ens_size), inflate_inc(ens_size)
 real(r8) :: prior_mean, prior_var, new_val(ens_size)
@@ -1031,24 +1031,8 @@ subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_kind, obs_inc, &
    !--------------------------------------------------------------------------
 
    else if(filter_kind == KERNEL_QCEF) then
-      ! obs_dist_type is an integer whose value is provided by kde_distribution_mod
-      ! Options are uninformative, normal, binomial, gamma,
-      ! inv_gamma, lognormal, and truncated_normal. Details can be found in the
-      ! definition of the likelihood function in kde_distribution_mod.f90. Different
-      ! types of observations have different interpretations for `obs_var'.
-      if (.not. present(obs_dist_type)) then
-!          call error_handler(E_ERR,'obs_increment', &
-!                  'For kde filter, must specify obs_dist_type', source)
-         ! IG: Hack. Assume that the obs dist type is truncated normal with the same
-         ! bounds as the observed variable.
-         call obs_increment_kde(ens, ens_size, obs, obs_var, &
-            obs_dist_types%truncated_normal, bounded_below, &
-            bounded_above, lower_bound, upper_bound, obs_inc)
-      else
-         call obs_increment_kde(ens, ens_size, obs, obs_var, obs_dist_type, &
-            bounded_below, bounded_above, lower_bound, upper_bound, obs_inc)
-      end if
-
+      call obs_increment_kde(ens, ens_size, obs, obs_var, bounded_below, &
+         bounded_above, lower_bound, upper_bound, obs_inc)
    else
        call error_handler(E_ERR,'obs_increment', &
               'Illegal value of filter_kind', source)
@@ -1190,147 +1174,6 @@ subroutine obs_increment_bounded_norm_rhf(ens, ens_like, ens_size, prior_var, &
 
 end subroutine obs_increment_bounded_norm_rhf
 
-
-subroutine obs_increment_kde(ens, ens_size, y, obs_param, obs_dist_type, &
-   bounded_below, bounded_above, lower_bound, upper_bound, obs_inc)
-   integer,  intent(in)  :: ens_size
-   real(r8), intent(in)  :: ens(ens_size)
-   real(r8), intent(in)  :: y
-   real(r8), intent(in)  :: obs_param
-   integer,  intent(in)  :: obs_dist_type
-   logical,  intent(in)  :: bounded_below, bounded_above
-   real(r8), intent(in)  :: lower_bound,   upper_bound
-   real(r8), intent(out) :: obs_inc(ens_size)
-
-   ! Applies a QCEF based on kernel density estimation & quadrature
-
-   real(r8) :: u
-   real(r8) :: p_lower_prior, p_int_prior, p_upper_prior
-   real(r8) :: p_lower_post, p_int_post, p_upper_post
-   real(r8) :: like_ens_mean ! ensemble mean of the likelihood
-   real(r8) :: ens_interior(ens_size)
-   real(r8) :: unif
-   real(r8) :: d(ens_size), d_max
-   type(distribution_params_type) :: params_interior_prior
-   type(distribution_params_type) :: params_interior_posterior
-   integer  :: ens_size_interior
-   integer  :: i, count_lower, count_upper
-
-   ! If this is first time through, need to initialize the random sequence.
-   ! This will reproduce exactly for multiple runs with the same task count,
-   ! but WILL NOT reproduce for a different number of MPI tasks.
-   ! To make it independent of the number of MPI tasks, it would need to
-   ! use the global ensemble number or something else that remains constant
-   ! as the processor count changes.  this is not currently an argument to
-   ! this function and so we are not trying to make it task-count invariant.
-   if(first_inc_ran_call) then
-      call random_seed()
-      call random_number(unif)
-      i = int(unif * 1000)
-      call init_random_seq(inc_ran_seq, my_task_id() + i)
-      first_inc_ran_call = .false.
-   endif
-
-   ! If all ensemble members are identical, then there is no update
-   d(:) = abs( ens(:) - ens(1) )
-   d_max = maxval(d)
-   if(d_max .le. 0.0_r8) then
-      obs_inc(:) = 0._r8
-      return
-   endif
-
-   ! Get mixture component probabilities for the prior, and the interior ensemble, and its params
-   call separate_ensemble(ens, ens_size, bounded_below, bounded_above, &
-      lower_bound, upper_bound, ens_interior, ens_size_interior, &
-      p_lower_prior, p_int_prior, p_upper_prior)
-   if (ens_size_interior .gt. 1) then
-      d(1:ens_size_interior) = abs( ens_interior(1:ens_size_interior) - ens_interior(1) )
-      d_max = maxval(d(1:ens_size_interior))
-      call pack_kde_params(ens_size_interior, bounded_below, bounded_above, lower_bound, upper_bound, &
-                           ens_interior, y, obs_param, obs_dist_types%uninformative, &
-                           params_interior_prior)
-      call pack_kde_params(ens_size_interior, bounded_below, bounded_above, lower_bound, upper_bound, &
-                           ens_interior, y, obs_param, obs_dist_type, &
-                           params_interior_posterior)
-   else
-      d_max = 0._r8
-   endif
-
-   ! Get mixture component probabilities for the posterior
-   if (bounded_below) then
-      p_lower_post = p_lower_prior * likelihood_function(lower_bound, y, obs_param, obs_dist_type)
-   else
-      p_lower_post  = 0._r8
-   end if
-   if (bounded_above) then
-      p_upper_post = p_upper_prior * likelihood_function(upper_bound, y, obs_param, obs_dist_type)
-   else
-      p_upper_post  = 0._r8
-   end if
-   p_int_post = 0._r8
-   do i=1,ens_size_interior
-      p_int_post = p_int_post + likelihood_function(ens_interior(i), y, obs_param, obs_dist_type)
-   end do
-   p_int_post = p_int_prior * p_int_post / real(ens_size_interior, r8)
-   ! Get prior ensemble mean of likelihood
-   like_ens_mean = p_lower_post + p_int_post + p_upper_post
-   ! If likelihood underflow, assume flat likelihood, so no increments
-   if(like_ens_mean .le. 0.0_r8) then
-      obs_inc(:) = 0.0_r8
-      return
-   else ! Finish getting mixture component probabilities for the posterior
-      p_lower_post = p_lower_post / like_ens_mean
-      p_upper_post = p_upper_post / like_ens_mean
-      p_int_post   = 1._r8 - p_lower_post - p_upper_post
-   end if
-
-   ! Update ensemble members -> get observation increments
-
-   count_lower = 0
-   count_upper = 0
-   unif = random_uniform(inc_ran_seq) / real(ens_size, r8)
-   do i=1,ens_size
-      ! Map each ensemble member to a probability u in [0,1] using the prior cdf. Members
-      ! on the boundary get random probabilities.
-      if (bounded_below .and. (ens(i) .le. lower_bound)) then
-         ! Rather than draw a random for each member on the boundary,
-         ! I draw one random (above) then add 1/Nb to each subsequent value
-         u = unif + real(count_lower, r8) / real(ens_size, r8)
-         count_lower = count_lower + 1
-         write(*,*) 'lower boundary probability ', u
-       elseif (bounded_above .and.  (ens(i) .ge. upper_bound)) then
-         ! As above, I draw one random then add 1/Nb to each subsequent value
-         u = 1._r8 - (unif + real(count_upper, r8) / real(ens_size, r8))
-         count_upper = count_upper + 1
-         write(*,*) 'upper boundary probability ', u
-       elseif ((ens_size_interior .eq. 1) .or. (d_max .le. 0._r8)) then
-         ! Can't use kde with only one ensemble member (or identical ensemble members), so assign a
-         ! random probability
-         u = (p_int_prior * random_uniform(inc_ran_seq)) + p_lower_prior
-      else ! Use the interior cdf obtained using kde.
-         u = kde_cdf_params(ens(i), params_interior_prior)
-         u = (p_int_prior * u) + p_lower_prior
-      end if
-      ! Invert the posterior kde cdf to get the updated ensemble member, then
-      ! subtract to get the increment.
-      if (u .le. p_lower_post) then ! Posterior value on the lower boundary
-         obs_inc(i) = lower_bound - ens(i)
-      elseif (u .ge. (1._r8 - p_upper_post)) then ! Posterior value on the upper boundary
-         obs_inc(i) = upper_bound - ens(i)
-      elseif ((ens_size_interior .eq. 1) .or. (d_max .le. 0._r8)) then
-         ! posterior value in the interior, but there's only one prior ensemble member/value
-         ! in the interior, so we just assign the posterior ensemble member equal to the prior one
-         obs_inc(i) = ens_interior(1) - ens(i)
-      else ! posterior value in the interior, can use kde
-         ! Rescale u to be between 0 and 1 before inverting interior cdf
-         u = (u - p_lower_post) / p_int_post
-         obs_inc(i) = inv_kde_cdf_params(u, params_interior_posterior)
-         obs_inc(i) = obs_inc(i) - ens(i)
-      end if
-   end do
-
-end subroutine obs_increment_kde
-
 ! Computes a normal or truncated normal (above and/or below) likelihood.
 function get_truncated_normal_like(x, obs, obs_var, &
    bounded_below, bounded_above, lower_bound, upper_bound)
diff --git a/assimilation_code/modules/assimilation/distribution_params_mod.f90 b/assimilation_code/modules/assimilation/distribution_params_mod.f90
index 43849f7fbd..0507810780 100644
--- a/assimilation_code/modules/assimilation/distribution_params_mod.f90
+++ b/assimilation_code/modules/assimilation/distribution_params_mod.f90
@@ -25,7 +25,7 @@ module distribution_params_mod
 integer, parameter :: LOG_NORMAL_DISTRIBUTION        = 5
 integer, parameter :: UNIFORM_DISTRIBUTION           = 6
 integer, parameter :: PARTICLE_FILTER_DISTRIBUTION   = 7
-integer, parameter :: KDE_DISTRIBUTION            = 8
+integer, parameter :: KDE_DISTRIBUTION               = 8
 
 public :: distribution_params_type, deallocate_distribution_params, &
    NORMAL_DISTRIBUTION, BOUNDED_NORMAL_RH_DISTRIBUTION, GAMMA_DISTRIBUTION, BETA_DISTRIBUTION, &
diff --git a/assimilation_code/modules/assimilation/kde_distribution_mod.f90 b/assimilation_code/modules/assimilation/kde_distribution_mod.f90
index 0b1ef1a042..7386b918b5 100644
--- a/assimilation_code/modules/assimilation/kde_distribution_mod.f90
+++ b/assimilation_code/modules/assimilation/kde_distribution_mod.f90
@@ -1,3 +1,11 @@
+! DART software - Copyright UCAR. This open source software is provided
+! by UCAR, "as is", without charge, subject to all terms of use at
+! http://www.image.ucar.edu/DAReS/DART/DART_download
+
+! This module originally written by I. Grooms and described (briefly)
+! in "A Quantile-Conserving Ensemble Filter Based on Kernel-Density Estimation"
+! by Grooms & Riedel (Remote Sensing, 2024; DOI:10.3390/rs16132377).
+
 module kde_distribution_mod
 
 use types_mod,               only : r8, missing_r8
@@ -6,6 +14,11 @@ module kde_distribution_mod
 
 use sort_mod,                only : sort
 
+use random_seq_mod,       only : random_seq_type, random_gaussian, init_random_seq, &
+                                 random_uniform
+
+use mpi_utilities_mod,    only : my_task_id
+
 use distribution_params_mod, only : distribution_params_type, deallocate_distribution_params, &
                                     KDE_DISTRIBUTION
 
@@ -18,7 +31,7 @@ module kde_distribution_mod
 
 public :: kde_cdf, kde_cdf_params, inv_kde_cdf, inv_kde_cdf_params,       &
           test_kde, obs_dist_types, likelihood_function, pack_kde_params, &
-          separate_ensemble
+          separate_ensemble, obs_increment_kde
 
 type available_obs_dist_types
    integer  :: uninformative, normal, binomial, gamma, &
@@ -31,6 +44,10 @@ module kde_distribution_mod
 character(len=512)          :: errstring
 character(len=*), parameter :: source = 'kde_distribution_mod.f90'
 
+! True if random sequence needs to be initialized
+logical                :: first_inc_ran_call = .true.
+type (random_seq_type) :: inc_ran_seq
+
 contains
 
 !---------------------------------------------------------------------------
@@ -745,6 +762,159 @@ end subroutine separate_ensemble
 
 !-----------------------------------------------------------------------
 
+subroutine obs_increment_kde(ens, ens_size, y, obs_param, bounded_below, &
+   bounded_above, lower_bound, upper_bound, obs_inc)
+   integer,  intent(in)  :: ens_size
+   real(r8), intent(in)  :: ens(ens_size)
+   real(r8), intent(in)  :: y
+   real(r8), intent(in)  :: obs_param
+   logical,  intent(in)  :: bounded_below, bounded_above
+   real(r8), intent(in)  :: lower_bound,   upper_bound
+   real(r8), intent(out) :: obs_inc(ens_size)
+
+   ! Applies a QCEF based on kernel density estimation & quadrature
+   ! Assumes a truncated normal likelihood whose bounds are the
+   ! same as the state variable. If the state variable is unbounded
+   ! then it uses a normal likelihood. Obviously this assumtion is
+   ! limiting. It could eventually be replaced with logic based on
+   ! obs_kind, which would be added as an input argument.
+
+   real(r8) :: u
+   real(r8) :: p_lower_prior, p_int_prior, p_upper_prior
+   real(r8) :: p_lower_post, p_int_post, p_upper_post
+   real(r8) :: like_ens_mean ! ensemble mean of the likelihood
+   real(r8) :: ens_interior(ens_size)
+   real(r8) :: unif
+   real(r8) :: d(ens_size), d_max
+   type(distribution_params_type) :: params_interior_prior
+   type(distribution_params_type) :: params_interior_posterior
+   integer  :: ens_size_interior
+   integer  :: i, count_lower, count_upper
+   integer  :: obs_dist_type
+
+   if (bounded_below .or. bounded_above) then
+      obs_dist_type = obs_dist_types%truncated_normal
+   else
+      obs_dist_type = obs_dist_types%normal
+   endif
+
+   ! If this is first time through, need to initialize the random sequence.
+   ! This will reproduce exactly for multiple runs with the same task count,
+   ! but WILL NOT reproduce for a different number of MPI tasks.
+   ! To make it independent of the number of MPI tasks, it would need to
+   ! use the global ensemble number or something else that remains constant
+   ! as the processor count changes.  this is not currently an argument to
+   ! this function and so we are not trying to make it task-count invariant.
+   if (first_inc_ran_call) then
+      call random_seed()
+      call random_number(unif)
+      i = int(unif * 1000)
+      call init_random_seq(inc_ran_seq, my_task_id() + i)
+      first_inc_ran_call = .false.
+   endif
+
+   ! If all ensemble members are identical, then there is no update
+   d(:) = abs( ens(:) - ens(1) )
+   d_max = maxval(d)
+   if(d_max .le. 0.0_r8) then
+      obs_inc(:) = 0._r8
+      return
+   endif
+
+   ! Get mixture component probabilities for the prior, and the interior ensemble, and its params
+   call separate_ensemble(ens, ens_size, bounded_below, bounded_above, &
+      lower_bound, upper_bound, ens_interior, ens_size_interior, &
+      p_lower_prior, p_int_prior, p_upper_prior)
+   if (ens_size_interior .gt. 1) then
+      d(1:ens_size_interior) = abs( ens_interior(1:ens_size_interior) - ens_interior(1) )
+      d_max = maxval(d(1:ens_size_interior))
+      call pack_kde_params(ens_size_interior, bounded_below, bounded_above, lower_bound, upper_bound, &
+                           ens_interior, y, obs_param, obs_dist_types%uninformative, &
+                           params_interior_prior)
+      call pack_kde_params(ens_size_interior, bounded_below, bounded_above, lower_bound, upper_bound, &
+                           ens_interior, y, obs_param, obs_dist_type, &
+                           params_interior_posterior)
+   else
+      d_max = 0._r8
+   endif
+
+   ! Get mixture component probabilities for the posterior
+   if (bounded_below) then
+      p_lower_post = p_lower_prior * likelihood_function(lower_bound, y, obs_param, obs_dist_type)
+   else
+      p_lower_post  = 0._r8
+   end if
+   if (bounded_above) then
+      p_upper_post = p_upper_prior * likelihood_function(upper_bound, y, obs_param, obs_dist_type)
+   else
+      p_upper_post  = 0._r8
+   end if
+   p_int_post = 0._r8
+   do i=1,ens_size_interior
+      p_int_post = p_int_post + likelihood_function(ens_interior(i), y, obs_param, obs_dist_type)
+   end do
+   p_int_post = p_int_prior * p_int_post / real(ens_size_interior, r8)
+   ! Get prior ensemble mean of likelihood
+   like_ens_mean = p_lower_post + p_int_post + p_upper_post
+   ! If likelihood underflow, assume flat likelihood, so no increments
+   if(like_ens_mean .le. 0.0_r8) then
+      obs_inc(:) = 0.0_r8
+      return
+   else ! Finish getting mixture component probabilities for the posterior
+      p_lower_post = p_lower_post / like_ens_mean
+      p_upper_post = p_upper_post / like_ens_mean
+      p_int_post   = 1._r8 - p_lower_post - p_upper_post
+   end if
+
+   ! Update ensemble members -> get observation increments
+
+   count_lower = 0
+   count_upper = 0
+   unif = random_uniform(inc_ran_seq) / real(ens_size, r8)
+   do i=1,ens_size
+      ! Map each ensemble member to a probability u in [0,1] using the prior cdf. Members
+      ! on the boundary get random probabilities.
+      if (bounded_below .and. (ens(i) .le. lower_bound)) then
+         ! Rather than draw a random for each member on the boundary,
+         ! I draw one random (above) then add 1/Nb to each subsequent value
+         u = unif + real(count_lower, r8) / real(ens_size, r8)
+         count_lower = count_lower + 1
+         write(*,*) 'lower boundary probability ', u
+       elseif (bounded_above .and.  (ens(i) .ge. upper_bound)) then
+         ! As above, I draw one random then add 1/Nb to each subsequent value
+         u = 1._r8 - (unif + real(count_upper, r8) / real(ens_size, r8))
+         count_upper = count_upper + 1
+         write(*,*) 'upper boundary probability ', u
+       elseif ((ens_size_interior .eq. 1) .or. (d_max .le. 0._r8)) then
+         ! Can't use kde with only one ensemble member (or identical ensemble members), so assign a
+         ! random probability
+         u = (p_int_prior * random_uniform(inc_ran_seq)) + p_lower_prior
+      else ! Use the interior cdf obtained using kde.
+         u = kde_cdf_params(ens(i), params_interior_prior)
+         u = (p_int_prior * u) + p_lower_prior
+      end if
+      ! Invert the posterior kde cdf to get the updated ensemble member, then
+      ! subtract to get the increment.
+      if (u .le. p_lower_post) then ! Posterior value on the lower boundary
+         obs_inc(i) = lower_bound - ens(i)
+      elseif (u .ge. (1._r8 - p_upper_post)) then ! Posterior value on the upper boundary
+         obs_inc(i) = upper_bound - ens(i)
+      elseif ((ens_size_interior .eq. 1) .or. (d_max .le. 0._r8)) then
+         ! posterior value in the interior, but there's only one prior ensemble member/value
+         ! in the interior, so we just assign the posterior ensemble member equal to the prior one
+         obs_inc(i) = ens_interior(1) - ens(i)
+      else ! posterior value in the interior, can use kde
+         ! Rescale u to be between 0 and 1 before inverting interior cdf
+         u = (u - p_lower_post) / p_int_post
+         obs_inc(i) = inv_kde_cdf_params(u, params_interior_posterior)
+         obs_inc(i) = obs_inc(i) - ens(i)
+      end if
+   end do
+
+end subroutine obs_increment_kde
+
+!-----------------------------------------------------------------------
+
 subroutine test_kde
    ! This routine provides limited tests of the numerics in this module. It tests
    ! the boundary correction function, the likelihood, the bandwidth selection,
diff --git a/assimilation_code/modules/assimilation/rootfinding_mod.f90 b/assimilation_code/modules/assimilation/rootfinding_mod.f90
index 715dece906..34c0a9cf75 100644
--- a/assimilation_code/modules/assimilation/rootfinding_mod.f90
+++ b/assimilation_code/modules/assimilation/rootfinding_mod.f90
@@ -1,3 +1,11 @@
+! DART software - Copyright UCAR. This open source software is provided
+! by UCAR, "as is", without charge, subject to all terms of use at
+! http://www.image.ucar.edu/DAReS/DART/DART_download
+
+! This module originally written by I. Grooms and described (briefly)
+! in "A Quantile-Conserving Ensemble Filter Based on Kernel-Density Estimation"
+! by Grooms & Riedel (Remote Sensing, 2024; DOI:10.3390/rs16132377).
+
 module rootfinding_mod
 
 use types_mod,               only : r8

From 212ff4a3fb6b230d73c70c5d9cfefa0e1181b29c Mon Sep 17 00:00:00 2001
From: Ian Grooms <ian.grooms@colorado.edu>
Date: Tue, 23 Jul 2024 16:18:56 -0600
Subject: [PATCH 05/14] Add KQCEF to docs

---
 guide/qceff_probit.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/guide/qceff_probit.rst b/guide/qceff_probit.rst
index dddd4fbf7f..e7bfa4ce72 100644
--- a/guide/qceff_probit.rst
+++ b/guide/qceff_probit.rst
@@ -119,6 +119,7 @@ Available filter kinds
    * UNBOUNDED_RHF
    * GAMMA_FILTER
    * BOUNDED_NORMAL_RHF
+   * KERNEL_QCEF
 
 .. _Distributions:
 
@@ -131,7 +132,7 @@ Available distributions
   * BETA_DISTRIBUTION
   * LOG_NORMAL_DISTRIBUTION
   * UNIFORM_DISTRIBUTION
-
+  * KDE_DISTRIBUTION
 
 
 .. _Default values:

From 6990fa999d89e5197f63ad74d0a630a503501e92 Mon Sep 17 00:00:00 2001
From: Ian Grooms <ian.grooms@colorado.edu>
Date: Wed, 24 Jul 2024 16:39:53 -0600
Subject: [PATCH 06/14] Error mesage cleanup

Based on suggestions by @hkershaw-brown, I changed some errors from
messages to fatal errors. Also replaced the case where the observation
distribution is truncated-normal and the observation error variance
is non-positive with a fatal error.
---
 .../modules/assimilation/assim_tools_mod.f90       |  2 +-
 .../modules/assimilation/kde_distribution_mod.f90  | 14 +++++---------
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/assimilation_code/modules/assimilation/assim_tools_mod.f90 b/assimilation_code/modules/assimilation/assim_tools_mod.f90
index 7c25290a45..55a79b867c 100644
--- a/assimilation_code/modules/assimilation/assim_tools_mod.f90
+++ b/assimilation_code/modules/assimilation/assim_tools_mod.f90
@@ -1034,7 +1034,7 @@ subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_kind, obs_inc, &
       call obs_increment_kde(ens, ens_size, obs, obs_var, bounded_below, &
          bounded_above, lower_bound, upper_bound, obs_inc)
    else
-       call error_handler(E_ERR,'obs_increment', &
+      call error_handler(E_ERR,'obs_increment', &
               'Illegal value of filter_kind', source)
    endif
 endif
diff --git a/assimilation_code/modules/assimilation/kde_distribution_mod.f90 b/assimilation_code/modules/assimilation/kde_distribution_mod.f90
index 7386b918b5..402da5fd37 100644
--- a/assimilation_code/modules/assimilation/kde_distribution_mod.f90
+++ b/assimilation_code/modules/assimilation/kde_distribution_mod.f90
@@ -8,7 +8,7 @@
 
 module kde_distribution_mod
 
-use types_mod,               only : r8, missing_r8
+use types_mod,               only : r8, MISSING_R8
 
 use utilities_mod,           only : E_ERR, E_MSG, error_handler
 
@@ -82,7 +82,7 @@ function likelihood_function(x, y, obs_param, obs_dist_type, &
          ! For a normal obs distribution, like_param is the obs error variance
          if (obs_param <= 0._r8) then
             write(errstring, *) 'obs error sd <= 0', obs_param
-            call error_handler(E_MSG, 'kde_distribution_mod:likelihood', errstring, source)
+            call error_handler(E_ERR, 'kde_distribution_mod:likelihood', errstring, source)
          else
             l = exp( -0.5_r8 * (x - y)**2 / obs_param )
          end if
@@ -166,12 +166,8 @@ function likelihood_function(x, y, obs_param, obs_dist_type, &
          ! sigma * sqrt(2 * pi) is ignored.
          ! A zero observation error variance is a degenerate case
          if(obs_param <= 0.0_r8) then
-            if(x == y) then
-               l = 1.0_r8
-            else
-               l = 0.0_r8
-            endif
-            return
+            write(errstring, *) 'obs error sd <= 0', obs_param
+            call error_handler(E_ERR, 'kde_distribution_mod:likelihood', errstring, source)
          endif
 
          cdf = [0._r8, 1._r8]
@@ -193,7 +189,7 @@ function likelihood_function(x, y, obs_param, obs_dist_type, &
          l = exp( -0.5_r8 * (x - y)**2 / obs_param ) / (cdf(2) - cdf(1))
       case DEFAULT
          write(errstring, *) 'likelihood called with unrecognized obs_dist_type ', obs_dist_type
-         call error_handler(E_MSG, 'kde_distribution_mod:likelihood_function', errstring, source)
+         call error_handler(E_ERR, 'kde_distribution_mod:likelihood_function', errstring, source)
    end select
 
 end function likelihood_function

From 3d48c4fca5f3d8a57cb6293334d824afbd65b117 Mon Sep 17 00:00:00 2001
From: Ian Grooms <ian.grooms@colorado.edu>
Date: Wed, 24 Jul 2024 18:01:51 -0600
Subject: [PATCH 07/14] Code cleanup

Many minor changes following suggestions from @hkershaw-brown. I
also removed an unused parameter in rootfinding.mod
---
 .../assimilation/kde_distribution_mod.f90     | 128 ++++++++++++++----
 .../assimilation/probit_transform_mod.f90     |   5 +-
 .../modules/assimilation/rootfinding_mod.f90  |  45 +++---
 .../test_kde_dist/test_kde_dist.f90           |   2 +-
 4 files changed, 130 insertions(+), 50 deletions(-)

diff --git a/assimilation_code/modules/assimilation/kde_distribution_mod.f90 b/assimilation_code/modules/assimilation/kde_distribution_mod.f90
index 402da5fd37..7afa415c7b 100644
--- a/assimilation_code/modules/assimilation/kde_distribution_mod.f90
+++ b/assimilation_code/modules/assimilation/kde_distribution_mod.f90
@@ -118,7 +118,7 @@ function likelihood_function(x, y, obs_param, obs_dist_type, &
          else
             gamma_shape = 1._r8 / obs_param ! k
             gamma_scale = x * obs_param     ! theta
-            if (x .eq. 0._r8) then
+            if (x == 0._r8) then
                l = 0._r8 ! Technically x must be > 0, but just setting l=0 is convenient.
             else
                l = (y / gamma_scale)**gamma_shape * exp(-y / gamma_scale)
@@ -196,7 +196,7 @@ end function likelihood_function
 
 !---------------------------------------------------------------------------
 
-elemental function epanechnikov_kernel(x) ! The fact that this is elemental is not (yet) used
+elemental function epanechnikov_kernel(x)
    real(r8) :: epanechnikov_kernel
    real(r8), intent(in) :: x
    real(r8), parameter  :: norm_const = 3._r8 / 4._r8
@@ -206,7 +206,7 @@ end function epanechnikov_kernel
 
 !---------------------------------------------------------------------------
 
-elemental function epanechnikov_cdf(x) ! The fact that this is elemental is not (yet) used
+elemental function epanechnikov_cdf(x)
    real(r8) :: epanechnikov_cdf
    real(r8), intent(in) :: x
    real(r8), parameter  :: norm_const = 1._r8 / 4._r8
@@ -227,7 +227,6 @@ elemental subroutine boundary_correction(x, lx, mx)
    real(r8), intent(out) :: lx, mx
 
    ! Boundary correction for kde, from Jones (1993) and Jones & Foster (1996)
-   ! The fact that this is elemental is not (yet) used
 
    real(r8) :: denom
 
@@ -717,10 +716,6 @@ subroutine separate_ensemble(ens, ens_size, bounded_below, bounded_above, &
    ! local variables
    integer  :: i, j
    logical  :: is_interior(ens_size)
-   real(r8), parameter :: eps = 0._r8 !epsilon(1._r8)
-
-   ! Initialize
-   ens_interior(:) = missing_r8
 
    q_lower = 0._r8
    q_int   = 0._r8
@@ -729,13 +724,13 @@ subroutine separate_ensemble(ens, ens_size, bounded_below, bounded_above, &
    j = 0
    do i=1,ens_size
       if (bounded_below) then
-         if (ens(i) <= lower_bound + eps) then
+         if (ens(i) <= lower_bound) then
             q_lower        = q_lower + 1._r8
             is_interior(i) = .false.
          end if
       end if
       if (bounded_above) then
-         if (ens(i) >= upper_bound - eps) then
+         if (ens(i) >= upper_bound) then
             q_upper        = q_upper + 1._r8
             is_interior(i) = .false.
          end if
@@ -797,10 +792,6 @@ subroutine obs_increment_kde(ens, ens_size, y, obs_param, bounded_below, &
    ! If this is first time through, need to initialize the random sequence.
    ! This will reproduce exactly for multiple runs with the same task count,
    ! but WILL NOT reproduce for a different number of MPI tasks.
-   ! To make it independent of the number of MPI tasks, it would need to
-   ! use the global ensemble number or something else that remains constant
-   ! as the processor count changes.  this is not currently an argument to
-   ! this function and so we are not trying to make it task-count invariant.
    if (first_inc_ran_call) then
       call random_seed()
       call random_number(unif)
@@ -933,6 +924,11 @@ subroutine test_kde
    write(*, *) 'abs difference in lx is ', abs(1.322997416020672_r8 - lx)
    write(*, *) 'abs difference in mx is ', abs(1.102497846683893_r8 - mx)
    write(*, *) 'abs differences should be less than 1e-15'
+   if ((abs(1.322997416020672_r8 - lx) > 1E-15) .or. (abs(1.102497846683893_r8 - mx) > 1E-15)) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    ! Test uninformative likelihood
    max_diff = -1.0_r8
@@ -948,6 +944,11 @@ subroutine test_kde
    write(*, *) 'Uninformative likelihood test'
    write(*, *) 'max difference in likelihood is ', max_diff
    write(*, *) 'max difference should be less than 1e-15'
+   if (max_diff > 1E-15_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    ! Test normal likelihood
    x = 0.5_r8
@@ -955,10 +956,16 @@ subroutine test_kde
    obs_param = 1._r8
    obs_dist_type = obs_dist_types%normal
    like = likelihood_function(x, y, obs_param, obs_dist_type)
+   max_diff = abs(0.8824969025845955_r8 - like)
    write(*, *) '----------------------------'
    write(*, *) 'Normal likelihood test'
-   write(*, *) 'abs difference in likelihood is ', abs(0.8824969025845955_r8 - like)
+   write(*, *) 'abs difference in likelihood is ', max_diff
    write(*, *) 'abs difference should be less than 1e-15'
+   if (max_diff > 1E-15_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    ! Test binomial obs distribution (beta likelihood)
    x = 0.25_r8
@@ -966,10 +973,16 @@ subroutine test_kde
    obs_param = 5._r8
    obs_dist_type = obs_dist_types%binomial
    like = likelihood_function(x, y, obs_param, obs_dist_type)
+   max_diff = abs(0.0087890625_r8 - like)
    write(*, *) '----------------------------'
    write(*, *) 'Binomial obs distribution test'
-   write(*, *) 'abs difference in likelihood is ', abs(0.0087890625_r8 - like)
+   write(*, *) 'abs difference in likelihood is ', max_diff
    write(*, *) 'abs difference should be less than 1e-15'
+   if (max_diff > 1E-15_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    ! Test gamma obs distribution (inverse gamma likelihood)
    y = 1._r8
@@ -977,10 +990,16 @@ subroutine test_kde
    obs_param = 3._r8
    obs_dist_type = obs_dist_types%gamma
    like = likelihood_function(x, y, obs_param, obs_dist_type)
+   max_diff = abs(0.4658368455179406_r8 - like)
    write(*, *) '----------------------------'
    write(*, *) 'Gamma obs distribution test'
-   write(*, *) 'abs difference in likelihood is ', abs(0.4658368455179406_r8 - like)
+   write(*, *) 'abs difference in likelihood is ', max_diff
    write(*, *) 'abs difference should be less than 1e-15'
+   if (max_diff > 1E-15_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    ! Test inverse gamma obs distribution (gamma likelihood)
    y = 3._r8
@@ -988,10 +1007,16 @@ subroutine test_kde
    obs_param = 4._r8
    obs_dist_type = obs_dist_types%inv_gamma
    like = likelihood_function(x, y, obs_param, obs_dist_type)
+   max_diff = abs(3.415489474106968_r8 - like)
    write(*, *) '----------------------------'
    write(*, *) 'Inverse gamma obs distribution test'
-   write(*, *) 'abs difference in likelihood is ', abs(3.415489474106968_r8 - like)
+   write(*, *) 'abs difference in likelihood is ', max_diff
    write(*, *) 'abs difference should be less than 1e-15'
+   if (max_diff > 1E-15_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    ! Test lognormal obs distribution (normal likelihood)
    y = 1._r8
@@ -999,10 +1024,16 @@ subroutine test_kde
    obs_param = 4._r8
    obs_dist_type = obs_dist_types%lognormal
    like = likelihood_function(x, y, obs_param, obs_dist_type)
+   max_diff = abs(0.6065306597126334_r8 - like)
    write(*, *) '----------------------------'
    write(*, *) 'Lognormal obs distribution test'
-   write(*, *) 'abs difference in likelihood is ', abs(0.6065306597126334_r8 - like)
+   write(*, *) 'abs difference in likelihood is ', max_diff
    write(*, *) 'abs difference should be less than 1e-15'
+   if (max_diff > 1E-15_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    ! Test truncated-normal obs distribution
    y = 1._r8
@@ -1011,22 +1042,40 @@ subroutine test_kde
    obs_dist_type = obs_dist_types%truncated_normal
    like = likelihood_function(x, y, obs_param, obs_dist_type, &
          bounded_above=.true.,bounded_below=.true.,upper_bound=4._r8,lower_bound=0._r8)
+   max_diff = abs(1.292676850528392_r8 - like)
    write(*, *) '----------------------------'
    write(*, *) 'Truncated-normal obs distribution test, doubly-bounded'
-   write(*, *) 'abs difference in likelihood is ', abs(1.292676850528392_r8 - like)
+   write(*, *) 'abs difference in likelihood is ', max_diff
    write(*, *) 'abs difference should be less than 1e-15'
+   if (max_diff > 1E-15_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
    like = likelihood_function(x, y, obs_param, obs_dist_type, &
          bounded_above=.false.,bounded_below=.true.,upper_bound=4._r8,lower_bound=0._r8)
+   max_diff = abs(1.048912359301403_r8 - like)
    write(*, *) '----------------------------'
    write(*, *) 'Truncated-normal obs distribution test, bounded below'
-   write(*, *) 'abs difference in likelihood is ', abs(1.048912359301403_r8 - like)
+   write(*, *) 'abs difference in likelihood is ', max_diff
    write(*, *) 'abs difference should be less than 1e-15'
+   if (max_diff > 1E-15_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
    like = likelihood_function(x, y, obs_param, obs_dist_type, &
          bounded_above=.true.,bounded_below=.false.,upper_bound=4._r8,lower_bound=0._r8)
+   max_diff = abs(1.048912359301403_r8 - like)
    write(*, *) '----------------------------'
    write(*, *) 'Truncated-normal obs distribution test, bounded above'
-   write(*, *) 'abs difference in likelihood is ', abs(1.048912359301403_r8 - like)
+   write(*, *) 'abs difference in likelihood is ', max_diff
    write(*, *) 'abs difference should be less than 1e-15'
+   if (max_diff > 1E-15_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    ! Test bandwidth selection
    target_bandwidths(:) = 2.462288826689833_r8 * [1._r8, 1._r8]
@@ -1038,9 +1087,16 @@ subroutine test_kde
    ! Compare computed bandwidths to exact
    write(*, *) '----------------------------'
    write(*, *) 'kde bandwidths test: Absolute value of differences should be less than 1e-15'
+   max_diff = 0._r8
    do i = 1, ens_size
+      max_diff = max(abs(bandwidths(i) - target_bandwidths(i)), max_diff)
       write(*, *) i, abs(bandwidths(i) - target_bandwidths(i))
    end do
+   if (max_diff > 1E-15_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    ! Test the inversion of the cdf over the support of the pdf, unbounded
    write(*, *) '----------------------------'
@@ -1062,6 +1118,11 @@ subroutine test_kde
    ! The accuracy degrades near the boundary. The slope of the cdf goes to zero on the boundary
    ! But at least the second derivative is nonzero. So near the boundary it behaves like a
    ! double root, and the accuracy of the rootfinding degrades.
+   if (max_diff > 1E-8_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    call deallocate_distribution_params(p)
 
@@ -1086,6 +1147,11 @@ subroutine test_kde
 
    write(*, *) 'max difference in inversion is ', max_diff
    write(*, *) 'Errors should be small, max below 1E-8'
+   if (max_diff > 1E-8_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    call deallocate_distribution_params(p)
 
@@ -1111,6 +1177,11 @@ subroutine test_kde
 
    write(*, *) 'max difference in inversion is ', max_diff
    write(*, *) 'Errors should be small, max below 1E-8'
+   if (max_diff > 1E-8_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    call deallocate_distribution_params(p)
 
@@ -1135,6 +1206,11 @@ subroutine test_kde
 
    write(*, *) 'max difference in inversion is ', max_diff
    write(*, *) 'Errors should be small, max below 1E-8'
+   if (max_diff > 1E-8_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    call deallocate_distribution_params(p)
 
@@ -1148,10 +1224,16 @@ subroutine test_kde
    p%more_params(ens_size + 1) = 1._r8
 
    y = integrate_pdf(0._r8, p)
+   max_diff = abs(0.5_r8 - y)
    write(*, *) '----------------------------'
    write(*, *) 'test quadrature'
-   write(*, *) 'abs difference is ', abs(0.5_r8 - y)
+   write(*, *) 'abs difference is ', max_diff
    write(*, *) 'abs difference should be less than 1e-15'
+   if (max_diff > 1E-15_r8) then
+      write(*, *) 'FAIL'
+   else
+      write(*, *) 'PASS'
+   endif
 
    call deallocate_distribution_params(p)
 
diff --git a/assimilation_code/modules/assimilation/probit_transform_mod.f90 b/assimilation_code/modules/assimilation/probit_transform_mod.f90
index c574e11680..c5645c15a1 100644
--- a/assimilation_code/modules/assimilation/probit_transform_mod.f90
+++ b/assimilation_code/modules/assimilation/probit_transform_mod.f90
@@ -477,7 +477,6 @@ subroutine to_probit_kde(ens_size, state_ens, p, probit_ens, use_input_p, &
    type(distribution_params_type) :: p_interior
    real(r8) :: d(ens_size), d_max
    real(r8) :: p_lower, p_int, p_upper
-   real(r8), parameter :: eps = 0._r8 !epsilon(1._r8)
 
    ! Get the ensemble that defines the transform
    if(use_input_p) then
@@ -528,9 +527,9 @@ subroutine to_probit_kde(ens_size, state_ens, p, probit_ens, use_input_p, &
    do i=1,ens_size
       ! Map each state_ens member to a probability u using the prior cdf. Members
       ! on the boundary map to p_lower and p_upper.
-      if (bounded_below .and. (state_ens(i) .le. lower_bound + eps)) then
+      if (bounded_below .and. (state_ens(i) .le. lower_bound)) then
          u = p_lower
-      elseif (bounded_above .and. (state_ens(i) .ge. upper_bound - eps)) then
+      elseif (bounded_above .and. (state_ens(i) .ge. upper_bound)) then
          u = 1._r8 - p_upper
       elseif ((ens_size_interior .le. 1) .or. (d_max .le. 0._r8)) then
          ! Can't use kde with only one ensemble member, so assign to middle of interior range
diff --git a/assimilation_code/modules/assimilation/rootfinding_mod.f90 b/assimilation_code/modules/assimilation/rootfinding_mod.f90
index 34c0a9cf75..b0db8a2c13 100644
--- a/assimilation_code/modules/assimilation/rootfinding_mod.f90
+++ b/assimilation_code/modules/assimilation/rootfinding_mod.f90
@@ -10,7 +10,7 @@ module rootfinding_mod
 
 use types_mod,               only : r8
 
-use utilities_mod,           only : E_ERR, E_MSG, error_handler
+use utilities_mod,           only : E_ERR, E_MSG, E_ALLMSG, error_handler
 
 use distribution_params_mod, only : distribution_params_type, deallocate_distribution_params
 
@@ -59,9 +59,9 @@ function first_guess(u, p)
    ! from Oliveira & Takahashi, ACM Trans Math Soft, 2020. Here f(x) = cdf(x) - cdf_in
 
    ! Local variables:
-   integer,  parameter :: max_iterations = 50 ! Limit on the total number of iterations.
-   real(r8), parameter :: min_probability = 0.0_r8,  max_probability = 0.999999999999999_r8
-   real(r8), parameter :: tol = epsilon(1._r8)**(0.75_r8) ! Absolute on q, relative on x
+   integer,  parameter :: MAX_ITERATIONS = 50 ! Limit on the total number of iterations.
+   real(r8), parameter :: MIN_PROBABILITY = 0.0_r8,  MAX_PROBABILITY = 0.999999999999999_r8
+   real(r8), parameter :: TOL = epsilon(1._r8)**(0.75_r8) ! Absolute on q, relative on x
    real(r8) :: u, u_err
    real(r8) :: delta_x, delta_f
    real(r8) :: x_guess, u_guess, x0, x1, f0, f1
@@ -94,9 +94,9 @@ function first_guess(u, p)
    endif
 
    ! If input probabilities are outside the numerically supported range, move them to the extremes
-   u = min(u, max_probability)
+   u = min(u, MAX_PROBABILITY)
    ! code tests stably for many distributions with min_probability of 0.0, could remove this
-   u = max(u, min_probability)
+   u = max(u, MIN_PROBABILITY)
 
    ! Get first guess from functional approximation
    x_guess = first_guess(u, p)
@@ -106,7 +106,7 @@ function first_guess(u, p)
 
    ! Check if first guess is close enough
    u_err = u - u_guess
-   if (abs(u_err) .le. tol) then
+   if (abs(u_err) .le. TOL) then
       quantile = x_guess
       return
    end if
@@ -117,7 +117,7 @@ function first_guess(u, p)
       b  = x_guess
       fa = 0._r8 - u
       fb = u_guess - u
-      quantile = inv_cdf_ITP(cdf, u, a, b, fa, fb, max_iterations, p)
+      quantile = inv_cdf_ITP(cdf, u, a, b, fa, fb, MAX_ITERATIONS, p)
       return
    end if
 
@@ -127,7 +127,7 @@ function first_guess(u, p)
       b  = upper_bound
       fa = u_guess
       fb = 1._r8
-      quantile = inv_cdf_ITP(cdf, u, a, b, fa, fb, max_iterations, p)
+      quantile = inv_cdf_ITP(cdf, u, a, b, fa, fb, MAX_ITERATIONS, p)
       return
    end if
 
@@ -152,14 +152,14 @@ function first_guess(u, p)
          exit
       end if
    end do
-   do iter = 1, max_iterations
+   do iter = 1, MAX_ITERATIONS
       ! If f0 * f1 < 0 then x0 and x1 bracket the root, so go to ITP
       if (f0 * f1 .lt. 0._r8 ) then
          a  = min(x0, x1)
          b  = max(x0, x1)
          fa = min(f0, f1)
          fb = max(f0, f1)
-         quantile = inv_cdf_ITP(cdf, u, a, b, fa, fb, max_iterations - iter + 1, p)
+         quantile = inv_cdf_ITP(cdf, u, a, b, fa, fb, MAX_ITERATIONS - iter + 1, p)
          return
       else ! Do a secant step
          delta_f = f1 - f0
@@ -169,7 +169,7 @@ function first_guess(u, p)
             write(*,*) 'More params:', p%more_params(:)
             write(*,*) 'iter, x0, x1, f0, f1:', iter, x0, x1, f0, f1
             write(*,*) 'ens:', p%ens(:)
-            call error_handler(E_MSG, 'rootfinding_mod:inv_cdf', errstring, source)
+            call error_handler(E_MSG, 'inv_cdf:', errstring, source)
             quantile = x1
             return
          else
@@ -186,7 +186,7 @@ function first_guess(u, p)
       end if
 
       ! Finished secant step. Check for convergence.
-      if (abs(delta_x) .le. tol * max(abs(x0), abs(x1)) .or. (abs(f1) .le. tol)) then
+      if (abs(delta_x) .le. TOL * max(abs(x0), abs(x1)) .or. (abs(f1) .le. TOL)) then
          return
       endif
    end do
@@ -195,7 +195,7 @@ function first_guess(u, p)
    ! This has implications for stability of probit algorithms that require further study
    ! Not currently happening for any of the test cases on gfortran
    write(errstring, *)  'Failed to converge for probability ', u
-   call error_handler(E_MSG, 'inv_cdf', errstring, source)
+   call error_handler(E_ALLMSG, 'inv_cdf', errstring, source)
    !!!call error_handler(E_ERR, 'inv_cdf', errstring, source)
 
 end function inv_cdf
@@ -224,10 +224,9 @@ function cdf(x, p)
    ! Soft, 2020. Here f(x) = cdf(x) - u. Assumes f(a) < 0 and f(b) > 0 on input.
 
    ! Local variables:
-   integer,  parameter :: n0 = 1._r8
-   real(r8), parameter :: kappa2 = 2._r8
-   real(r8), parameter :: phi = 0.5_r8 * (1._r8 + sqrt(5._r8)) ! Golden ratio
-   real(r8), parameter :: tol = epsilon(1._r8)**(0.75_r8) ! abs on f, rel on x
+   integer,  parameter :: N0 = 1._r8
+   real(r8), parameter :: KAPPA2 = 2._r8
+   real(r8), parameter :: TOL = epsilon(1._r8)**(0.75_r8) ! abs on f, rel on x
    real(r8) :: kappa1
    real(r8) :: delta
    real(r8) :: x_t, x_f, x_half, f_ITP
@@ -235,16 +234,16 @@ function cdf(x, p)
    integer  :: i, n_max, n_half
 
    kappa1 = 0.2_r8 / (b - a)
-   eps = tol * max(abs(a), abs(b))
+   eps = TOL * max(abs(a), abs(b))
    ! Max number of iterations is either (i) input max, or (ii) number of bisection
-   ! iterations (plus n0) needed to achieve the desired relative tolerance on x.
+   ! iterations (plus N0) needed to achieve the desired relative tolerance on x.
    n_half = max(0, ceiling(log(0.5_r8 * (b - a) / eps) / log(2._r8)))
-   n_max = min(max_iterations, n0 + n_half)
+   n_max = min(max_iterations, N0 + n_half)
 
    do i=1,n_max
       x_half = 0.5_r8 * (a + b) ! Bisection guess
       x_f    = (b* fa - a * fb) / (fa - fb) ! Secant/Regula-Falsi guess
-      delta  = kappa1 * abs(b - a)**kappa2
+      delta  = kappa1 * abs(b - a)**KAPPA2
       if (delta .le. abs(x_half - x_f)) then
          x_t = x_f + sign(delta, x_half - x_f)
       else
@@ -257,7 +256,7 @@ function cdf(x, p)
          x = x_half - sign(r, x_half - x_f)
       end if
       f_ITP = cdf(x, p) - u
-      if (abs(f_ITP) .le. tol) then
+      if (abs(f_ITP) .le. TOL) then
          return
       elseif (f_ITP .gt. 0._r8) then
          b  = x
diff --git a/developer_tests/test_kde_dist/test_kde_dist.f90 b/developer_tests/test_kde_dist/test_kde_dist.f90
index ae071996cc..8e4d1e9e62 100644
--- a/developer_tests/test_kde_dist/test_kde_dist.f90
+++ b/developer_tests/test_kde_dist/test_kde_dist.f90
@@ -9,4 +9,4 @@ program test_kde_mod
    call test_kde()
    call finalize_utilities()
 
-end program test_kde_mod
\ No newline at end of file
+end program test_kde_mod

From 3d3e09981943c2c8a013c1e08c83c9183104b5d7 Mon Sep 17 00:00:00 2001
From: Helen Kershaw <hkershaw@ucar.edu>
Date: Wed, 7 Aug 2024 07:57:07 -0600
Subject: [PATCH 08/14] style: remame test direcory

test_kde_dist -> kde_test. This is for convention and for    gitignore (program and directory not the same name)
---
 developer_tests/{test_kde_dist => kde_dist}/test_kde_dist.f90  | 0
 developer_tests/{test_kde_dist => kde_dist}/test_kde_dist.rst  | 0
 developer_tests/{test_kde_dist => kde_dist}/work/input.nml     | 0
 developer_tests/{test_kde_dist => kde_dist}/work/quickbuild.sh | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename developer_tests/{test_kde_dist => kde_dist}/test_kde_dist.f90 (100%)
 rename developer_tests/{test_kde_dist => kde_dist}/test_kde_dist.rst (100%)
 rename developer_tests/{test_kde_dist => kde_dist}/work/input.nml (100%)
 rename developer_tests/{test_kde_dist => kde_dist}/work/quickbuild.sh (100%)

diff --git a/developer_tests/test_kde_dist/test_kde_dist.f90 b/developer_tests/kde_dist/test_kde_dist.f90
similarity index 100%
rename from developer_tests/test_kde_dist/test_kde_dist.f90
rename to developer_tests/kde_dist/test_kde_dist.f90
diff --git a/developer_tests/test_kde_dist/test_kde_dist.rst b/developer_tests/kde_dist/test_kde_dist.rst
similarity index 100%
rename from developer_tests/test_kde_dist/test_kde_dist.rst
rename to developer_tests/kde_dist/test_kde_dist.rst
diff --git a/developer_tests/test_kde_dist/work/input.nml b/developer_tests/kde_dist/work/input.nml
similarity index 100%
rename from developer_tests/test_kde_dist/work/input.nml
rename to developer_tests/kde_dist/work/input.nml
diff --git a/developer_tests/test_kde_dist/work/quickbuild.sh b/developer_tests/kde_dist/work/quickbuild.sh
similarity index 100%
rename from developer_tests/test_kde_dist/work/quickbuild.sh
rename to developer_tests/kde_dist/work/quickbuild.sh

From 0e9af1fe02bdeff4a5f182945ebd609fd0fb70a4 Mon Sep 17 00:00:00 2001
From: Helen Kershaw <hkershaw@ucar.edu>
Date: Wed, 7 Aug 2024 08:17:13 -0600
Subject: [PATCH 09/14] fix: Added preprocess to quickbuild.sh so test_kde_dist
 compiles

Updated input.nml for preprocess. Use default term level for errors
Added test_kde_dist executable to .gitignore
---
 .gitignore                                  |  1 +
 developer_tests/kde_dist/work/input.nml     | 11 +++++++++--
 developer_tests/kde_dist/work/quickbuild.sh |  7 +++++--
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5d1f27c778..f2344a85b9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -197,6 +197,7 @@ test_quad_irreg_interp
 test_quad_reg_interp
 test_table_read
 test_ran_unif
+test_kde_dist
 
 # Directories to NOT IGNORE ... same as executable names
 # as far as I know, these must be listed after the executables
diff --git a/developer_tests/kde_dist/work/input.nml b/developer_tests/kde_dist/work/input.nml
index 0d3e6e2380..ab9ab5619b 100644
--- a/developer_tests/kde_dist/work/input.nml
+++ b/developer_tests/kde_dist/work/input.nml
@@ -1,5 +1,12 @@
 &utilities_nml
-   TERMLEVEL = 1,
    module_details = .false.
-   logfilename = 'dart_log.out'  
    /
+
+&preprocess_nml
+   input_obs_qty_mod_file  = '../../../assimilation_code/modules/observations/DEFAULT_obs_kind_mod.F90'
+   output_obs_qty_mod_file = '../../../assimilation_code/modules/observations/obs_kind_mod.f90'
+   input_obs_def_mod_file  = '../../../observations/forward_operators/DEFAULT_obs_def_mod.F90'
+   output_obs_def_mod_file = '../../../observations/forward_operators/obs_def_mod.f90'
+   obs_type_files          = '../../../observations/forward_operators/obs_def_gps_mod.f90'
+   quantity_files          = '../../../assimilation_code/modules/observations/default_quantities_mod.f90'
+   / 
diff --git a/developer_tests/kde_dist/work/quickbuild.sh b/developer_tests/kde_dist/work/quickbuild.sh
index 1a31852f1b..94912c0029 100755
--- a/developer_tests/kde_dist/work/quickbuild.sh
+++ b/developer_tests/kde_dist/work/quickbuild.sh
@@ -14,7 +14,7 @@ MODEL="none"
 EXTRA="$DART"/models/template/threed_model_mod.f90
 dev_test=1
 LOCATION="threed_sphere"
-TEST="test_kde_dist"
+TEST="kde_dist"
 
 serial_programs=(
 test_kde_dist
@@ -26,7 +26,10 @@ arguments "$@"
 # clean the directory
 \rm -f -- *.o *.mod Makefile .cppdefs
 
-# build 
+# build and run preprocess before making any other DART executables
+buildpreprocess
+
+# build DART
 buildit
 
 # clean up

From 4b9f50320bdd6204c152c031b895bfce697998a3 Mon Sep 17 00:00:00 2001
From: Helen Kershaw <hkershaw@ucar.edu>
Date: Wed, 7 Aug 2024 08:24:15 -0600
Subject: [PATCH 10/14] remove empty docs

tests print pass/fail https://github.com/NCAR/DART/pull/706#discussion_r1691508023
---
 developer_tests/kde_dist/test_kde_dist.rst | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 developer_tests/kde_dist/test_kde_dist.rst

diff --git a/developer_tests/kde_dist/test_kde_dist.rst b/developer_tests/kde_dist/test_kde_dist.rst
deleted file mode 100644
index 47401fe34a..0000000000
--- a/developer_tests/kde_dist/test_kde_dist.rst
+++ /dev/null
@@ -1 +0,0 @@
-documentation should be here

From a8b05f534fe9b7b6d737faf6a8dd70187561f6a9 Mon Sep 17 00:00:00 2001
From: Ian Grooms <ian.grooms@colorado.edu>
Date: Thu, 8 Aug 2024 15:13:11 -0600
Subject: [PATCH 11/14] Remove write

---
 assimilation_code/modules/assimilation/kde_distribution_mod.f90 | 2 --
 1 file changed, 2 deletions(-)

diff --git a/assimilation_code/modules/assimilation/kde_distribution_mod.f90 b/assimilation_code/modules/assimilation/kde_distribution_mod.f90
index 7afa415c7b..a007070a2e 100644
--- a/assimilation_code/modules/assimilation/kde_distribution_mod.f90
+++ b/assimilation_code/modules/assimilation/kde_distribution_mod.f90
@@ -866,12 +866,10 @@ subroutine obs_increment_kde(ens, ens_size, y, obs_param, bounded_below, &
          ! I draw one random (above) then add 1/Nb to each subsequent value
          u = unif + real(count_lower, r8) / real(ens_size, r8)
          count_lower = count_lower + 1
-         write(*,*) 'lower boundary probability ', u
        elseif (bounded_above .and.  (ens(i) .ge. upper_bound)) then
          ! As above, I draw one random then add 1/Nb to each subsequent value
          u = 1._r8 - (unif + real(count_upper, r8) / real(ens_size, r8))
          count_upper = count_upper + 1
-         write(*,*) 'upper boundary probability ', u
        elseif ((ens_size_interior .eq. 1) .or. (d_max .le. 0._r8)) then
          ! Can't use kde with only one ensemble member (or identical ensemble members), so assign a
          ! random probability

From dbb491cde527774b95aa2232a8d4202e2c53b8c9 Mon Sep 17 00:00:00 2001
From: Ian Grooms <ian.grooms@colorado.edu>
Date: Fri, 16 Aug 2024 15:53:15 -0600
Subject: [PATCH 12/14] Obs increment kde

Adds logic to handle the case where the observation is bounded,
and all the observations _in the interior_ (i.e. not on the
boundaries) are identical. This commit only fixes this case
for the obs increment subroutine; the probit transform will
be handled separately.
---
 .../assimilation/kde_distribution_mod.f90     | 25 +++++++++++--------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/assimilation_code/modules/assimilation/kde_distribution_mod.f90 b/assimilation_code/modules/assimilation/kde_distribution_mod.f90
index a007070a2e..3dee62db75 100644
--- a/assimilation_code/modules/assimilation/kde_distribution_mod.f90
+++ b/assimilation_code/modules/assimilation/kde_distribution_mod.f90
@@ -780,7 +780,7 @@ subroutine obs_increment_kde(ens, ens_size, y, obs_param, bounded_below, &
    type(distribution_params_type) :: params_interior_prior
    type(distribution_params_type) :: params_interior_posterior
    integer  :: ens_size_interior
-   integer  :: i, count_lower, count_upper
+   integer  :: i, count_lower, count_int, count_upper
    integer  :: obs_dist_type
 
    if (bounded_below .or. bounded_above) then
@@ -815,12 +815,14 @@ subroutine obs_increment_kde(ens, ens_size, y, obs_param, bounded_below, &
    if (ens_size_interior .gt. 1) then
       d(1:ens_size_interior) = abs( ens_interior(1:ens_size_interior) - ens_interior(1) )
       d_max = maxval(d(1:ens_size_interior))
-      call pack_kde_params(ens_size_interior, bounded_below, bounded_above, lower_bound, upper_bound, &
-                           ens_interior, y, obs_param, obs_dist_types%uninformative, &
-                           params_interior_prior)
-      call pack_kde_params(ens_size_interior, bounded_below, bounded_above, lower_bound, upper_bound, &
-                           ens_interior, y, obs_param, obs_dist_type, &
-                           params_interior_posterior)
+      if (d_max .gt. 0._r8) then
+         call pack_kde_params(ens_size_interior, bounded_below, bounded_above, lower_bound, upper_bound, &
+                              ens_interior, y, obs_param, obs_dist_types%uninformative, &
+                              params_interior_prior)
+         call pack_kde_params(ens_size_interior, bounded_below, bounded_above, lower_bound, upper_bound, &
+                              ens_interior, y, obs_param, obs_dist_type, &
+                              params_interior_posterior)
+      endif
    else
       d_max = 0._r8
    endif
@@ -856,6 +858,7 @@ subroutine obs_increment_kde(ens, ens_size, y, obs_param, bounded_below, &
    ! Update ensemble members -> get observation increments
 
    count_lower = 0
+   count_int   = 0
    count_upper = 0
    unif = random_uniform(inc_ran_seq) / real(ens_size, r8)
    do i=1,ens_size
@@ -872,8 +875,9 @@ subroutine obs_increment_kde(ens, ens_size, y, obs_param, bounded_below, &
          count_upper = count_upper + 1
        elseif ((ens_size_interior .eq. 1) .or. (d_max .le. 0._r8)) then
          ! Can't use kde with only one ensemble member (or identical ensemble members), so assign a
-         ! random probability
-         u = (p_int_prior * random_uniform(inc_ran_seq)) + p_lower_prior
+         ! random probability. As above, I draw one random then add 1/Nb to each subsequent value
+         u = unif + p_lower_prior + real(count_int, r8) / real(ens_size, r8)
+         count_int = count_int + 1
       else ! Use the interior cdf obtained using kde.
          u = kde_cdf_params(ens(i), params_interior_prior)
          u = (p_int_prior * u) + p_lower_prior
@@ -886,7 +890,8 @@ subroutine obs_increment_kde(ens, ens_size, y, obs_param, bounded_below, &
          obs_inc(i) = upper_bound - ens(i)
       elseif ((ens_size_interior .eq. 1) .or. (d_max .le. 0._r8)) then
          ! posterior value in the interior, but there's only one prior ensemble member/value
-         ! in the interior, so we just assign the posterior ensemble member equal to the prior one
+         ! in the interior (or prior ensemble members in the interior are identical), so we
+         ! just assign the posterior ensemble member equal to the prior one
          obs_inc(i) = ens_interior(1) - ens(i)
       else ! posterior value in the interior, can use kde
          ! Rescale u to be between 0 and 1 before inverting interior cdf

From b310b35419d2880510bc1f6854b26b7450edbc0a Mon Sep 17 00:00:00 2001
From: Ian Grooms <ian.grooms@colorado.edu>
Date: Fri, 23 Aug 2024 07:45:21 -0600
Subject: [PATCH 13/14] Update filter name

---
 .../assimilation/algorithm_info_mod.f90       |  8 ++--
 .../modules/assimilation/assim_tools_mod.f90  |  4 +-
 guide/qceff_probit.rst                        | 42 +++++++++----------
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/assimilation_code/modules/assimilation/algorithm_info_mod.f90 b/assimilation_code/modules/assimilation/algorithm_info_mod.f90
index ad095a205b..41887d4cad 100644
--- a/assimilation_code/modules/assimilation/algorithm_info_mod.f90
+++ b/assimilation_code/modules/assimilation/algorithm_info_mod.f90
@@ -41,12 +41,12 @@ module algorithm_info_mod
 integer, parameter :: UNBOUNDED_RHF      = 8
 integer, parameter :: GAMMA_FILTER       = 11
 integer, parameter :: BOUNDED_NORMAL_RHF = 101
-integer, parameter :: KERNEL_QCEF        = 102
+integer, parameter :: KDE_FILTER        = 102
 
 public :: obs_error_info, probit_dist_info, obs_inc_info, &
           init_algorithm_info_mod, end_algorithm_info_mod, &
           EAKF, ENKF, BOUNDED_NORMAL_RHF, UNBOUNDED_RHF, &
-          GAMMA_FILTER, KERNEL, OBS_PARTICLE, KERNEL_QCEF
+          GAMMA_FILTER, KERNEL, OBS_PARTICLE, KDE_FILTER
 
 ! type definitions for the QCF table
 type obs_error_info_type
@@ -297,8 +297,8 @@ subroutine read_qceff_table(qceff_table_filename)
          qceff_table_data(row)%obs_inc_info%filter_kind = GAMMA_FILTER
       case ('BOUNDED_NORMAL_RHF')
          qceff_table_data(row)%obs_inc_info%filter_kind = BOUNDED_NORMAL_RHF
-      case ('KERNEL_QCEF')
-         qceff_table_data(row)%obs_inc_info%filter_kind = KERNEL_QCEF
+      case ('KDE_FILTER')
+         qceff_table_data(row)%obs_inc_info%filter_kind = KDE_FILTER
       case default
          write(errstring, *) 'Invalid filter kind: ', trim(filter_kind_string(row))
          call error_handler(E_ERR, 'read_qceff_table:', errstring, source)
diff --git a/assimilation_code/modules/assimilation/assim_tools_mod.f90 b/assimilation_code/modules/assimilation/assim_tools_mod.f90
index 55a79b867c..5787c0bebd 100644
--- a/assimilation_code/modules/assimilation/assim_tools_mod.f90
+++ b/assimilation_code/modules/assimilation/assim_tools_mod.f90
@@ -78,7 +78,7 @@ module assim_tools_mod
 
 use algorithm_info_mod, only : probit_dist_info, obs_inc_info, EAKF, ENKF, &
                                BOUNDED_NORMAL_RHF, UNBOUNDED_RHF, GAMMA_FILTER, &
-                               KERNEL, OBS_PARTICLE, KERNEL_QCEF
+                               KERNEL, OBS_PARTICLE, KDE_FILTER
 
 use gamma_distribution_mod, only : gamma_cdf, inv_gamma_cdf, gamma_mn_var_to_shape_scale, &
                                    gamma_gamma_prod
@@ -1030,7 +1030,7 @@ subroutine obs_increment(ens_in, ens_size, obs, obs_var, obs_kind, obs_inc, &
 
    !--------------------------------------------------------------------------
 
-   else if(filter_kind == KERNEL_QCEF) then
+   else if(filter_kind == KDE_FILTER) then
       call obs_increment_kde(ens, ens_size, obs, obs_var, bounded_below, &
          bounded_above, lower_bound, upper_bound, obs_inc)
    else
diff --git a/guide/qceff_probit.rst b/guide/qceff_probit.rst
index e7bfa4ce72..c46cf47bd6 100644
--- a/guide/qceff_probit.rst
+++ b/guide/qceff_probit.rst
@@ -4,8 +4,8 @@ Quantile-Conserving Ensemble Filter Framework
 ==============================================
 
 The Quantile-Conserving Ensemble Filter Framework (QCEFF) tools are available in DART
-as of version v11. 
-The DART development team (dart@ucar.edu) would be happy to hear about your experiences 
+as of version v11.
+The DART development team (dart@ucar.edu) would be happy to hear about your experiences
 and is anxious to build scientific collaborations using these new capabilities.
 
 The QCEFF options are set using a :ref:`qceff table <qceff table>` given as a namelist option to &algorithm_info_nml.
@@ -21,7 +21,7 @@ The QCEFF options are set using a :ref:`qceff table <qceff table>` given as a na
 QCEFF options
 --------------
 
-QCEFF options are per quantity. For a given quantity, you specify the following 
+QCEFF options are per quantity. For a given quantity, you specify the following
 options as columns of the qceff_table:
 
 * Observation error information
@@ -29,19 +29,19 @@ options as columns of the qceff_table:
    Provides information about boundedness constraints that control the likelihood
    distribution associated with an observed variable when using perfect_model_obs
    to generate noisy observations.
-   
-     * bounded_below (default .false.) 
+
+     * bounded_below (default .false.)
      * bounded_above (default .false.)
-     * lower_bound   
+     * lower_bound
      * upper_bound
 
 
-* Probit distribution information 
+* Probit distribution information
 
    Used in the computation of the probit transform.
    The values needed are the bounds and the distribution type.
    These can be different for all three cases (inflation, state, and extended_state priors)
-   
+
      * distribution (one of :ref:`Distributions`)
      * bounded_below (default .false.)
      * bounded_above (default .false.)
@@ -65,17 +65,17 @@ Creating a qceff table
 -----------------------
 
 The table has two headers, row 1 and 2.
-The first row is the version number.  The second row describes the :ref:`QCEFF options` corresponding to each column of the table. 
-These two headers must be present in your qceff table. 
-The :ref:`qcf trunc table` below shows the table headers, 
-and an example quantity QTY_TRACER_CONCENTRATION for the first 5 columns of the table. 
+The first row is the version number.  The second row describes the :ref:`QCEFF options` corresponding to each column of the table.
+These two headers must be present in your qceff table.
+The :ref:`qcf trunc table` below shows the table headers,
+and an example quantity QTY_TRACER_CONCENTRATION for the first 5 columns of the table.
 There is a complete table with all 25 columns in `Google Sheets <https://docs.google.com/spreadsheets/d/1CRGHWc7boQt81pw2pDxEFY6WPyQeCh64OwPyoVMqijE/edit?usp=sharing>`_. You can copy and edit this table as needed.
 
 To add a quantity, add a row to the table.
-For any quantity not listed in the table, the :ref:`Default values` values will be used for all 25 options. 
+For any quantity not listed in the table, the :ref:`Default values` values will be used for all 25 options.
 You only have to add rows for quantities that use non-default values for any of the input options.
 Ensure that there are no empty rows in between the quantities listed in the spreadsheet.
-Save your spreadsheet as a .csv file. 
+Save your spreadsheet as a .csv file.
 
 To run filter or perfect_model_obs, put the .csv file in the directory where you are running.
 Edit input.nml to set the algorithm_info_nml option qceff_table_filename, for example:
@@ -89,14 +89,14 @@ Edit input.nml to set the algorithm_info_nml option qceff_table_filename, for ex
 
 .. _qcf trunc table:
 
-.. list-table:: truncated table 
+.. list-table:: truncated table
    :header-rows: 2
 
    * - QCF table version: 1
-     - 
-     -  
-     -  
-     - 
+     -
+     -
+     -
+     -
    * - QTY
      - bounded_below
      - bounded_above
@@ -119,7 +119,7 @@ Available filter kinds
    * UNBOUNDED_RHF
    * GAMMA_FILTER
    * BOUNDED_NORMAL_RHF
-   * KERNEL_QCEF
+   * KDE_FILTER
 
 .. _Distributions:
 
@@ -128,7 +128,7 @@ Available distributions
 
   * NORMAL_DISTRIBUTION (default)
   * BOUNDED_NORMAL_RH_DISTRIBUTION
-  * GAMMA_DISTRIBUTION 
+  * GAMMA_DISTRIBUTION
   * BETA_DISTRIBUTION
   * LOG_NORMAL_DISTRIBUTION
   * UNIFORM_DISTRIBUTION

From 4a6836972ae4c822ecea621b5e44c73d37f3adae Mon Sep 17 00:00:00 2001
From: Helen Kershaw <hkershaw@ucar.edu>
Date: Fri, 23 Aug 2024 13:07:03 -0400
Subject: [PATCH 14/14] bump version and changlog for release

---
 CHANGELOG.rst | 8 ++++++++
 conf.py       | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index db39b19100..b0ed5bc384 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -22,6 +22,14 @@ individual files.
 
 The changes are now listed with the most recent at the top.
 
+**August 26 2024 :: KQCEF. Tag 11.7.0**
+
+- Adds a Quantile-Conserving Ensemble Filter Based on Kernel-Density Estimation to DART.
+- New distribution module kde_distribution_mod.
+- New module rootfinding_mod that provides a different implementation of inv_cdf.
+
+*Contributed by Ian Grooms*
+
 **August 15 2024 :: WRF fwd operator bug fixes. Tag v11.6.1**
 
 WRF-DART bug-fixes:
diff --git a/conf.py b/conf.py
index fdfe8997c0..c0c136b518 100644
--- a/conf.py
+++ b/conf.py
@@ -21,7 +21,7 @@
 author = 'Data Assimilation Research Section'
 
 # The full version, including alpha/beta/rc tags
-release = '11.6.1'
+release = '11.7.0'
 root_doc = 'index'
 
 # -- General configuration ---------------------------------------------------