More Kriging improvements (log likelihood, noise variance, more tests) #379

Draft · wants to merge 3 commits into base: master
105 changes: 71 additions & 34 deletions src/Kriging.jl
@@ -1,10 +1,12 @@
+using Zygote
+
 #=
-One-dimensional Kriging method, following these papers:
+Kriging (Gaussian process interpolation/regression) method, following these papers:
 "Efficient Global Optimization of Expensive Black Box Functions" and
 "A Taxonomy of Global Optimization Methods Based on Response Surfaces"
 both by DONALD R. JONES
 =#
-mutable struct Kriging{X, Y, L, U, P, T, M, B, S, R} <: AbstractSurrogate
+mutable struct Kriging{X, Y, L, U, P, T, M, B, S, R, N} <: AbstractSurrogate
     x::X
     y::Y
     lb::L
@@ -15,6 +17,7 @@ mutable struct Kriging{X, Y, L, U, P, T, M, B, S, R} <: AbstractSurrogate
     b::B
     sigma::S
     inverse_of_R::R
+    noise_variance::N
 end
 
 """
@@ -101,29 +104,36 @@ Constructor for type Kriging.
   smoothness of the function being approximated, 0-> rough 2-> C^infinity
 - theta: value > 0 modeling how much the function is changing in the i-th variable.
 """
-function Kriging(x, y, lb::Number, ub::Number; p = 2.0,
-                 theta = 0.5 / max(1e-6 * abs(ub - lb), std(x))^p)
-    if length(x) != length(unique(x))
-        println("There exists a repetion in the samples, cannot build Kriging.")
-        return
-    end
-
-    if p > 2.0 || p < 0.0
-        throw(ArgumentError("Hyperparameter p must be between 0 and 2! Got: $p."))
-    end
-
-    if theta ≤ 0
-        throw(ArgumentError("Hyperparameter theta must be positive! Got: $theta"))
-    end
+function Kriging(x, y, lb::Number, ub::Number; p = 1.99,
+                 theta = 0.5 / max(1e-6 * abs(ub - lb), std(x))^p,
+                 noise_variance = 0.0)
+
+    # Need autodiff to ignore these checks. When optimizing hyperparameters, these won't
+    # matter as the optimization will be constrained to satisfy these by default.
+    Zygote.ignore() do
+        if length(x) != length(unique(x))
+            println("There exists a repetition in the samples, cannot build Kriging.")
+            return
+        end
+
+        if p > 2.0 || p < 0.0
+            throw(ArgumentError("Hyperparameter p must be between 0 and 2! Got: $p."))
+        end
+
+        if theta ≤ 0
+            throw(ArgumentError("Hyperparameter theta must be positive! Got: $theta"))
+        end
+    end
 
-    mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta)
-    Kriging(x, y, lb, ub, p, theta, mu, b, sigma, inverse_of_R)
+    mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta, noise_variance)
+    Kriging(x, y, lb, ub, p, theta, mu, b, sigma, inverse_of_R, noise_variance)
 end
 
-function _calc_kriging_coeffs(x, y, p::Number, theta::Number)
+function _calc_kriging_coeffs(x, y, p::Number, theta::Number, noise_variance)
     n = length(x)
 
-    R = [exp(-theta * abs(x[i] - x[j])^p) for i in 1:n, j in 1:n]
+    R = [exp(-theta * abs(x[i] - x[j])^p) + (i == j) * noise_variance
+         for i in 1:n, j in 1:n]
 
     # Estimate nugget based on maximum allowed condition number
     # This regularizes R to allow for points being close to each other without R becoming
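
For context on the new keyword: adding noise_variance to the diagonal is the classical "nugget" construction for Gaussian processes. It tells the model that observations are noisy, so the surrogate smooths the data instead of reproducing it exactly. A minimal sketch of the effect, using only the public constructor (the sample data and tolerances are illustrative, not from this PR):

    using Surrogates

    xs = [1.0, 2.0, 4.0, 7.0, 9.0]
    ys = sin.(xs)
    lb, ub = 0.0, 10.0

    # Zero noise variance (the default): the surrogate interpolates the samples.
    interp = Kriging(xs, ys, lb, ub)
    @assert all(isapprox(interp(x), y, atol = 1e-6) for (x, y) in zip(xs, ys))

    # Positive noise variance: the interpolation condition is deliberately relaxed.
    noisy = Kriging(xs, ys, lb, ub, noise_variance = 0.2)
    @assert !all(isapprox(noisy(x), y, atol = 1e-6) for (x, y) in zip(xs, ys))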
@@ -167,29 +177,35 @@ Constructor for Kriging surrogate.
 - theta: array of values > 0 modeling how much the function is
   changing in the i-th variable.
 """
-function Kriging(x, y, lb, ub; p = 2.0 .* collect(one.(x[1])),
+function Kriging(x, y, lb, ub; p = 1.99 .* collect(one.(x[1])),
                  theta = [0.5 / max(1e-6 * norm(ub .- lb), std(x_i[i] for x_i in x))^p[i]
-                          for i in 1:length(x[1])])
-    if length(x) != length(unique(x))
-        println("There exists a repetition in the samples, cannot build Kriging.")
-        return
-    end
-
-    for i in 1:length(x[1])
-        if p[i] > 2.0 || p[i] < 0.0
-            throw(ArgumentError("All p must be between 0 and 2! Got: $p."))
-        end
-
-        if theta[i] ≤ 0.0
-            throw(ArgumentError("All theta must be positive! Got: $theta."))
-        end
-    end
+                          for i in 1:length(x[1])],
+                 noise_variance = 0.0)
+
+    # Need autodiff to ignore these checks. When optimizing hyperparameters, these won't
+    # matter as the optimization will be constrained to satisfy these by default.
+    Zygote.ignore() do

[Review comment, Member]: Use ChainRulesCore.@ignore_derivatives

+        if length(x) != length(unique(x))
+            println("There exists a repetition in the samples, cannot build Kriging.")
+            return
+        end
+
+        for i in 1:length(x[1])
+            if p[i] > 2.0 || p[i] < 0.0
+                throw(ArgumentError("All p must be between 0 and 2! Got: $p."))
+            end
+
+            if theta[i] ≤ 0.0
+                throw(ArgumentError("All theta must be positive! Got: $theta."))
+            end
+        end
+    end
 
-    mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta)
-    Kriging(x, y, lb, ub, p, theta, mu, b, sigma, inverse_of_R)
+    mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta, noise_variance)
+    Kriging(x, y, lb, ub, p, theta, mu, b, sigma, inverse_of_R, noise_variance)
 end
 
-function _calc_kriging_coeffs(x, y, p, theta)
+function _calc_kriging_coeffs(x, y, p, theta, noise_variance)
     n = length(x)
     d = length(x[1])
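
The review comment refers to ChainRulesCore.jl, whose @ignore_derivatives macro marks a block as constant for every ChainRules-aware AD backend rather than for Zygote alone. A sketch of what the suggested replacement might look like (this assumes adding a ChainRulesCore dependency; the change is not part of this diff):

    using ChainRulesCore

    x = [1.0, 2.0, 2.0]  # illustrative sample vector containing a repetition

    # Same validation logic, but opaque to any ChainRules-compatible AD system
    # (Zygote, Diffractor, ...), not only Zygote:
    ChainRulesCore.@ignore_derivatives begin
        if length(x) != length(unique(x))
            println("There exists a repetition in the samples, cannot build Kriging.")
        end
    end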

@@ -198,7 +214,7 @@ function _calc_kriging_coeffs(x, y, p, theta)
         for l in 1:d
             sum = sum + theta[l] * norm(x[i][l] - x[j][l])^p[l]
         end
-        exp(-sum)
+        exp(-sum) + (i == j) * noise_variance
     end
     for j in 1:n, i in 1:n]
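
Written out, each entry assembled by this comprehension is (with the Kronecker delta realized by the (i == j) factor):

    R_{ij} = \exp\left(-\sum_{\ell=1}^{d} \theta_\ell \,\lvert x_{i\ell} - x_{j\ell} \rvert^{p_\ell}\right) + \delta_{ij}\,\sigma^2_{\mathrm{noise}}

Only the diagonal is inflated, so the correlations between distinct points are untouched and R stays symmetric.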

@@ -258,6 +274,27 @@ function add_point!(k::Kriging, new_x, new_y)
         append!(k.x, new_x)
         append!(k.y, new_y)
     end
-    k.mu, k.b, k.sigma, k.inverse_of_R = _calc_kriging_coeffs(k.x, k.y, k.p, k.theta)
+    k.mu, k.b, k.sigma, k.inverse_of_R = _calc_kriging_coeffs(k.x, k.y, k.p, k.theta,
+                                                              k.noise_variance)
 
     nothing
 end
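
Callers of add_point! are unaffected; the stored noise variance is simply carried through when the coefficients are refit. A brief usage sketch (the data are illustrative):

    using Surrogates

    xs = [1.0, 3.0, 5.0]
    krig = Kriging(xs, exp.(-xs), 0.0, 6.0, noise_variance = 0.1)

    # mu, b, sigma and inverse_of_R are recomputed with the same noise variance:
    add_point!(krig, 4.0, exp(-4.0))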

"""
kriging_log_likelihood(x, y, p, theta, noise_variance = 0.0)
Compute the likelihood of the parameters p, theta and noise variance given the data x and y
for a kriging model. Useful as an objective function in hyperparameter optimization.
"""
function kriging_log_likelihood(x, y, p, theta, noise_variance = 0.0)
mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta, noise_variance)

n = length(y)
y_minus_1μ = y - ones(length(y), 1) * mu
Rinv = inverse_of_R

term1 = only(-(y_minus_1μ' * inverse_of_R * y_minus_1μ) / 2 / sigma)
term2 = -log((2π * sigma)^(n / 2) / sqrt(det(Rinv)))

logpdf = term1 + term2
return logpdf
end
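
Since the docstring positions this as an objective for hyperparameter optimization, a minimal sketch of that workflow is gradient ascent with Zygote (the step size, iteration count and data are illustrative; as the inline comments above note, a real optimizer should constrain p to (0, 2) and theta to positive values):

    using Surrogates, Zygote

    xs = [1.0, 2.0, 4.0, 7.0, 9.0]
    ys = sin.(xs)

    params = [1.5, 0.5]  # initial (p, theta)
    for _ in 1:50
        g = Zygote.gradient(ps -> Surrogates.kriging_log_likelihood(xs, ys, ps[1], ps[2]),
                            params)[1]
        params .+= 1e-3 .* g
    end

    # Build the surrogate with the tuned hyperparameters.
    surr = Kriging(xs, ys, 0.0, 10.0, p = params[1], theta = params[2])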
1 change: 1 addition & 0 deletions src/LinearSurrogate.jl
@@ -74,6 +74,7 @@ function LinearSurrogate(x, y, lb, ub)
     #T = transpose(reshape(reinterpret(eltype(x[1]), x), length(x[1]), length(x)))
     X = Array{eltype(x[1]), 2}(undef, length(x), length(x[1]))
     X = copy(T)
+
     ols = lm(X, y)
     return LinearSurrogate(x, y, coef(ols), lb, ub)
 end
2 changes: 1 addition & 1 deletion src/Radials.jl
@@ -26,7 +26,7 @@ multiquadricRadial(c = 1.0) = RadialFunction(1, z -> sqrt((c * norm(z))^2 + 1))
 
 thinplateRadial() = RadialFunction(2, z -> begin
     result = norm(z)^2 * log(norm(z))
-    ifelse(iszero(z), zero(result), result)
+    ifelse(all(==(0), z), zero(result), result)
 end)
 
 """
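
A plausible motivation for this one-line change (inferred from the code, not stated in the PR): z can be a Tuple of coordinates rather than a scalar or an array, and Base.iszero has no method for tuples, while all(==(0), z) accepts any iterable:

    all(==(0), 0.0)          # true (numbers iterate as single-element collections)
    all(==(0), [0.0, 0.0])   # true
    all(==(0), (0.0, 0.0))   # true; iszero((0.0, 0.0)) would throw a MethodError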
5 changes: 3 additions & 2 deletions src/Sampling.jl
@@ -6,8 +6,9 @@ using QuasiMonteCarlo: SamplingAlgorithm
 # of vectors of Tuples
 function sample(args...; kwargs...)
     s = QuasiMonteCarlo.sample(args...; kwargs...)
-    if s isa Vector
-        return s
+    # 1D case: s is a Vector
+    if size(s, 1) == 1
+        return s[1, :]
     elseif size(s, 2) == 1
         return s
     else
         # ND case: s is a d x n matrix, where d is the dimension and n is the number of samples
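
The branch now dispatches purely on the shape of what QuasiMonteCarlo.jl returns. A sketch of the resulting return conventions, mirroring the new "ND but 1D" test added to test/sampling.jl below:

    using Surrogates

    s1 = Surrogates.sample(5, 0.0, 5.0, SobolSample())                # Vector{Float64}
    s2 = Surrogates.sample(5, [0.0], [5.0], SobolSample())            # also a Vector{Float64}
    s3 = Surrogates.sample(5, [0.0, 1.0], [5.0, 6.0], SobolSample())  # Vector of Tuples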
9 changes: 9 additions & 0 deletions test/AD_compatibility.jl
@@ -114,10 +114,15 @@ end
 
 # #Kriging
 @testset "Kriging 1D" begin
+    # Test gradient of surrogate evaluation
     my_p = 1.5
     my_krig = Kriging(x, y, lb, ub, p = my_p)
     g = x -> my_krig'(x)
     g(5.0)
+
+    # test gradient with respect to hyperparameters
+    obj = ((p, θ),) -> Surrogates.kriging_log_likelihood(x, y, p, θ)
+    obj'((1.99, 1.0))
 end
 
 # #Linear Surrogate
@@ -202,6 +207,10 @@ end
     my_krig = Kriging(x, y, lb, ub, p = my_p, theta = my_theta)
     g = x -> Zygote.gradient(my_krig, x)
     g((2.0, 5.0))
+
+    # test gradient with respect to hyperparameters
+    obj = params -> Surrogates.kriging_log_likelihood(x, y, params[1:2], params[3:4])
+    obj'([1.9, 1.9, 2.0, 2.0])
 end
 
 # Linear Surrogate
21 changes: 19 additions & 2 deletions test/Kriging.jl
@@ -3,6 +3,8 @@ using Surrogates
 using Test
 using Statistics
 
+include("test_utils.jl")
+
 #1D
 lb = 0.0
 ub = 10.0
@@ -19,6 +21,9 @@ my_p = 1.9
 my_k = Kriging(x, y, lb, ub, p = my_p)
 @test my_k.theta ≈ 0.5 * std(x)^(-my_p)
 
+# Check to make sure interpolation condition is satisfied
+@test _check_interpolation(my_k)
+
 # Check input dimension validation for 1D Kriging surrogates
 @test_throws ArgumentError my_k(rand(3))
 @test_throws ArgumentError my_k(Float64[])
@@ -65,7 +70,7 @@ std_err = std_error_at_point(my_k, 4.0)
 kwar_krig = Kriging(x, y, lb, ub);
 
 # Check hyperparameter initialization for 1D Kriging surrogates
-p_expected = 2.0
+p_expected = 1.99
 @test kwar_krig.p == p_expected
 @test kwar_krig.theta == 0.5 / std(x)^p_expected

@@ -130,6 +135,18 @@ kwarg_krig_ND = Kriging(x, y, lb, ub)
 
 # Test hyperparameter initialization
 d = length(x[3])
-p_expected = 2.0
+p_expected = 1.99
 @test all(==(p_expected), kwarg_krig_ND.p)
 @test all(kwarg_krig_ND.theta .≈ [0.5 / std(x_i[ℓ] for x_i in x)^p_expected for ℓ in 1:3])
+
+num_replicates = 100
+
+for i in 1:num_replicates
+    # Check that interpolation condition is satisfied when noise variance is zero
+    surr = _random_surrogate(Kriging)
+    @test _check_interpolation(surr)
+
+    # Check that we do not satisfy interpolation condition when noise variance isn't zero
+    surr = _random_surrogate(Kriging, noise_variance = 0.2)
+    @test !_check_interpolation(surr)
+end
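
test/test_utils.jl itself is not shown in this diff. Judging from the call sites, _check_interpolation(surr) checks that the surrogate reproduces its training data and _random_surrogate builds a surrogate of the given type on random samples of a test function. A hypothetical sketch consistent with those call sites (names, bounds and tolerances are assumptions, not taken from the PR):

    using Surrogates

    # Does the surrogate reproduce its own training data?
    function _check_interpolation(surr; atol = 1e-6)
        all(isapprox(surr(x), y, atol = atol) for (x, y) in zip(surr.x, surr.y))
    end

    # Build a surrogate of the given type on random 1D samples of f.
    function _random_surrogate(surrogate_type, f = x -> sin(2x); kwargs...)
        lb, ub = 0.0, 4.0
        xs = lb .+ (ub - lb) .* rand(20)
        surrogate_type(xs, f.(xs), lb, ub; kwargs...)
    end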
12 changes: 12 additions & 0 deletions test/Radials.jl
@@ -3,6 +3,8 @@ using Test
 using LinearAlgebra
 using Surrogates
 
+include("test_utils.jl")
+
 #1D
 lb = 0.0
 ub = 4.0
@@ -174,3 +176,13 @@ y = mockvalues.(x)
 rbf = RadialBasis(x, y, lb, ub, rad = multiquadricRadial(1.788))
 test = (lb .+ ub) ./ 2
 @test isapprox(rbf(test), mockvalues(test), atol = 0.001)
+
+num_replicates = 100
+
+for radial_type in [linearRadial(), cubicRadial(), multiquadricRadial(), thinplateRadial()]
+    for i in 1:num_replicates
+        # Check that interpolation condition is satisfied
+        surr = _random_surrogate(RadialBasis, rad = radial_type)
+        @test _check_interpolation(surr)
+    end
+end
3 changes: 3 additions & 0 deletions test/Wendland.jl
@@ -1,4 +1,7 @@
 using Surrogates
+using LinearAlgebra
+
+include("test_utils.jl")
 
 #1D
 x = [1.0, 2.0, 3.0]
10 changes: 10 additions & 0 deletions test/inverseDistanceSurrogate.jl
@@ -1,5 +1,8 @@
 using Surrogates
 using Test
+using LinearAlgebra
+
+include("test_utils.jl")
 
 #1D
 obj = x -> sin(x) + sin(x)^2 + sin(x)^3
@@ -62,3 +65,10 @@ y_new = f(x_new)
 add_point!(surrogate, x_new, y_new)
 @test surrogate(x_new) ≈ y_new
 surrogate((0.0, 0.0))
+
+num_replicates = 100
+for i in 1:num_replicates
+    # Check that interpolation condition is satisfied
+    surr = _random_surrogate(InverseDistanceSurrogate)
+    @test _check_interpolation(surr)
+end
13 changes: 13 additions & 0 deletions test/linearSurrogate.jl
@@ -1,5 +1,7 @@
 using Surrogates
 
+include("test_utils.jl")
+
 #1D
 x = [1.0, 2.0, 3.0]
 y = [1.5, 3.5, 5.3]
@@ -28,3 +30,14 @@ val = my_linear_ND((10.0, 11.0))
 @test_throws ArgumentError my_linear_ND(Float64[])
 @test_throws ArgumentError my_linear_ND(1.0)
 @test_throws ArgumentError my_linear_ND((2.0, 3.0, 4.0))
+
+num_replicates = 100
+for i in 1:num_replicates
+    # LinearSurrogate should not interpolate the data unless the data/func is linear
+    surr = _random_surrogate(LinearSurrogate)
+    @test !_check_interpolation(surr)
+
+    linear_func = x -> sum(x)
+    surr = _random_surrogate(LinearSurrogate, linear_func)
+    @test _check_interpolation(surr)
+end
9 changes: 9 additions & 0 deletions test/lobachevsky.jl
@@ -4,6 +4,8 @@ using Test
 using QuadGK
 using Cubature
 
+include("test_utils.jl")
+
 #1D
 obj = x -> 3 * x + log(x)
 a = 1.0
@@ -85,3 +87,10 @@ n = 8
 x = sample(100, lb, ub, SobolSample())
 y = obj.(x)
 my_loba_ND = LobachevskySurrogate(x, y, lb, ub, alpha = [2.4, 2.4], n = 8, sparse = true)
+
+num_replicates = 100
+for i in 1:num_replicates
+    # Check that interpolation condition is satisfied
+    surr = _random_surrogate(LobachevskySurrogate)
+    @test _check_interpolation(surr)
+end
7 changes: 7 additions & 0 deletions test/sampling.jl
@@ -63,6 +63,13 @@ s = Surrogates.sample(n, lb, ub, SectionSample([constrained_val], UniformSample(
 @test s isa Vector{Float64} && length(s) == n && all(x -> lb ≤ x ≤ ub, s)
 @test all(==(constrained_val), s)
 
+# ND but 1D
+
+lb = [0.0]
+ub = [5.0]
+s = Surrogates.sample(n, lb, ub, SobolSample())
+@test s isa Vector{Float64} && length(s) == n && all(x -> lb[1] ≤ x ≤ ub[1], s)
+
 # ND
 # Now that we use QuasiMonteCarlo.jl, these tests are to make sure that we transform the output
 # from a Matrix to a Vector of Tuples properly for ND problems.