More Kriging improvements (log likelihood, noise variance, more tests) #379

Draft · wants to merge 3 commits into base: master
105 changes: 71 additions & 34 deletions src/Kriging.jl
@@ -1,10 +1,12 @@
+using Zygote
+
 #=
-One-dimensional Kriging method, following these papers:
+Kriging (Gaussian process interpolation/regression) method, following these papers:
 "Efficient Global Optimization of Expensive Black Box Functions" and
 "A Taxonomy of Global Optimization Methods Based on Response Surfaces"
 both by DONALD R. JONES
 =#
-mutable struct Kriging{X, Y, L, U, P, T, M, B, S, R} <: AbstractSurrogate
+mutable struct Kriging{X, Y, L, U, P, T, M, B, S, R, N} <: AbstractSurrogate
     x::X
     y::Y
     lb::L
@@ -15,6 +17,7 @@ mutable struct Kriging{X, Y, L, U, P, T, M, B, S, R} <: AbstractSurrogate
     b::B
     sigma::S
     inverse_of_R::R
+    noise_variance::N
 end
 
 """
@@ -101,29 +104,36 @@ Constructor for type Kriging.
   smoothness of the function being approximated, 0-> rough 2-> C^infinity
 - theta: value > 0 modeling how much the function is changing in the i-th variable.
 """
-function Kriging(x, y, lb::Number, ub::Number; p = 2.0,
-                 theta = 0.5 / max(1e-6 * abs(ub - lb), std(x))^p)
-    if length(x) != length(unique(x))
-        println("There exists a repetion in the samples, cannot build Kriging.")
-        return
-    end
-
-    if p > 2.0 || p < 0.0
-        throw(ArgumentError("Hyperparameter p must be between 0 and 2! Got: $p."))
-    end
-
-    if theta ≤ 0
-        throw(ArgumentError("Hyperparameter theta must be positive! Got: $theta"))
-    end
+function Kriging(x, y, lb::Number, ub::Number; p = 1.99,
+                 theta = 0.5 / max(1e-6 * abs(ub - lb), std(x))^p,
+                 noise_variance = 0.0)
+
+    # Need autodiff to ignore these checks. When optimizing hyperparameters, these won't
+    # matter as the optimization will be constrained to satisfy these by default.
+    Zygote.ignore() do
+        if length(x) != length(unique(x))
+            println("There exists a repetition in the samples, cannot build Kriging.")
+            return
+        end
+
+        if p > 2.0 || p < 0.0
+            throw(ArgumentError("Hyperparameter p must be between 0 and 2! Got: $p."))
+        end
+
+        if theta ≤ 0
+            throw(ArgumentError("Hyperparameter theta must be positive! Got: $theta"))
+        end
+    end
 
-    mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta)
-    Kriging(x, y, lb, ub, p, theta, mu, b, sigma, inverse_of_R)
+    mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta, noise_variance)
+    Kriging(x, y, lb, ub, p, theta, mu, b, sigma, inverse_of_R, noise_variance)
 end
 
-function _calc_kriging_coeffs(x, y, p::Number, theta::Number)
+function _calc_kriging_coeffs(x, y, p::Number, theta::Number, noise_variance)
     n = length(x)
 
-    R = [exp(-theta * abs(x[i] - x[j])^p) for i in 1:n, j in 1:n]
+    R = [exp(-theta * abs(x[i] - x[j])^p) + (i == j) * noise_variance
+         for i in 1:n, j in 1:n]
 
     # Estimate nugget based on maximum allowed condition number
     # This regularizes R to allow for points being close to each other without R becoming
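
For context on the new keyword: adding noise_variance to the diagonal is the classical "nugget" construction for Gaussian processes. It tells the model that observations are noisy, so the surrogate smooths the data instead of reproducing it exactly. A minimal sketch of the effect, using only the public constructor (the sample data and tolerances are illustrative, not from this PR):

    using Surrogates

    xs = [1.0, 2.0, 4.0, 7.0, 9.0]
    ys = sin.(xs)
    lb, ub = 0.0, 10.0

    # Zero noise variance (the default): the surrogate interpolates the samples.
    interp = Kriging(xs, ys, lb, ub)
    @assert all(isapprox(interp(x), y, atol = 1e-6) for (x, y) in zip(xs, ys))

    # Positive noise variance: the interpolation condition is deliberately relaxed.
    noisy = Kriging(xs, ys, lb, ub, noise_variance = 0.2)
    @assert !all(isapprox(noisy(x), y, atol = 1e-6) for (x, y) in zip(xs, ys))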
@@ -167,29 +177,35 @@ Constructor for Kriging surrogate.
 - theta: array of values > 0 modeling how much the function is
   changing in the i-th variable.
 """
-function Kriging(x, y, lb, ub; p = 2.0 .* collect(one.(x[1])),
+function Kriging(x, y, lb, ub; p = 1.99 .* collect(one.(x[1])),
                  theta = [0.5 / max(1e-6 * norm(ub .- lb), std(x_i[i] for x_i in x))^p[i]
-                          for i in 1:length(x[1])])
-    if length(x) != length(unique(x))
-        println("There exists a repetition in the samples, cannot build Kriging.")
-        return
-    end
-
-    for i in 1:length(x[1])
-        if p[i] > 2.0 || p[i] < 0.0
-            throw(ArgumentError("All p must be between 0 and 2! Got: $p."))
-        end
-
-        if theta[i] ≤ 0.0
-            throw(ArgumentError("All theta must be positive! Got: $theta."))
-        end
-    end
+                          for i in 1:length(x[1])],
+                 noise_variance = 0.0)
+
+    # Need autodiff to ignore these checks. When optimizing hyperparameters, these won't
+    # matter as the optimization will be constrained to satisfy these by default.
+    Zygote.ignore() do

[Review comment, Member]: Use ChainRulesCore.@ignore_derivatives

+        if length(x) != length(unique(x))
+            println("There exists a repetition in the samples, cannot build Kriging.")
+            return
+        end
+
+        for i in 1:length(x[1])
+            if p[i] > 2.0 || p[i] < 0.0
+                throw(ArgumentError("All p must be between 0 and 2! Got: $p."))
+            end
+
+            if theta[i] ≤ 0.0
+                throw(ArgumentError("All theta must be positive! Got: $theta."))
+            end
+        end
+    end
 
-    mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta)
-    Kriging(x, y, lb, ub, p, theta, mu, b, sigma, inverse_of_R)
+    mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta, noise_variance)
+    Kriging(x, y, lb, ub, p, theta, mu, b, sigma, inverse_of_R, noise_variance)
 end
 
-function _calc_kriging_coeffs(x, y, p, theta)
+function _calc_kriging_coeffs(x, y, p, theta, noise_variance)
     n = length(x)
     d = length(x[1])
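
The review comment refers to ChainRulesCore.jl, whose @ignore_derivatives macro marks a block as constant for every ChainRules-aware AD backend rather than for Zygote alone. A sketch of what the suggested replacement might look like (this assumes adding a ChainRulesCore dependency; the change is not part of this diff):

    using ChainRulesCore

    x = [1.0, 2.0, 2.0]  # illustrative sample vector containing a repetition

    # Same validation logic, but opaque to any ChainRules-compatible AD system
    # (Zygote, Diffractor, ...), not only Zygote:
    ChainRulesCore.@ignore_derivatives begin
        if length(x) != length(unique(x))
            println("There exists a repetition in the samples, cannot build Kriging.")
        end
    end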

@@ -198,7 +214,7 @@ function _calc_kriging_coeffs(x, y, p, theta)
         for l in 1:d
             sum = sum + theta[l] * norm(x[i][l] - x[j][l])^p[l]
         end
-        exp(-sum)
+        exp(-sum) + (i == j) * noise_variance
     end
     for j in 1:n, i in 1:n]
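
Written out, each entry assembled by this comprehension is (with the Kronecker delta realized by the (i == j) factor):

    R_{ij} = \exp\left(-\sum_{\ell=1}^{d} \theta_\ell \,\lvert x_{i\ell} - x_{j\ell} \rvert^{p_\ell}\right) + \delta_{ij}\,\sigma^2_{\mathrm{noise}}

Only the diagonal is inflated, so the correlations between distinct points are untouched and R stays symmetric.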

@@ -258,6 +274,27 @@ function add_point!(k::Kriging, new_x, new_y)
         append!(k.x, new_x)
         append!(k.y, new_y)
     end
-    k.mu, k.b, k.sigma, k.inverse_of_R = _calc_kriging_coeffs(k.x, k.y, k.p, k.theta)
+    k.mu, k.b, k.sigma, k.inverse_of_R = _calc_kriging_coeffs(k.x, k.y, k.p, k.theta,
+                                                              k.noise_variance)
 
     nothing
 end
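
Callers of add_point! are unaffected; the stored noise variance is simply carried through when the coefficients are refit. A brief usage sketch (the data are illustrative):

    using Surrogates

    xs = [1.0, 3.0, 5.0]
    krig = Kriging(xs, exp.(-xs), 0.0, 6.0, noise_variance = 0.1)

    # mu, b, sigma and inverse_of_R are recomputed with the same noise variance:
    add_point!(krig, 4.0, exp(-4.0))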

"""
kriging_log_likelihood(x, y, p, theta, noise_variance = 0.0)
Compute the likelihood of the parameters p, theta and noise variance given the data x and y
for a kriging model. Useful as an objective function in hyperparameter optimization.
"""
function kriging_log_likelihood(x, y, p, theta, noise_variance = 0.0)
mu, b, sigma, inverse_of_R = _calc_kriging_coeffs(x, y, p, theta, noise_variance)

n = length(y)
y_minus_1μ = y - ones(length(y), 1) * mu
Rinv = inverse_of_R

term1 = only(-(y_minus_1μ' * inverse_of_R * y_minus_1μ) / 2 / sigma)
term2 = -log((2π * sigma)^(n / 2) / sqrt(det(Rinv)))

logpdf = term1 + term2
return logpdf
end
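
Since the docstring positions this as an objective for hyperparameter optimization, a minimal sketch of that workflow is gradient ascent with Zygote (the step size, iteration count and data are illustrative; as the inline comments above note, a real optimizer should constrain p to (0, 2) and theta to positive values):

    using Surrogates, Zygote

    xs = [1.0, 2.0, 4.0, 7.0, 9.0]
    ys = sin.(xs)

    params = [1.5, 0.5]  # initial (p, theta)
    for _ in 1:50
        g = Zygote.gradient(ps -> Surrogates.kriging_log_likelihood(xs, ys, ps[1], ps[2]),
                            params)[1]
        params .+= 1e-3 .* g
    end

    # Build the surrogate with the tuned hyperparameters.
    surr = Kriging(xs, ys, 0.0, 10.0, p = params[1], theta = params[2])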
1 change: 1 addition & 0 deletions src/LinearSurrogate.jl
@@ -74,6 +74,7 @@ function LinearSurrogate(x, y, lb, ub)
     #T = transpose(reshape(reinterpret(eltype(x[1]), x), length(x[1]), length(x)))
     X = Array{eltype(x[1]), 2}(undef, length(x), length(x[1]))
     X = copy(T)
+
     ols = lm(X, y)
     return LinearSurrogate(x, y, coef(ols), lb, ub)
 end
2 changes: 1 addition & 1 deletion src/Radials.jl
@@ -26,7 +26,7 @@ multiquadricRadial(c = 1.0) = RadialFunction(1, z -> sqrt((c * norm(z))^2 + 1))
 
 thinplateRadial() = RadialFunction(2, z -> begin
     result = norm(z)^2 * log(norm(z))
-    ifelse(iszero(z), zero(result), result)
+    ifelse(all(==(0), z), zero(result), result)
 end)
 
 """
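
A plausible motivation for this one-line change (inferred from the code, not stated in the PR): z can be a Tuple of coordinates rather than a scalar or an array, and Base.iszero has no method for tuples, while all(==(0), z) accepts any iterable:

    all(==(0), 0.0)          # true (numbers iterate as single-element collections)
    all(==(0), [0.0, 0.0])   # true
    all(==(0), (0.0, 0.0))   # true; iszero((0.0, 0.0)) would throw a MethodError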
5 changes: 3 additions & 2 deletions src/Sampling.jl
@@ -6,8 +6,9 @@ using QuasiMonteCarlo: SamplingAlgorithm
 # of vectors of Tuples
 function sample(args...; kwargs...)
     s = QuasiMonteCarlo.sample(args...; kwargs...)
-    if s isa Vector
-        return s
+    # 1D case: s is a Vector
+    if size(s, 1) == 1
+        return s[1, :]
     elseif size(s, 2) == 1
         return s
     else
         # ND case: s is a d x n matrix, where d is the dimension and n is the number of samples
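
The branch now dispatches purely on the shape of what QuasiMonteCarlo.jl returns. A sketch of the resulting return conventions, mirroring the new "ND but 1D" test added to test/sampling.jl below:

    using Surrogates

    s1 = Surrogates.sample(5, 0.0, 5.0, SobolSample())                # Vector{Float64}
    s2 = Surrogates.sample(5, [0.0], [5.0], SobolSample())            # also a Vector{Float64}
    s3 = Surrogates.sample(5, [0.0, 1.0], [5.0, 6.0], SobolSample())  # Vector of Tuples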
9 changes: 9 additions & 0 deletions test/AD_compatibility.jl
@@ -114,10 +114,15 @@ end
 
 # #Kriging
 @testset "Kriging 1D" begin
+    # Test gradient of surrogate evaluation
     my_p = 1.5
     my_krig = Kriging(x, y, lb, ub, p = my_p)
     g = x -> my_krig'(x)
     g(5.0)
+
+    # test gradient with respect to hyperparameters
+    obj = ((p, θ),) -> Surrogates.kriging_log_likelihood(x, y, p, θ)
+    obj'((1.99, 1.0))
 end
 
 # #Linear Surrogate
@@ -202,6 +207,10 @@ end
     my_krig = Kriging(x, y, lb, ub, p = my_p, theta = my_theta)
     g = x -> Zygote.gradient(my_krig, x)
     g((2.0, 5.0))
+
+    # test gradient with respect to hyperparameters
+    obj = params -> Surrogates.kriging_log_likelihood(x, y, params[1:2], params[3:4])
+    obj'([1.9, 1.9, 2.0, 2.0])
 end
 
 # Linear Surrogate
21 changes: 19 additions & 2 deletions test/Kriging.jl
@@ -3,6 +3,8 @@ using Surrogates
 using Test
 using Statistics
 
+include("test_utils.jl")
+
 #1D
 lb = 0.0
 ub = 10.0
@@ -19,6 +21,9 @@ my_p = 1.9
 my_k = Kriging(x, y, lb, ub, p = my_p)
 @test my_k.theta ≈ 0.5 * std(x)^(-my_p)
 
+# Check to make sure interpolation condition is satisfied
+@test _check_interpolation(my_k)
+
 # Check input dimension validation for 1D Kriging surrogates
 @test_throws ArgumentError my_k(rand(3))
 @test_throws ArgumentError my_k(Float64[])
@@ -65,7 +70,7 @@ std_err = std_error_at_point(my_k, 4.0)
 kwar_krig = Kriging(x, y, lb, ub);
 
 # Check hyperparameter initialization for 1D Kriging surrogates
-p_expected = 2.0
+p_expected = 1.99
 @test kwar_krig.p == p_expected
 @test kwar_krig.theta == 0.5 / std(x)^p_expected

@@ -130,6 +135,18 @@ kwarg_krig_ND = Kriging(x, y, lb, ub)
 
 # Test hyperparameter initialization
 d = length(x[3])
-p_expected = 2.0
+p_expected = 1.99
 @test all(==(p_expected), kwarg_krig_ND.p)
 @test all(kwarg_krig_ND.theta .≈ [0.5 / std(x_i[ℓ] for x_i in x)^p_expected for ℓ in 1:3])
+
+num_replicates = 100
+
+for i in 1:num_replicates
+    # Check that interpolation condition is satisfied when noise variance is zero
+    surr = _random_surrogate(Kriging)
+    @test _check_interpolation(surr)
+
+    # Check that we do not satisfy interpolation condition when noise variance isn't zero
+    surr = _random_surrogate(Kriging, noise_variance = 0.2)
+    @test !_check_interpolation(surr)
+end
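
test/test_utils.jl itself is not shown in this diff. Judging from the call sites, _check_interpolation(surr) checks that the surrogate reproduces its training data and _random_surrogate builds a surrogate of the given type on random samples of a test function. A hypothetical sketch consistent with those call sites (names, bounds and tolerances are assumptions, not taken from the PR):

    using Surrogates

    # Does the surrogate reproduce its own training data?
    function _check_interpolation(surr; atol = 1e-6)
        all(isapprox(surr(x), y, atol = atol) for (x, y) in zip(surr.x, surr.y))
    end

    # Build a surrogate of the given type on random 1D samples of f.
    function _random_surrogate(surrogate_type, f = x -> sin(2x); kwargs...)
        lb, ub = 0.0, 4.0
        xs = lb .+ (ub - lb) .* rand(20)
        surrogate_type(xs, f.(xs), lb, ub; kwargs...)
    end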
12 changes: 12 additions & 0 deletions test/Radials.jl
@@ -3,6 +3,8 @@ using Test
 using LinearAlgebra
 using Surrogates
 
+include("test_utils.jl")
+
 #1D
 lb = 0.0
 ub = 4.0
@@ -174,3 +176,13 @@ y = mockvalues.(x)
 rbf = RadialBasis(x, y, lb, ub, rad = multiquadricRadial(1.788))
 test = (lb .+ ub) ./ 2
 @test isapprox(rbf(test), mockvalues(test), atol = 0.001)
+
+num_replicates = 100
+
+for radial_type in [linearRadial(), cubicRadial(), multiquadricRadial(), thinplateRadial()]
+    for i in 1:num_replicates
+        # Check that interpolation condition is satisfied
+        surr = _random_surrogate(RadialBasis, rad = radial_type)
+        @test _check_interpolation(surr)
+    end
+end
3 changes: 3 additions & 0 deletions test/Wendland.jl
@@ -1,4 +1,7 @@
 using Surrogates
+using LinearAlgebra
+
+include("test_utils.jl")
 
 #1D
 x = [1.0, 2.0, 3.0]
10 changes: 10 additions & 0 deletions test/inverseDistanceSurrogate.jl
@@ -1,5 +1,8 @@
 using Surrogates
 using Test
+using LinearAlgebra
+
+include("test_utils.jl")
 
 #1D
 obj = x -> sin(x) + sin(x)^2 + sin(x)^3
@@ -62,3 +65,10 @@ y_new = f(x_new)
 add_point!(surrogate, x_new, y_new)
 @test surrogate(x_new) ≈ y_new
 surrogate((0.0, 0.0))
+
+num_replicates = 100
+for i in 1:num_replicates
+    # Check that interpolation condition is satisfied
+    surr = _random_surrogate(InverseDistanceSurrogate)
+    @test _check_interpolation(surr)
+end
13 changes: 13 additions & 0 deletions test/linearSurrogate.jl
@@ -1,5 +1,7 @@
 using Surrogates
 
+include("test_utils.jl")
+
 #1D
 x = [1.0, 2.0, 3.0]
 y = [1.5, 3.5, 5.3]
@@ -28,3 +30,14 @@ val = my_linear_ND((10.0, 11.0))
 @test_throws ArgumentError my_linear_ND(Float64[])
 @test_throws ArgumentError my_linear_ND(1.0)
 @test_throws ArgumentError my_linear_ND((2.0, 3.0, 4.0))
+
+num_replicates = 100
+for i in 1:num_replicates
+    # LinearSurrogate should not interpolate the data unless the data/func is linear
+    surr = _random_surrogate(LinearSurrogate)
+    @test !_check_interpolation(surr)
+
+    linear_func = x -> sum(x)
+    surr = _random_surrogate(LinearSurrogate, linear_func)
+    @test _check_interpolation(surr)
+end
9 changes: 9 additions & 0 deletions test/lobachevsky.jl
@@ -4,6 +4,8 @@ using Test
 using QuadGK
 using Cubature
 
+include("test_utils.jl")
+
 #1D
 obj = x -> 3 * x + log(x)
 a = 1.0
@@ -85,3 +87,10 @@ n = 8
 x = sample(100, lb, ub, SobolSample())
 y = obj.(x)
 my_loba_ND = LobachevskySurrogate(x, y, lb, ub, alpha = [2.4, 2.4], n = 8, sparse = true)
+
+num_replicates = 100
+for i in 1:num_replicates
+    # Check that interpolation condition is satisfied
+    surr = _random_surrogate(LobachevskySurrogate)
+    @test _check_interpolation(surr)
+end
7 changes: 7 additions & 0 deletions test/sampling.jl
@@ -63,6 +63,13 @@ s = Surrogates.sample(n, lb, ub, SectionSample([constrained_val], UniformSample(
 @test s isa Vector{Float64} && length(s) == n && all(x -> lb ≤ x ≤ ub, s)
 @test all(==(constrained_val), s)
 
+# ND but 1D
+
+lb = [0.0]
+ub = [5.0]
+s = Surrogates.sample(n, lb, ub, SobolSample())
+@test s isa Vector{Float64} && length(s) == n && all(x -> lb[1] ≤ x ≤ ub[1], s)
+
 # ND
 # Now that we use QuasiMonteCarlo.jl, these tests are to make sure that we transform the output
 # from a Matrix to a Vector of Tuples properly for ND problems.