diff --git a/Project.toml b/Project.toml
index 79a481846..d87569e97 100644
--- a/Project.toml
+++ b/Project.toml
@@ -11,6 +11,7 @@ IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

 [compat]
diff --git a/docs/pages.jl b/docs/pages.jl
index d4ce6dd70..b53c2d048 100644
--- a/docs/pages.jl
+++ b/docs/pages.jl
@@ -14,6 +14,7 @@ pages = ["index.md"
         "Polynomial Chaos" => "polychaos.md",
         "Variable Fidelity" => "variablefidelity.md",
         "Gradient Enhanced Kriging" => "gek.md",
+        "GEKPLS" => "gekpls.md",
     ]
     "User guide" => [
         "Samples" => "samples.md",
diff --git a/docs/src/gekpls.md b/docs/src/gekpls.md
new file mode 100644
index 000000000..7a5095d98
--- /dev/null
+++ b/docs/src/gekpls.md
@@ -0,0 +1,84 @@
+## GEKPLS Surrogate Tutorial
+
+Gradient Enhanced Kriging with Partial Least Squares Method (GEKPLS) is a surrogate modeling technique that reduces computation time and improves accuracy for high-dimensional problems. The Julia implementation of GEKPLS is adapted from the Python version by [SMT](https://github.com/SMTorg), which is based on this [paper](https://arxiv.org/pdf/1708.02663.pdf).
+
+The following are the inputs when building a GEKPLS surrogate (a minimal constructor sketch follows the list):
+
+1. X - The matrix containing the training points
+2. y - The vector containing the training outputs associated with each of the training points
+3. grads - The gradients at each of the input X training points
+4. n_comp - Number of components to retain for the partial least squares (PLS) regression
+5. delta_x - The step size to use for the first-order Taylor approximation
+6. xlimits - The lower and upper bounds for the training points
+7. extra_points - The number of additional points to use for the PLS
+8. theta - The hyperparameter to use for the correlation model
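+
+In code, these map onto the positional arguments of the `GEKPLS` constructor in the order listed. As a minimal sketch (the names below are placeholders for the inputs described above):
+
+```julia
+g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, theta)
+```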
+
+The following example illustrates how to use GEKPLS:
+
+```@example gekpls_water_flow
+
+using Surrogates
+using Zygote
+
+function vector_of_tuples_to_matrix(v)
+    #helper function to convert training data generated by surrogate sampling into a matrix suitable for GEKPLS
+    num_rows = length(v)
+    num_cols = length(first(v))
+    K = zeros(num_rows, num_cols)
+    for row in 1:num_rows
+        for col in 1:num_cols
+            K[row, col] = v[row][col]
+        end
+    end
+    return K
+end
+
+function vector_of_tuples_to_matrix2(v)
+    #helper function to convert gradients into matrix form
+    num_rows = length(v)
+    num_cols = length(first(first(v)))
+    K = zeros(num_rows, num_cols)
+    for row in 1:num_rows
+        for col in 1:num_cols
+            K[row, col] = v[row][1][col]
+        end
+    end
+    return K
+end
+
+function water_flow(x)
+    r_w = x[1]
+    r = x[2]
+    T_u = x[3]
+    H_u = x[4]
+    T_l = x[5]
+    H_l = x[6]
+    L = x[7]
+    K_w = x[8]
+    log_val = log(r / r_w)
+    return (2 * pi * T_u * (H_u - H_l)) /
+           (log_val * (1 + (2 * L * T_u / (log_val * r_w^2 * K_w)) + T_u / T_l))
+end
+
+n = 1000
+d = 8
+lb = [0.05, 100, 63070, 990, 63.1, 700, 1120, 9855]
+ub = [0.15, 50000, 115600, 1110, 116, 820, 1680, 12045]
+x = sample(n, lb, ub, SobolSample())
+X = vector_of_tuples_to_matrix(x)
+grads = vector_of_tuples_to_matrix2(gradient.(water_flow, x))
+y = reshape(water_flow.(x), (size(x, 1), 1))
+xlimits = hcat(lb, ub)
+n_test = 100
+x_test = sample(n_test, lb, ub, GoldenSample())
+X_test = vector_of_tuples_to_matrix(x_test)
+y_true = water_flow.(x_test)
+n_comp = 2
+delta_x = 0.0001
+extra_points = 2
+initial_theta = 0.01
+g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+y_pred = g(X_test)
+rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test)) #root mean squared error
+println(rmse)
+```
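+
+An existing GEKPLS surrogate can also be updated in place with additional samples via `add_point!`. The following is a sketch (not executed as part of these docs) that reuses the helpers and bounds from the example above; `LatinHypercubeSample` is chosen here only so that the new points are unlikely to duplicate the existing Sobol samples:
+
+```julia
+x_new = sample(5, lb, ub, LatinHypercubeSample())
+X_new = vector_of_tuples_to_matrix(x_new)
+grads_new = vector_of_tuples_to_matrix2(gradient.(water_flow, x_new))
+y_new = reshape(water_flow.(x_new), (size(x_new, 1), 1))
+add_point!(g, X_new, y_new, grads_new) #rebuilds the PLS projection and kriging weights
+y_pred_updated = g(X_test)
+```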
diff --git a/src/GEKPLS.jl b/src/GEKPLS.jl
new file mode 100644
index 000000000..255cbc588
--- /dev/null
+++ b/src/GEKPLS.jl
@@ -0,0 +1,537 @@
+using LinearAlgebra
+using Statistics
+
+mutable struct GEKPLS{T <: AbstractFloat} <: AbstractSurrogate
+    x::Matrix{T} #1
+    y::Matrix{T} #2
+    grads::Matrix{T} #3
+    xl::Matrix{T} #xlimits #4
+    delta::T #5
+    extra_points::Int #6
+    num_components::Int #7
+    beta::Vector{T} #8
+    gamma::Matrix{T} #9
+    theta::Vector{T} #10
+    reduced_likelihood_function_value::T #11
+    X_offset::Matrix{T} #12
+    X_scale::Matrix{T} #13
+    X_after_std::Matrix{T} #14 - X after standardization
+    pls_mean::Matrix{T} #15
+    y_mean::T #16
+    y_std::T #17
+end
+
+function bounds_error(x, xl)
+    num_x_rows = size(x, 1)
+    num_dim = size(xl, 1)
+    for i in 1:num_x_rows
+        for j in 1:num_dim
+            if (x[i, j] < xl[j, 1] || x[i, j] > xl[j, 2])
+                return true
+            end
+        end
+    end
+    return false
+end
+
+#constructor for the GEKPLS struct
+function GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, θ)
+
+    #ensure that X values are within the upper and lower bounds
+    if bounds_error(X, xlimits)
+        println("X values outside bounds")
+        return
+    end
+
+    theta = [θ for i in 1:n_comp]
+    pls_mean, X_after_PLS, y_after_PLS = _ge_compute_pls(X, y, n_comp, grads, delta_x,
+                                                         xlimits, extra_points)
+    X_after_std, y_after_std, X_offset, y_mean, X_scale, y_std = standardization(X_after_PLS,
+                                                                                 y_after_PLS)
+    D, ij = cross_distances(X_after_std)
+    pls_mean_reshaped = reshape(pls_mean, (size(X, 2), n_comp))
+    d = componentwise_distance_PLS(D, "squar_exp", n_comp, pls_mean_reshaped)
+    nt, nd = size(X_after_PLS)
+    beta, gamma, reduced_likelihood_function_value = _reduced_likelihood_function(theta,
+                                                                                  "squar_exp",
+                                                                                  d, nt, ij,
+                                                                                  y_after_std)
+    return GEKPLS(X, y, grads, xlimits, delta_x, extra_points, n_comp, beta, gamma, theta,
+                  reduced_likelihood_function_value,
+                  X_offset, X_scale, X_after_std, pls_mean_reshaped, y_mean, y_std)
+end
+
+# predictor
+function (g::GEKPLS)(X_test)
+    n_eval, n_features_x = size(X_test)
+    X_cont = (X_test .- g.X_offset) ./ g.X_scale
+    dx = differences(X_cont, g.X_after_std)
+    pred_d = componentwise_distance_PLS(dx, "squar_exp", g.num_components, g.pls_mean)
+    nt = size(g.X_after_std, 1)
+    r = transpose(reshape(squar_exp(g.theta, pred_d), (nt, n_eval)))
+    f = ones(n_eval, 1)
+    y_ = (f * g.beta) + (r * g.gamma)
+    y = g.y_mean .+ g.y_std * y_
+    return y
+end
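+
+# Note on the predictor above: it implements the kriging mean prediction
+# ŷ = μ_y + σ_y * (F*β + r*γ), where F is the constant regression basis
+# (a column of ones) and r holds the correlations between the test points
+# and the training points in the reduced PLS space.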
+
+function add_point!(g::GEKPLS, new_x, new_y, new_grads)
+    if new_x in g.x
+        println("Adding a sample that already exists. Cannot build GEKPLS")
+        return
+    end
+
+    if bounds_error(new_x, g.xl)
+        println("x values outside bounds")
+        return
+    end
+
+    g.x = vcat(g.x, new_x)
+    g.y = vcat(g.y, new_y)
+    g.grads = vcat(g.grads, new_grads)
+    pls_mean, X_after_PLS, y_after_PLS = _ge_compute_pls(g.x, g.y, g.num_components,
+                                                         g.grads, g.delta, g.xl,
+                                                         g.extra_points)
+    g.X_after_std, y_after_std, g.X_offset, g.y_mean, g.X_scale, g.y_std = standardization(X_after_PLS,
+                                                                                           y_after_PLS)
+    D, ij = cross_distances(g.X_after_std)
+    g.pls_mean = reshape(pls_mean, (size(g.x, 2), g.num_components))
+    d = componentwise_distance_PLS(D, "squar_exp", g.num_components, g.pls_mean)
+    nt, nd = size(X_after_PLS)
+    g.beta, g.gamma, g.reduced_likelihood_function_value = _reduced_likelihood_function(g.theta,
+                                                                                        "squar_exp",
+                                                                                        d,
+                                                                                        nt,
+                                                                                        ij,
+                                                                                        y_after_std)
+end
+
+function _ge_compute_pls(X, y, n_comp, grads, delta_x, xlimits, extra_points)
+    """
+    Gradient-enhanced PLS coefficients.
+    Parameters
+    ----------
+    X: [n_obs, dim] - The input variables.
+    y: [n_obs, ny] - The output variable.
+    n_comp: int - Number of principal components used.
+    grads: [n_obs, dim] - The gradient values.
+    delta_x: real - The step used in the first-order Taylor approximation.
+    xlimits: [dim, 2] - The upper and lower variable bounds.
+    extra_points: int - The number of extra points per training point.
+    Returns
+    -------
+    pls_mean: [dim, n_comp] - The mean of the absolute PLS coefficients across training points.
+    X: Concatenation of X and XX: [extra_points*nt, dim] - extra points added when extra_points > 0.
+    y: Concatenation of y and yy: [extra_points*nt, 1] - extra points added when extra_points > 0.
+    """
+    # this function is equivalent to a combination of
+    # https://github.com/SMTorg/smt/blob/f124c01ffa78c04b80221dded278a20123dac742/smt/utils/kriging_utils.py#L1036
+    # and https://github.com/SMTorg/smt/blob/f124c01ffa78c04b80221dded278a20123dac742/smt/surrogate_models/gekpls.py#L48
+
+    nt, dim = size(X)
+    XX = zeros(0, dim)
+    yy = zeros(0, size(y)[2])
+    coeff_pls = zeros((dim, n_comp, nt))
+
+    for i in 1:nt
+        if dim >= 3
+            bb_vals = circshift(boxbehnken(dim, 1), 1)
+        else
+            bb_vals = [0.0 0.0; #center
+                       1.0 0.0; #right
+                       0.0 1.0; #up
+                       -1.0 0.0; #left
+                       0.0 -1.0; #down
+                       1.0 1.0; #right up
+                       -1.0 1.0; #left up
+                       -1.0 -1.0; #left down
+                       1.0 -1.0] #right down
+        end
+        _X = zeros((size(bb_vals)[1], dim))
+        _y = zeros((size(bb_vals)[1], 1))
+        bb_vals = bb_vals .* (delta_x * (xlimits[:, 2] - xlimits[:, 1]))' #smt calls this sign. I've called it bb_vals
+        _X = X[i, :]' .+ bb_vals
+        bb_vals = bb_vals .* grads[i, :]'
+        _y = y[i, :] .+ sum(bb_vals, dims = 2)
+
+        #_pls.fit(_X, _y) # relic from the sklearn version; retained for future reference.
+        #coeff_pls[:, :, i] = _pls.x_rotations_ # relic from the sklearn version; retained for future reference.
+
+        coeff_pls[:, :, i] = _modified_pls(_X, _y, n_comp) #_modified_pls returns the equivalent of sklearn's _pls.x_rotations_
+        if extra_points != 0
+            start_index = max(1, length(coeff_pls[:, 1, i]) - extra_points + 1)
+            max_coeff = sortperm(broadcast(abs, coeff_pls[:, 1, i]))[start_index:end]
+            for ii in max_coeff
+                XX = [XX; transpose(X[i, :])]
+                XX[end, ii] += delta_x * (xlimits[ii, 2] - xlimits[ii, 1])
+                yy = [yy; y[i]]
+                yy[end] += grads[i, ii] * delta_x * (xlimits[ii, 2] - xlimits[ii, 1])
+            end
+        end
+    end
+    if extra_points != 0
+        X = [X; XX]
+        y = [y; yy]
+    end
+
+    pls_mean = mean(broadcast(abs, coeff_pls), dims = 3)
+    return pls_mean, X, y
+end
+
+######start of bbdesign######
+
+#
+# Adapted from 'ExperimentalDesign.jl: Design of Experiments in Julia'
+# https://github.com/phrb/ExperimentalDesign.jl
+
+# MIT License
+
+# ExperimentalDesign.jl: Design of Experiments in Julia
+# Copyright (C) 2019 Pedro Bruel
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+
+function boxbehnken(matrix_size::Int)
+    boxbehnken(matrix_size, matrix_size)
+end
+
+function boxbehnken(matrix_size::Int, center::Int)
+    @assert matrix_size >= 3
+
+    A_fact = explicit_fullfactorial(Tuple([-1, 1] for i in 1:2))
+
+    rows = floor(Int, (0.5 * matrix_size * (matrix_size - 1)) * size(A_fact)[1])
+
+    A = zeros(rows, matrix_size)
+
+    l = 0
+    for i in 1:(matrix_size - 1)
+        for j in (i + 1):matrix_size
+            l = l + 1
+            A[(max(0, (l - 1) * size(A_fact)[1]) + 1):(l * size(A_fact)[1]), i] = A_fact[:, 1]
+            A[(max(0, (l - 1) * size(A_fact)[1]) + 1):(l * size(A_fact)[1]), j] = A_fact[:, 2]
+        end
+    end
+
+    if center == matrix_size
+        if matrix_size <= 16
+            points = [0, 0, 3, 3, 6, 6, 6, 8, 9, 10, 12, 12, 13, 14, 15, 16]
+            center = points[matrix_size]
+        end
+    end
+
+    A = transpose(hcat(transpose(A), transpose(zeros(center, matrix_size))))
+end
+
+function explicit_fullfactorial(factors::Tuple)
+    explicit_fullfactorial(fullfactorial(factors))
+end
+
+function explicit_fullfactorial(iterator::Base.Iterators.ProductIterator)
+    hcat(vcat.(collect(iterator)...)...)
+end
+
+function fullfactorial(factors::Tuple)
+    Base.Iterators.product(factors...)
+end
+
+######end of bb design######
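+
+# Note: boxbehnken(3, 1) yields a 13×3 design, namely the 12 (+/-1, +/-1) pairs
+# placed on the three coordinate planes plus a single all-zero center row; this
+# shape follows from the row-count formula above and is stated here only as a
+# reading aid for _ge_compute_pls, which calls circshift(boxbehnken(dim, 1), 1).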
+
+function standardization(X, y)
+    """
+    We subtract the mean from each variable, then divide the values of each
+    variable by its standard deviation.
+
+    Parameters
+    ----------
+
+    X - The input variables.
+    y - The output variable.
+
+    Returns
+    -------
+
+    X: [n_obs, dim]
+    - The standardized input matrix.
+
+    y: [n_obs, 1]
+    - The standardized output vector.
+
+    X_offset: The mean (or the min if scale_X_to_unit=True) of each input variable.
+
+    y_mean: The mean of the output variable.
+
+    X_scale: The standard deviation of each input variable.
+
+    y_std: The standard deviation of the output variable.
+
+    """
+    #Equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L21
+    X_offset = mean(X, dims = 1)
+    X_scale = std(X, dims = 1)
+    X_scale = map(x -> x == 0.0 ? 1.0 : x, X_scale) #to prevent division by 0 below
+    y_mean = mean(y)
+    y_std = std(y)
+    y_std = (y_std == 0) ? 1.0 : y_std #to prevent division by 0 below
+    X = (X .- X_offset) ./ X_scale
+    y = (y .- y_mean) ./ y_std
+    return X, y, X_offset, y_mean, X_scale, y_std
+end
+
+function cross_distances(X)
+    """
+    Computes the nonzero componentwise cross-distances between the vectors
+    in X.
+
+    Parameters
+    ----------
+
+    X: [n_obs, dim]
+
+    Returns
+    -------
+    D: [n_obs * (n_obs - 1) / 2, dim]
+    - The cross-distances between the vectors in X.
+
+    ij: [n_obs * (n_obs - 1) / 2, 2]
+    - The indices i and j of the vectors in X associated with the
+      cross-distances in D.
+    """
+    # equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L86
+    n_samples, n_features = size(X)
+    n_nonzero_cross_dist = (n_samples * (n_samples - 1)) ÷ 2
+    ij = zeros((n_nonzero_cross_dist, 2))
+    D = zeros((n_nonzero_cross_dist, n_features))
+    ll_1 = 0
+
+    for k in 1:(n_samples - 1)
+        ll_0 = ll_1 + 1
+        ll_1 = ll_0 + n_samples - k - 1
+        ij[ll_0:ll_1, 1] .= k
+        ij[ll_0:ll_1, 2] = (k + 1):1:n_samples
+        D[ll_0:ll_1, :] = -(X[(k + 1):n_samples, :] .- X[k, :]')
+    end
+    return D, Int.(ij)
+end
+
+function componentwise_distance_PLS(D, corr, n_comp, coeff_pls)
+    """
+    Computes the nonzero componentwise cross-spatial-correlation-distance
+    between the vectors in X.
+
+    Equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L1257
+    with some simplifications (theta and return_derivative are removed, as they are not required for GEKPLS).
+
+    Parameters
+    ----------
+
+    D: [n_obs * (n_obs - 1) / 2, dim]
+    - The L1 cross-distances between the vectors in X.
+
+    corr: str
+    - Name of the correlation function used: squar_exp or abs_exp.
+
+    n_comp: int
+    - Number of principal components used.
+
+    coeff_pls: [dim, n_comp]
+    - The PLS coefficients.
+
+    Returns
+    -------
+
+    D_corr: [n_obs * (n_obs - 1) / 2, n_comp]
+    - The componentwise cross-spatial-correlation-distance between the
+      vectors in X.
+
+    """
+    #todo
+    #figure out how to handle this computation in the case of very large matrices,
+    #similar to what SMT has done
+    #at https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L1257
+    D_corr = zeros((size(D)[1], n_comp))
+
+    if corr == "squar_exp"
+        D_corr = D .^ 2 * coeff_pls .^ 2
+    else #abs_exp
+        D_corr = abs.(D) * abs.(coeff_pls)
+    end
+
+    return D_corr
+end
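+
+# Worked shape example (illustrative): for 3 training points in 8 dimensions,
+# cross_distances returns D of size 3×8 (one row per pair (1,2), (1,3), (2,3))
+# together with ij = [1 2; 1 3; 2 3]; componentwise_distance_PLS then projects
+# D onto the PLS components, giving a 3×n_comp matrix of working distances.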
+
+function squar_exp(theta, d)
+    """
+    Squared exponential correlation model.
+    Equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L604
+    Parameters
+    ----------
+    theta: hyperparameters of the correlation model.
+    d: componentwise distances from componentwise_distance_PLS.
+
+    Returns
+    -------
+    r: array containing the values of the autocorrelation model.
+
+    """
+    n_components = size(d)[2]
+    theta = reshape(theta, (1, n_components))
+    return exp.(-sum(theta .* d, dims = 2))
+end
+
+function differences(X, Y)
+    #equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L392
+    #code credit: Elias Carvalho - https://stackoverflow.com/questions/72392010/row-wise-operations-between-matrices-in-julia
+    Rx = repeat(X, inner = (size(Y, 1), 1))
+    Ry = repeat(Y, size(X, 1))
+    return Rx - Ry
+end
+
+function _reduced_likelihood_function(theta, kernel_type, d, nt, ij, y_norma, noise = 0.0)
+    """
+    This function is a loose translation of the SMT code from
+    https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/surrogate_models/krg_based.py#L247
+    It determines the BLUP parameters and evaluates the reduced likelihood function for the given theta.
+
+    Parameters
+    ----------
+    theta: array containing the parameters at which the Gaussian process model parameters should be determined.
+    kernel_type: name of the correlation function.
+    d: the componentwise cross-spatial-correlation-distance between the vectors in X.
+    nt: number of training points.
+    ij: the indices i and j of the vectors in X associated with the cross-distances in D.
+    y_norma: standardized y values.
+    noise: noise hyperparameter - increasing noise reduces reduced_likelihood_function_value.
+
+    Returns
+    -------
+    beta: generalized least-squares regression weights.
+    gamma: Gaussian process weights.
+    reduced_likelihood_function_value: real
+    - The value of the reduced likelihood function associated with the given autocorrelation parameters theta.
+
+    """
+    reduced_likelihood_function_value = -Inf
+    nugget = 1000000.0 * eps() #a jitter for numerical stability; reducing the multiplier below 1000000.0 leads to a positive-definiteness error in the Cholesky decomposition
+    if kernel_type == "squar_exp" #todo - add other kernel types, e.g. abs_exp
+        r = squar_exp(theta, d)
+    end
+    R = (I + zeros(nt, nt)) .* (1.0 + nugget + noise)
+
+    for k in 1:size(ij)[1]
+        R[ij[k, 1], ij[k, 2]] = r[k]
+        R[ij[k, 2], ij[k, 1]] = r[k]
+    end
+
+    C = cholesky(R).L #todo - values diverge at this point from the SMT code; verify impact
+    F = ones(nt, 1) #todo - examine if this should be a parameter for this function
+    Ft = C \ F
+    Q, G = qr(Ft)
+    Q = Array(Q)
+    Yt = C \ y_norma
+    #todo - in SMT, they check if the matrix is ill-conditioned using SVD. Verify and include if necessary.
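+    # The remaining steps follow standard kriging GLS algebra: beta solves the
+    # generalized least-squares problem through the QR factors of Ft, the
+    # kriging weights are gamma = C' \ (Yt - Ft*beta), sigma2 estimates the
+    # process variance, and the determinant term in the reduced likelihood is
+    # taken from the Cholesky diagonal for numerical stability.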
+    beta = G \ [(transpose(Q) ⋅ Yt)]
+    rho = Yt .- (Ft .* beta)
+    gamma = transpose(C) \ rho
+    sigma2 = sum((rho) .^ 2, dims = 1) / nt
+    detR = prod(diag(C) .^ (2.0 / nt))
+    reduced_likelihood_function_value = -nt * log10(sum(sigma2)) - nt * log10(detR)
+    return beta, gamma, reduced_likelihood_function_value
+end
+
+### MODIFIED PLS BELOW ###
+
+# The code below is a simplified version of
+# sklearn's PLS
+# https://github.com/scikit-learn/scikit-learn/blob/80598905e/sklearn/cross_decomposition/_pls.py
+
+function _center_scale(X, Y)
+    x_mean = mean(X, dims = 1)
+    X .-= x_mean
+    y_mean = mean(Y, dims = 1)
+    Y .-= y_mean
+    x_std = std(X, dims = 1)
+    x_std[x_std .== 0] .= 1.0
+    X ./= x_std
+    y_std = std(Y, dims = 1)
+    y_std[y_std .== 0] .= 1.0
+    Y ./= y_std
+    return X, Y
+end
+
+function _svd_flip_1d(u, v)
+    # equivalent of https://github.com/scikit-learn/scikit-learn/blob/80598905e517759b4696c74ecc35c6e2eb508cff/sklearn/cross_decomposition/_pls.py#L149
+    biggest_abs_val_idx = findmax(abs.(vec(u)))[2]
+    sign_ = sign(u[biggest_abs_val_idx])
+    u .*= sign_
+    v .*= sign_
+end
+
+function _get_first_singular_vectors_power_method(X, Y)
+    my_eps = eps()
+    y_score = vec(Y)
+    x_weights = transpose(X) * y_score / dot(y_score, y_score)
+    x_weights ./= (sqrt(dot(x_weights, x_weights)) + my_eps)
+    x_score = X * x_weights
+    y_weights = transpose(Y) * x_score / dot(x_score, x_score)
+    y_score = Y * y_weights / (dot(y_weights, y_weights) + my_eps)
+    #equivalent in intent to https://github.com/scikit-learn/scikit-learn/blob/80598905e517759b4696c74ecc35c6e2eb508cff/sklearn/cross_decomposition/_pls.py#L66
+    if any(isnan.(x_weights)) || any(isnan.(y_weights))
+        return false, false
+    end
+    return x_weights, y_weights
+end
+
+function _modified_pls(X, Y, n_components)
+    x_weights_ = zeros(size(X, 2), n_components)
+    _x_scores = zeros(size(X, 1), n_components)
+    x_loadings_ = zeros(size(X, 2), n_components)
+    Xk, Yk = _center_scale(X, Y)
+
+    for k in 1:n_components
+        x_weights, y_weights = _get_first_singular_vectors_power_method(Xk, Yk)
+
+        if x_weights == false
+            break
+        end
+
+        _svd_flip_1d(x_weights, y_weights)
+        x_scores = Xk * x_weights
+        x_loadings = transpose(x_scores) * Xk / dot(x_scores, x_scores)
+        Xk = Xk - (x_scores * x_loadings)
+        y_loadings = transpose(x_scores) * Yk / dot(x_scores, x_scores)
+        Yk = Yk - x_scores * y_loadings
+        x_weights_[:, k] = x_weights
+        _x_scores[:, k] = x_scores
+        x_loadings_[:, k] = vec(x_loadings)
+    end
+
+    x_rotations_ = x_weights_ * pinv(transpose(x_loadings_) * x_weights_)
+    return x_rotations_
+end
+
+### MODIFIED PLS ABOVE ###
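+
+# Note: _modified_pls runs one power-method pass per component to approximate
+# the leading singular-vector pair of Xk' * Yk, deflates Xk and Yk, and forms
+# x_rotations_, the analogue of sklearn's `x_rotations_` that _ge_compute_pls
+# consumes above.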
diff --git a/src/Surrogates.jl b/src/Surrogates.jl
index ef2137fdf..eaf50ed44 100644
--- a/src/Surrogates.jl
+++ b/src/Surrogates.jl
@@ -12,12 +12,12 @@ include("Lobachevsky.jl")
 include("LinearSurrogate.jl")
 include("InverseDistanceSurrogate.jl")
 include("SecondOrderPolynomialSurrogate.jl")
-
 include("Wendland.jl")
 include("MOE.jl") #rewrite gaussian mixture with own algorithm to fix deps issue
 include("VariableFidelity.jl")
 include("Earth.jl")
 include("GEK.jl")
+include("GEKPLS.jl")

 current_surrogates = ["Kriging", "LinearSurrogate", "LobachevskySurrogate",
                       "NeuralSurrogate",
@@ -81,6 +81,7 @@ function PolyChaosStructure(; op)
 end

 export current_surrogates
+export GEKPLS
 export RadialBasisStructure, KrigingStructure, LinearStructure, InverseDistanceStructure
 export LobachevskyStructure, NeuralStructure, RandomForestStructure,
        SecondOrderPolynomialStructure
diff --git a/test/GEKPLS.jl b/test/GEKPLS.jl
new file mode 100644
index 000000000..26f33f39b
--- /dev/null
+++ b/test/GEKPLS.jl
@@ -0,0 +1,239 @@
+using Surrogates
+using Zygote
+
+function vector_of_tuples_to_matrix(v)
+    #convert training data generated by surrogate sampling into a matrix suitable for GEKPLS
+    num_rows = length(v)
+    num_cols = length(first(v))
+    K = zeros(num_rows, num_cols)
+    for row in 1:num_rows
+        for col in 1:num_cols
+            K[row, col] = v[row][col]
+        end
+    end
+    return K
+end
+
+function vector_of_tuples_to_matrix2(v)
+    #convert gradients into matrix form
+    num_rows = length(v)
+    num_cols = length(first(first(v)))
+    K = zeros(num_rows, num_cols)
+    for row in 1:num_rows
+        for col in 1:num_cols
+            K[row, col] = v[row][1][col]
+        end
+    end
+    return K
+end
+
+## water flow function tests
+function water_flow(x)
+    r_w = x[1]
+    r = x[2]
+    T_u = x[3]
+    H_u = x[4]
+    T_l = x[5]
+    H_l = x[6]
+    L = x[7]
+    K_w = x[8]
+    log_val = log(r / r_w)
+    return (2 * pi * T_u * (H_u - H_l)) /
+           (log_val * (1 + (2 * L * T_u / (log_val * r_w^2 * K_w)) + T_u / T_l))
+end
+
+n = 1000
+d = 8
+lb = [0.05, 100, 63070, 990, 63.1, 700, 1120, 9855]
+ub = [0.15, 50000, 115600, 1110, 116, 820, 1680, 12045]
+x = sample(n, lb, ub, SobolSample())
+X = vector_of_tuples_to_matrix(x)
+grads = vector_of_tuples_to_matrix2(gradient.(water_flow, x))
+y = reshape(water_flow.(x), (size(x, 1), 1))
+xlimits = hcat(lb, ub)
+n_test = 100
+x_test = sample(n_test, lb, ub, GoldenSample())
+X_test = vector_of_tuples_to_matrix(x_test)
+y_true = water_flow.(x_test)
+
+@testset "Test 1: Water Flow Function Test (dimensions = 8; n_comp = 2; extra_points = 2)" begin
+    n_comp = 2
+    delta_x = 0.0001
+    extra_points = 2
+    initial_theta = 0.01
+    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g(X_test)
+    rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
+    @test isapprox(rmse, 0.03, atol = 0.02) #rmse: 0.039
+end
+
+@testset "Test 2: Water Flow Function Test (dimensions = 8; n_comp = 3; extra_points = 2)" begin
+    n_comp = 3
+    delta_x = 0.0001
+    extra_points = 2
+    initial_theta = 0.01
+    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g(X_test)
+    rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
+    @test isapprox(rmse, 0.02, atol = 0.01) #rmse: 0.027
+end
+
+@testset "Test 3: Water Flow Function Test (dimensions = 8; n_comp = 3; extra_points = 3)" begin
+    n_comp = 3
+    delta_x = 0.0001
+    extra_points = 3
+    initial_theta = 0.01
+    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g(X_test)
+    rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
+    @test isapprox(rmse, 0.02, atol = 0.01) #rmse: 0.027
+end
+
+## welded beam tests
+function welded_beam(x)
+    h = x[1]
+    l = x[2]
+    t = x[3]
+    a = 6000 / (sqrt(2) * h * l)
+    b = (6000 * (14 + 0.5 * l) * sqrt(0.25 * (l^2 + (h + t)^2))) /
+        (2 * (0.707 * h * l * (l^2 / 12 + 0.25 * (h + t)^2)))
+    return (sqrt(a^2 + b^2 + l * a * b)) / (sqrt(0.25 * (l^2 + (h + t)^2)))
+end
+
+n = 1000
+d = 3
+lb = [0.125, 5.0, 5.0]
+ub = [1.0, 10.0, 10.0]
+x = sample(n, lb, ub, SobolSample())
+X = vector_of_tuples_to_matrix(x)
+grads = vector_of_tuples_to_matrix2(gradient.(welded_beam, x))
+y = reshape(welded_beam.(x), (size(x, 1), 1))
+xlimits = hcat(lb, ub)
+n_test = 100
+x_test = sample(n_test, lb, ub, GoldenSample())
+X_test = vector_of_tuples_to_matrix(x_test)
+y_true = welded_beam.(x_test)
+
+@testset "Test 4: Welded Beam Function Test (dimensions = 3; n_comp = 3; extra_points = 2)" begin
+    n_comp = 3
+    delta_x = 0.0001
+    extra_points = 2
+    initial_theta = 0.01
+    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g(X_test)
+    rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
+    @test isapprox(rmse, 39.0, atol = 0.5) #rmse: 38.988
+end
+
+@testset "Test 5: Welded Beam Function Test (dimensions = 3; n_comp = 2; extra_points = 2)" begin
+    n_comp = 2
+    delta_x = 0.0001
+    extra_points = 2
+    initial_theta = 0.01
+    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g(X_test)
+    rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
+    @test isapprox(rmse, 39.5, atol = 0.5) #rmse: 39.481
+end
+
+## increasing extra points increases accuracy
+@testset "Test 6: Welded Beam Function Test (dimensions = 3; n_comp = 2; extra_points = 4)" begin
+    n_comp = 2
+    delta_x = 0.0001
+    extra_points = 4
+    initial_theta = 0.01
+    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g(X_test)
+    rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
+    @test isapprox(rmse, 37.5, atol = 0.5) #rmse: 37.87
+end
+
+## sphere function tests
+function sphere_function(x)
+    return sum(x .^ 2)
+end
+
+## 3D
+n = 100
+lb = [-5.0, -5.0, -5.0]
+ub = [5.0, 5.0, 5.0]
+x = sample(n, lb, ub, SobolSample())
+X = vector_of_tuples_to_matrix(x)
+grads = vector_of_tuples_to_matrix2(gradient.(sphere_function, x))
+y = reshape(sphere_function.(x), (size(x, 1), 1))
+xlimits = hcat(lb, ub)
+n_test = 100
+x_test = sample(n_test, lb, ub, GoldenSample())
+X_test = vector_of_tuples_to_matrix(x_test)
+y_true = sphere_function.(x_test)
+
+@testset "Test 7: Sphere Function Test (dimensions = 3; n_comp = 2; extra_points = 2)" begin
+    n_comp = 2
+    delta_x = 0.0001
+    extra_points = 2
+    initial_theta = 0.01
+    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g(X_test)
+    rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
+    @test isapprox(rmse, 0.001, atol = 0.05) #rmse: 0.00083
+end
+
+## 2D
+n = 50
+d = 2
+lb = [-10.0, -10.0]
+ub = [10.0, 10.0]
+x = sample(n, lb, ub, SobolSample())
+X = vector_of_tuples_to_matrix(x)
+grads = vector_of_tuples_to_matrix2(gradient.(sphere_function, x))
+y = reshape(sphere_function.(x), (size(x, 1), 1))
+xlimits = hcat(lb, ub)
+n_test = 10
+x_test = sample(n_test, lb, ub, GoldenSample())
+X_test = vector_of_tuples_to_matrix(x_test)
+y_true = sphere_function.(x_test)
+
+@testset "Test 8: Sphere Function Test (dimensions = 2; n_comp = 2; extra_points = 2)" begin
+    n_comp = 2
+    delta_x = 0.0001
+    extra_points = 2
+    initial_theta = 0.01
+    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g(X_test)
+    rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
+    @test isapprox(rmse, 0.1, atol = 0.5) #rmse: 0.0022
+end
+
+@testset "Test 9: Add Point Test (dimensions = 3; n_comp = 2; extra_points = 2)" begin
+    #first we create a surrogate model with just 3 input points
+    initial_x_vec = [(1.0, 2.0, 3.0), (4.0, 5.0, 6.0), (7.0, 8.0, 9.0)]
+    initial_y = reshape(sphere_function.(initial_x_vec), (size(initial_x_vec, 1), 1))
+    initial_X = vector_of_tuples_to_matrix(initial_x_vec)
+    initial_grads = vector_of_tuples_to_matrix2(gradient.(sphere_function, initial_x_vec))
+    lb = [-5.0, -5.0, -5.0]
+    ub = [10.0, 10.0, 10.0]
+    xlimits = hcat(lb, ub)
+    n_comp = 2
+    delta_x = 0.0001
+    extra_points = 2
+    initial_theta = 0.01
+    g = GEKPLS(initial_X, initial_y, initial_grads, n_comp, delta_x, xlimits, extra_points,
+               initial_theta)
+    n_test = 100
+    x_test = sample(n_test, lb, ub, GoldenSample())
+    X_test = vector_of_tuples_to_matrix(x_test)
+    y_true = sphere_function.(x_test)
+    y_pred1 = g(X_test)
+    rmse1 = sqrt(sum(((y_pred1 - y_true) .^ 2) / n_test)) #rmse1 = 31.91
+
+    #then we update the model with more points to see if performance improves
+    n = 100
+    x = sample(n, lb, ub, SobolSample())
+    X = vector_of_tuples_to_matrix(x)
+    grads = vector_of_tuples_to_matrix2(gradient.(sphere_function, x))
+    y = reshape(sphere_function.(x), (size(x, 1), 1))
+    add_point!(g, X, y, grads)
+    y_pred2 = g(X_test)
+    rmse2 = sqrt(sum(((y_pred2 - y_true) .^ 2) / n_test)) #rmse2 = 0.0015
+    @test (rmse2 < rmse1)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 9a5353848..e435b486f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -15,6 +15,7 @@ for pkg in ["SurrogatesAbstractGPs", "SurrogatesFlux", "SurrogatesPolyChaos",
     end
 end

+@time @safetestset "GEKPLS.jl" begin include("GEKPLS.jl") end
 @time @safetestset "Radials.jl" begin include("Radials.jl") end
 @time @safetestset "Kriging.jl" begin include("Kriging.jl") end
 @time @safetestset "Sampling" begin include("sampling.jl") end