diff --git a/src/GEKPLS.jl b/src/GEKPLS.jl
new file mode 100644
index 000000000..88e2ee615
--- /dev/null
+++ b/src/GEKPLS.jl
@@ -0,0 +1,399 @@
+using LinearAlgebra
+using ScikitLearn
+using Statistics
+
+@sk_import cross_decomposition: PLSRegression #todo - replace scikit-learn's PLS with a pure Julia alternative
+
+mutable struct GEKPLS <: AbstractSurrogate
+    x::Matrix{Float64} #1
+    y::Matrix{Float64} #2
+    grads::Matrix{Float64} #3
+    xl::Matrix{Float64} #xlimits #4
+    delta::Float64 #5
+    extra_points::Int64 #6
+    num_components::Int64 #7
+    beta::Vector{Float64} #8
+    gamma::Matrix{Float64} #9
+    theta::Vector{Float64} #10
+    reduced_likelihood_function_value::Float64 #11
+    X_offset::Matrix{Float64} #12
+    X_scale::Matrix{Float64} #13
+    X_after_std::Matrix{Float64} #14 - X after standardization
+    pls_mean::Matrix{Float64} #15
+    y_mean::Float64 #16
+    y_std::Float64 #17
+end
+
+#constructor for the GEKPLS struct
+function GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, θ)
+    theta = [θ for i in 1:n_comp]
+    pls_mean, X_after_PLS, y_after_PLS = _ge_compute_pls(X, y, n_comp, grads, delta_x, xlimits, extra_points)
+    X_after_std, y_after_std, X_offset, y_mean, X_scale, y_std = standardization(X_after_PLS, y_after_PLS)
+    D, ij = cross_distances(X_after_std)
+    pls_mean_reshaped = reshape(pls_mean, (size(X, 2), n_comp))
+    d = componentwise_distance_PLS(D, "squar_exp", n_comp, pls_mean_reshaped)
+    nt, nd = size(X_after_PLS)
+    beta, gamma, reduced_likelihood_function_value = _reduced_likelihood_function(theta, "squar_exp", d, nt, ij, y_after_std)
+    return GEKPLS(X, y, grads, xlimits, delta_x, extra_points, n_comp, beta, gamma, theta, reduced_likelihood_function_value,
+                  X_offset, X_scale, X_after_std, pls_mean_reshaped, y_mean, y_std)
+end
+
+# predictor
+function (g::GEKPLS)(X_test)
+    n_eval, n_features_x = size(X_test)
+    X_cont = (X_test .- g.X_offset) ./ g.X_scale
+    dx = differences(X_cont, g.X_after_std)
+    pred_d = componentwise_distance_PLS(dx, "squar_exp", g.num_components, g.pls_mean)
+    nt = size(g.X_after_std, 1)
+    r = transpose(reshape(squar_exp(g.theta, pred_d), (nt, n_eval)))
+    f = ones(n_eval, 1)
+    y_ = (f * g.beta) + (r * g.gamma)
+    y = g.y_mean .+ g.y_std * y_
+    return y
+end
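A quick usage sketch of the constructor and predictor above (illustrative only, not part of the diff): the toy quadratic, sizes, and hyperparameter values here are assumptions, and the real exercise of this API is the water-flow test file below.

    f(x) = x[1]^2 + x[2]^2 + x[3]^2   # toy objective (GEKPLS needs dim >= 3)
    grad_f(x) = 2 .* x                # its analytic gradient

    n, d = 50, 3
    X = rand(n, d)                                    # n×d training inputs in [0, 1]
    y = reshape([f(X[i, :]) for i in 1:n], n, 1)      # n×1 outputs
    grads = vcat([grad_f(X[i, :])' for i in 1:n]...)  # n×d gradient matrix
    xlimits = hcat(zeros(d), ones(d))                 # d×2 lower/upper bounds

    g = GEKPLS(X, y, grads, 2, 1e-4, xlimits, 2, 0.01)  # n_comp, delta_x, extra_points, θ
    y_pred = g(X[1:5, :])                               # 5×1 predictions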
+"""
+Gradient-enhanced PLS coefficients.
+
+Parameters
+----------
+X: [n_obs, dim] - The input variables.
+y: [n_obs, ny] - The output variable.
+n_comp: int - Number of principal components used.
+grads: [n_obs, dim] - The gradient values.
+delta_x: real - The step used in the first-order Taylor approximation.
+xlimits: [dim, 2] - The lower and upper variable bounds.
+extra_points: int - The number of extra points per training point.
+
+Returns
+-------
+pls_mean: [dim, n_comp] - The PLS coefficients.
+X: [n_obs + extra_points*nt, dim] - X with the extra points appended (when extra_points > 0).
+y: [n_obs + extra_points*nt, 1] - y with the extra points appended (when extra_points > 0).
+"""
+function _ge_compute_pls(X, y, n_comp, grads, delta_x, xlimits, extra_points)
+    # this function is equivalent to a combination of
+    # https://github.com/SMTorg/smt/blob/f124c01ffa78c04b80221dded278a20123dac742/smt/utils/kriging_utils.py#L1036
+    # and https://github.com/SMTorg/smt/blob/f124c01ffa78c04b80221dded278a20123dac742/smt/surrogate_models/gekpls.py#L48
+    nt, dim = size(X)
+    XX = zeros(0, dim)
+    yy = zeros(0, size(y)[2])
+    _pls = PLSRegression(n_comp)
+    coeff_pls = zeros((dim, n_comp, nt))
+    for i in 1:nt
+        if dim >= 3 #todo - this is consistent with SMT but inefficient; move outside the for loop and evaluate dim >= 3 just once
+            bb_vals = circshift(boxbehnken(dim, 1), 1)
+            _X = zeros((size(bb_vals)[1], dim))
+            _y = zeros((size(bb_vals)[1], 1))
+            bb_vals = bb_vals .* (delta_x * (xlimits[:, 2] - xlimits[:, 1]))' #smt calls this sign; we call it bb_vals
+            _X = X[i, :]' .+ bb_vals
+            bb_vals = bb_vals .* grads[i, :]'
+            _y = y[i, :] .+ sum(bb_vals, dims=2)
+        else
+            println("GEKPLS for less than 3 dimensions is coming soon")
+        end
+        _pls.fit(_X, _y)
+        coeff_pls[:, :, i] = _pls.x_rotations_ #size of _pls.x_rotations_ is (dim, n_comp)
+        if extra_points != 0
+            start_index = max(1, length(coeff_pls[:, 1, i]) - extra_points + 1) #todo: evaluate just once
+            max_coeff = sortperm(broadcast(abs, coeff_pls[:, 1, i]))[start_index:end]
+            for ii in max_coeff
+                XX = [XX; transpose(X[i, :])]
+                XX[end, ii] += delta_x * (xlimits[ii, 2] - xlimits[ii, 1])
+                yy = [yy; y[i]]
+                yy[end] += grads[i, ii] * delta_x * (xlimits[ii, 2] - xlimits[ii, 1])
+            end
+        end
+    end
+    if extra_points != 0
+        X = [X; XX]
+        y = [y; yy]
+    end
+    pls_mean = mean(broadcast(abs, coeff_pls), dims=3)
+    return pls_mean, X, y
+end
+
+######start of bbdesign######
+
+# Adapted from 'ExperimentalDesign.jl: Design of Experiments in Julia'
+# https://github.com/phrb/ExperimentalDesign.jl
+#
+# MIT License
+#
+# ExperimentalDesign.jl: Design of Experiments in Julia
+# Copyright (C) 2019 Pedro Bruel
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
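A quick sanity check of the Box-Behnken helper, assuming the functions above are in scope (illustrative sketch, not part of the diff). `boxbehnken(3)` pairs the factors (1,2), (1,3), (2,3), writes the 4-run two-level full factorial into each pair of columns, and appends 3 center rows from the `points` lookup, giving a 15×3 design over {-1, 0, 1}:

    design = boxbehnken(3)
    size(design)       # (15, 3)
    design[1:4, :]     # ±1 in columns 1 and 2, zeros in column 3
    design[13:15, :]   # the all-zero center runs

Note that `_ge_compute_pls` calls the two-argument form `boxbehnken(dim, 1)`, which keeps a single center row instead.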
+
+function boxbehnken(matrix_size::Int)
+    boxbehnken(matrix_size, matrix_size)
+end
+
+function boxbehnken(matrix_size::Int, center::Int)
+    @assert matrix_size >= 3
+
+    A_fact = explicit_fullfactorial(Tuple([-1, 1] for i = 1:2))
+
+    rows = floor(Int, (0.5 * matrix_size * (matrix_size - 1)) * size(A_fact)[1])
+
+    A = zeros(rows, matrix_size)
+
+    l = 0
+    for i in 1:matrix_size-1
+        for j in i+1:matrix_size
+            l = l + 1
+            A[max(0, (l - 1) * size(A_fact)[1])+1:l*size(A_fact)[1], i] = A_fact[:, 1]
+            A[max(0, (l - 1) * size(A_fact)[1])+1:l*size(A_fact)[1], j] = A_fact[:, 2]
+        end
+    end
+
+    if center == matrix_size
+        if matrix_size <= 16
+            points = [0, 0, 3, 3, 6, 6, 6, 8, 9, 10, 12, 12, 13, 14, 15, 16]
+            center = points[matrix_size]
+        end
+    end
+
+    A = transpose(hcat(transpose(A), transpose(zeros(center, matrix_size))))
+end
+
+function explicit_fullfactorial(factors::Tuple)
+    explicit_fullfactorial(fullfactorial(factors))
+end
+
+function explicit_fullfactorial(iterator::Base.Iterators.ProductIterator)
+    hcat(vcat.(collect(iterator)...)...)
+end
+
+function fullfactorial(factors::Tuple)
+    Base.Iterators.product(factors...)
+end
+
+######end of bb design######
+
+"""
+We subtract the mean from each variable, then divide the values of each
+variable by its standard deviation.
+
+Parameters
+----------
+X - The input variables.
+y - The output variable.
+
+Returns
+-------
+X: [n_obs, dim] - The standardized input matrix.
+y: [n_obs, 1] - The standardized output vector.
+X_offset: The mean of each input variable.
+y_mean: The mean of the output variable.
+X_scale: The standard deviation of each input variable.
+y_std: The standard deviation of the output variable.
+"""
+function standardization(X, y)
+    #equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L21
+    X_offset = mean(X, dims=1)
+    X_scale = std(X, dims=1)
+    X_scale = map(x -> (x == 0.0) ? 1.0 : x, X_scale) #to prevent division by zero below
+    y_mean = mean(y)
+    y_std = std(y)
+    y_std = (y_std == 0.0) ? 1.0 : y_std #to prevent division by zero below
+    X = (X .- X_offset) ./ X_scale
+    y = (y .- y_mean) ./ y_std
+    return X, y, X_offset, y_mean, X_scale, y_std
+end
+
+"""
+Computes the nonzero componentwise cross-distances between the vectors in X.
+
+Parameters
+----------
+X: [n_obs, dim]
+
+Returns
+-------
+D: [n_obs * (n_obs - 1) / 2, dim] - The cross-distances between the vectors in X.
+ij: [n_obs * (n_obs - 1) / 2, 2] - The indices i and j of the vectors in X associated with the cross-distances in D.
+"""
+function cross_distances(X)
+    # equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L86
+    n_samples, n_features = size(X)
+    n_nonzero_cross_dist = (n_samples * (n_samples - 1)) ÷ 2
+    ij = zeros((n_nonzero_cross_dist, 2))
+    D = zeros((n_nonzero_cross_dist, n_features))
+    ll_1 = 0
+    for k in 1:n_samples-1
+        ll_0 = ll_1 + 1
+        ll_1 = ll_0 + n_samples - k - 1
+        ij[ll_0:ll_1, 1] .= k
+        ij[ll_0:ll_1, 2] = k+1:1:n_samples
+        D[ll_0:ll_1, :] = -(X[(k+1):n_samples, :] .- X[k, :]')
+    end
+    return D, Int.(ij)
+end
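To make the (D, ij) layout concrete, here is a tiny worked example, assuming `cross_distances` is in scope (illustrative, not part of the diff). For three 2-d points there are 3·2/2 = 3 pairs:

    X = [0.0 0.0;
         1.0 0.0;
         1.0 2.0]
    D, ij = cross_distances(X)
    # ij == [1 2; 1 3; 2 3]                   (row i paired with row j)
    # D  == [-1.0 0.0; -1.0 -2.0; 0.0 -2.0]   (componentwise X[i,:] - X[j,:])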
+
+"""
+Computes the nonzero componentwise cross-spatial-correlation-distance
+between the vectors in X.
+
+Equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L1257
+with some simplifications (theta and return_derivative are removed, as they are not required for GEKPLS).
+
+Parameters
+----------
+D: [n_obs * (n_obs - 1) / 2, dim] - The L1 cross-distances between the vectors in X.
+corr: str - Name of the correlation function used: squar_exp or abs_exp.
+n_comp: int - Number of principal components used.
+coeff_pls: [dim, n_comp] - The PLS coefficients.
+
+Returns
+-------
+D_corr: [n_obs * (n_obs - 1) / 2, n_comp] - The componentwise cross-spatial-correlation-distance between the vectors in X.
+"""
+function componentwise_distance_PLS(D, corr, n_comp, coeff_pls)
+    #todo - figure out how to handle this computation in the case of very large matrices,
+    #similar to what SMT does at
+    #https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L1257
+    D_corr = zeros((size(D)[1], n_comp))
+    if corr == "squar_exp"
+        D_corr = D .^ 2 * coeff_pls .^ 2
+    else #abs_exp
+        D_corr = abs.(D) * abs.(coeff_pls)
+    end
+    return D_corr
+end
+
+"""
+Squared exponential correlation model.
+Equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L604
+
+Parameters
+----------
+theta: Hyperparameters of the correlation model.
+d: Componentwise distances from componentwise_distance_PLS.
+
+Returns
+-------
+r: Array containing the values of the autocorrelation model.
+"""
+function squar_exp(theta, d)
+    n_components = size(d)[2]
+    theta = reshape(theta, (1, n_components))
+    return exp.(-sum(theta .* d, dims=2))
+end
+
+function differences(X, Y)
+    #equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/utils/kriging_utils.py#L392
+    #code credit: Elias Carvalho - https://stackoverflow.com/questions/72392010/row-wise-operations-between-matrices-in-julia
+    Rx = repeat(X, inner=(size(Y, 1), 1))
+    Ry = repeat(Y, size(X, 1))
+    return Rx - Ry
+end
+
+"""
+This function is a loose translation of SMT code from
+https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/surrogate_models/krg_based.py#L247
+It determines the BLUP parameters and evaluates the reduced likelihood function for the given theta.
+
+Parameters
+----------
+theta: array containing the parameters at which the Gaussian process model parameters should be determined.
+kernel_type: name of the correlation function.
+d: the componentwise cross-spatial-correlation-distance between the vectors in X.
+nt: number of training points.
+ij: the indices i and j of the vectors in X associated with the cross-distances in D.
+y_norma: standardized y values.
+noise: noise hyperparameter - increasing noise reduces reduced_likelihood_function_value.
+
+Returns
+-------
+beta: generalized least-squares regression weights.
+gamma: Gaussian process weights.
+reduced_likelihood_function_value: real - the value of the reduced likelihood function associated with the given autocorrelation parameters theta.
+"""
+function _reduced_likelihood_function(theta, kernel_type, d, nt, ij, y_norma, noise=0.0)
+    reduced_likelihood_function_value = -Inf
+    nugget = 1000000.0 * eps() #a jitter for numerical stability; reducing the multiplier below 1000000.0 results in a positive-definiteness error during the Cholesky decomposition
+    if kernel_type == "squar_exp" #todo - add other kernel types like abs_exp, matern52, etc.
+        r = squar_exp(theta, d)
+    end
+    R = (I + zeros(nt, nt)) .* (1.0 + nugget + noise)
+    for k in 1:size(ij)[1] #todo - speed up using @inbounds / @simd where required
+        R[ij[k, 1], ij[k, 2]] = r[k]
+        R[ij[k, 2], ij[k, 1]] = r[k]
+    end
+    C = cholesky(R).L #todo - values diverge from the SMT code at this point; verify impact
+    F = ones(nt, 1) #todo - examine whether this should be a parameter of this function
+    Ft = C \ F
+    Q, G = qr(Ft)
+    Q = Array(Q)
+    Yt = C \ y_norma
+    #todo - in SMT, they check whether the matrix is ill-conditioned using SVD; verify and include if necessary
+    beta = G \ [(transpose(Q) ⋅ Yt)]
+    rho = Yt .- (Ft .* beta)
+    gamma = transpose(C) \ rho
+    sigma2 = sum((rho) .^ 2, dims=1) / nt
+    detR = prod(diag(C) .^ (2.0 / nt))
+    reduced_likelihood_function_value = -nt * log10(sum(sigma2)) - nt * log10(detR)
+    return beta, gamma, reduced_likelihood_function_value
+end
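A minimal end-to-end sketch of the BLUP pipeline on a tiny standardized dataset, assuming the functions above are in scope (illustrative, not part of the diff; the PLS coefficients are faked as ones purely to exercise the correlation and likelihood machinery):

    X_std = [-1.0 -1.0;
              0.0  0.5;
              1.0  0.5]
    y_std = reshape([-1.0, 0.2, 0.8], 3, 1)
    D, ij = cross_distances(X_std)                               # 3 pairs of 3 points
    d = componentwise_distance_PLS(D, "squar_exp", 1, ones(2, 1))
    beta, gamma, rlfv = _reduced_likelihood_function([0.1], "squar_exp", d, 3, ij, y_std)
    # beta holds the GLS mean weight and gamma the kriging weights; a prediction
    # at a new point x* is then ones(1,1)*beta + r(x*)'*gamma, as in the predictor above.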
+ + """ + #equivalent of https://github.com/SMTorg/smt/blob/4a4df255b9259965439120091007f9852f41523e/smt/surrogate_models/krg_based.py#L247 + reduced_likelihood_function_value = -Inf + nugget = 1000000.0 * eps() #a jitter for numerical stability; reducing the multiple from 1000000.0 results in positive definite error for Cholesky decomposition; + if kernel_type =="squar_exp" #todo - add other kernel types like abs_exp, matern52 etc. + r = squar_exp(theta,d) + end + R = (I + zeros(nt,nt)) .* (1.0 + nugget + noise) + for k in 1:size(ij)[1] #todo - speed up using @inbounds / @simd where required + R[ij[k,1],ij[k,2]]=r[k] + R[ij[k,2],ij[k,1]]=r[k] + end + C = cholesky(R).L #todo - #values diverge at this point from SMT code; verify impact + F = ones(nt,1) #todo - examine if this should be a parameter for this function + Ft = C\F + Q, G = qr(Ft) + Q = Array(Q) + Yt = C\y_norma + #todo - in smt, they check if the matrix is ill-conditioned using SVD. Verify and include if necessary + beta = G \ [(transpose(Q) ⋅ Yt)] + rho = Yt .- (Ft .* beta) + gamma = transpose(C) \ rho + sigma2 = sum((rho).^2, dims=1)/nt + detR = prod(diag(C).^(2.0/nt)) + reduced_likelihood_function_value = - nt * log10(sum(sigma2)) - nt * log10(detR) + return beta, gamma, reduced_likelihood_function_value +end + + diff --git a/test/GEKPLS.jl b/test/GEKPLS.jl new file mode 100644 index 000000000..dba541414 --- /dev/null +++ b/test/GEKPLS.jl @@ -0,0 +1,95 @@ +using Surrogates +using Zygote + + +function water_flow(x) + r_w = x[1] + r = x[2] + T_u = x[3] + H_u = x[4] + T_l = x[5] + H_l = x[6] + L = x[7] + K_w = x[8] + log_val = log(r/r_w) + return (2*pi*T_u*(H_u - H_l))/ ( log_val*(1 + (2*L*T_u/(log_val*r_w^2*K_w)) + T_u/T_l)) +end + + + + +function vector_of_tuples_to_matrix(v) + #convert training data generated by surrogate sampling into a matrix suitable for GEKPLS + num_rows = length(v) + num_cols = length(first(v)) + K = zeros(num_rows, num_cols) + for row in 1:num_rows + for col in 1:num_cols + K[row, col]=v[row][col] + end + end + return K +end + +function vector_of_tuples_to_matrix2(v) + #convert gradients into matrix form + num_rows = length(v) + num_cols = length(first(first(v))) + K = zeros(num_rows, num_cols) + for row in 1:num_rows + for col in 1:num_cols + K[row, col] = v[row][1][col] + end + end + return K +end + +n = 1000 +d = 8 +lb = [0.05,100,63070,990,63.1,700,1120,9855] +ub = [0.15,50000,115600,1110,116,820,1680,12045] +x = sample(n,lb,ub,SobolSample()) +X = vector_of_tuples_to_matrix(x) +grads = vector_of_tuples_to_matrix2(gradient.(water_flow, x)) +y = reshape(water_flow.(x),(size(x,1),1)) +xlimits = hcat(lb, ub) +n_test = 100 +x_test = sample(n_test,lb,ub,GoldenSample()) +X_test = vector_of_tuples_to_matrix(x_test) +y_true = water_flow.(x_test) + +@testset "Water Flow Function Test (dimensions = 8; n_comp = 2; extra_points = 2; initial_theta=.01)" begin + n_comp = 2 + delta_x = 0.0001 + extra_points = 2 + initial_theta = 0.01 + g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta) #change hard-coded 2 param to variable + y_pred = g(X_test) + rmse = sqrt(sum(((y_pred - y_true).^2)/n_test)) + println("rmse: ", rmse) #rmse: 0.028999713540341848 + @test isapprox(rmse, 0.02, atol=0.01) +end + +@testset "Water Flow Function Test (dimensions = 8; n_comp = 3; extra_points = 2; initial_theta=.01)" begin + n_comp = 3 + delta_x = 0.0001 + extra_points = 2 + initial_theta = 0.01 + g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta) #change hard-coded 2 param to 
+
+@testset "Water Flow Function Test (dimensions = 8; n_comp = 3; extra_points = 3; initial_theta = 0.01)" begin
+    n_comp = 3
+    delta_x = 0.0001
+    extra_points = 3
+    initial_theta = 0.01
+    g = GEKPLS(X, y, grads, n_comp, delta_x, xlimits, extra_points, initial_theta)
+    y_pred = g(X_test)
+    rmse = sqrt(sum(((y_pred - y_true) .^ 2) / n_test))
+    println("rmse: ", rmse) #rmse: 0.023712136706924902
+    @test isapprox(rmse, 0.02, atol=0.01)
+end
\ No newline at end of file
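For reviewers who want to run the new tests in isolation, a sketch of how the file might be wired into test/runtests.jl, assuming the suite uses the SafeTestsets pattern common in SciML test suites (an assumption; the runtests.jl hookup is not part of this diff):

    using SafeTestsets

    # run the new GEKPLS tests in an isolated module
    @safetestset "GEKPLS" begin
        include("GEKPLS.jl")
    end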