diff --git a/docs/pages.jl b/docs/pages.jl
index e870f3b6..ce76c3fd 100644
--- a/docs/pages.jl
+++ b/docs/pages.jl
@@ -16,6 +16,7 @@ pages = ["index.md"
         "Gradient Enhanced Kriging" => "gek.md",
         "GEKPLS" => "gekpls.md",
         "MOE" => "moe.md",
+        "Parallel Optimization" => "parallel.md"
     ]
     "User guide" => [
         "Samples" => "samples.md",
diff --git a/docs/src/parallel.md b/docs/src/parallel.md
new file mode 100755
index 00000000..2388e1ee
--- /dev/null
+++ b/docs/src/parallel.md
@@ -0,0 +1,77 @@
+# Parallel Optimization
+
+There are situations where it can be beneficial to run multiple optimizations in parallel. For example, if your objective function is very expensive to evaluate, it may be worthwhile to evaluate several candidate points concurrently.
+
+## Ask-Tell Interface
+
+To enable parallel optimization, we make use of an Ask-Tell interface. The user constructs the initial surrogate model the same way as for non-parallel surrogates, but instead of calling `surrogate_optimize`, the user calls `potential_optimal_points`. This returns the coordinates of the points that the optimizer has determined are most useful to evaluate next. How the user evaluates these points is up to them. The Ask-Tell interface requires more manual control than `surrogate_optimize`, but it allows for more flexibility. After the points have been evaluated, the user *tells* the surrogate model about them with the `add_point!` function.
+
+## Virtual Points
+
+To ensure that the points of interest returned by `potential_optimal_points` are sufficiently far from each other, the function makes use of *virtual points*. They are used as follows:
+1. `potential_optimal_points` is told to return `n` points.
+2. The point with the best merit function value is selected.
+3. This point is now treated as a virtual point and is assigned a temporary objective value that changes the landscape of the merit function. How the temporary value is chosen depends on the strategy used (see below).
+4. The point with the best merit value under the modified landscape is selected.
+5. The process is repeated until `n` points have been selected.
+
+The following strategies are available for virtual point selection for all optimization algorithms:
+
+- "Minimum Constant Liar (CLmin)":
+  - The virtual point is assigned the lowest objective value observed so far.
+- "Mean Constant Liar (CLmean)":
+  - The virtual point is assigned the mean of the objective values observed so far.
+- "Maximum Constant Liar (CLmax)":
+  - The virtual point is assigned the highest objective value observed so far.
+
+For Kriging surrogates specifically, the following additional strategies are available:
+
+- "Kriging Believer (KB)":
+  - The virtual point is assigned the mean predicted by the Kriging surrogate at that point.
+- "Kriging Believer Upper Bound (KBUB)":
+  - The virtual point is assigned the predicted mean plus $3\sigma$ (three standard errors) at that point.
+- "Kriging Believer Lower Bound (KBLB)":
+  - The virtual point is assigned the predicted mean minus $3\sigma$ (three standard errors) at that point.
+
+
+In general, CLmin and KBLB tend to favor exploitation, while CLmax and KBUB tend to favor exploration. CLmean and KB are a compromise between the two.
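+
+For example, asking for a batch of candidates from the SRBF optimizer over a radial basis surrogate looks like this (a minimal sketch: the objective, bounds, and batch size are placeholders, and any of the constant liar strategies above could be substituted for `MinimumConstantLiar()`; the Kriging Believer variants require a Kriging surrogate):
+
+```julia
+using Surrogates
+
+lb = [0.0, 0.0]
+ub = [5.0, 5.0]
+f = x -> x[1]^2 + x[2]^2
+x = sample(20, lb, ub, SobolSample())
+y = f.(x)
+
+my_surr = RadialBasis(x, y, lb, ub)
+
+# Ask: request 4 candidate points, kept apart via the minimum constant liar strategy
+new_x, merits = potential_optimal_points(SRBF(), MinimumConstantLiar(), lb, ub,
+                                         my_surr, SobolSample(), 4)
+
+# Tell: evaluate the objective at the candidates and update the surrogate
+add_point!(my_surr, new_x, f.(new_x))
+```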
+
+## Examples
+
+```@example
+using Surrogates
+
+lb = 0.0
+ub = 10.0
+f = x -> log(x) * exp(x)
+x = sample(5, lb, ub, SobolSample())
+y = f.(x)
+
+my_k = Kriging(x, y, lb, ub)
+
+for _ in 1:10
+    new_x, eis = potential_optimal_points(EI(), MeanConstantLiar(), lb, ub, my_k, SobolSample(), 3)
+    add_point!(my_k, new_x, f.(new_x))
+end
+```
diff --git a/src/Optimization.jl b/src/Optimization.jl
old mode 100644
new mode 100755
index a2fb005d..81e8e352
--- a/src/Optimization.jl
+++ b/src/Optimization.jl
@@ -3,6 +3,14 @@ using Distributions
 using Zygote
 
 abstract type SurrogateOptimizationAlgorithm end
+abstract type ParallelStrategy end
+
+struct KrigingBeliever <: ParallelStrategy end
+struct KrigingBelieverUpperBound <: ParallelStrategy end
+struct KrigingBelieverLowerBound <: ParallelStrategy end
+struct MinimumConstantLiar <: ParallelStrategy end
+struct MaximumConstantLiar <: ParallelStrategy end
+struct MeanConstantLiar <: ParallelStrategy end
 
 #single objective optimization
 struct SRBF <: SurrogateOptimizationAlgorithm end
@@ -375,6 +383,216 @@ function surrogate_optimize(obj::Function, ::SRBF, lb::Number, ub::Number,
     end
 end
+# Ask SRBF ND
+function potential_optimal_points(::SRBF, strategy, lb, ub, surr::AbstractSurrogate, sample_type::SamplingAlgorithm, n_parallel;
+    num_new_samples = 500)
+
+    scale = 0.2
+    w_range = [0.3, 0.5, 0.7, 0.95]
+    w_cycle = Iterators.cycle(w_range)
+
+    w, state = iterate(w_cycle)
+
+    #Vector containing size in each direction
+    box_size = lb - ub
+    dtol = 1e-3 * norm(ub - lb)
+    d = length(surr.x)
+    incumbent_x = surr.x[argmin(surr.y)]
+
+    new_lb = incumbent_x .- 3 * scale * norm(incumbent_x .- lb)
+    new_ub = incumbent_x .+ 3 * scale * norm(incumbent_x .- ub)
+
+    @inbounds for i in 1:length(new_lb)
+        if new_lb[i] < lb[i]
+            new_lb = collect(new_lb)
+            new_lb[i] = lb[i]
+        end
+        if new_ub[i] > ub[i]
+            new_ub = collect(new_ub)
+            new_ub[i] = ub[i]
+        end
+    end
+
+    new_sample = sample(num_new_samples, new_lb, new_ub, sample_type)
+    s = zeros(eltype(surr.x[1]), num_new_samples)
+    for j in 1:num_new_samples
+        s[j] = surr(new_sample[j])
+    end
+    s_max = maximum(s)
+    s_min = minimum(s)
+
+    d_min = norm(box_size .+ 1)
+    d_max = 0.0
+    for r in 1:length(surr.x)
+        for c in 1:num_new_samples
+            distance_rc = norm(surr.x[r] .- new_sample[c])
+            if distance_rc > d_max
+                d_max = distance_rc
+            end
+            if distance_rc < d_min
+                d_min = distance_rc
+            end
+        end
+    end
+
+    tmp_surr = deepcopy(surr)
+
+
+    new_addition = 0
+    diff_x = zeros(eltype(surr.x[1]), d)
+
+    evaluation_of_merit_function = zeros(float(eltype(surr.x[1])), num_new_samples)
+    proposed_points_x = Vector{typeof(surr.x[1])}(undef, n_parallel)
+    merit_of_proposed_points = zeros(Float64, n_parallel)
+
+    while new_addition < n_parallel
+        #find minimum
+
+        @inbounds for r in eachindex(evaluation_of_merit_function)
+            evaluation_of_merit_function[r] = merit_function(new_sample[r], w, tmp_surr,
+                                                             s_max, s_min, d_max, d_min,
+                                                             box_size)
+        end
+
+        min_index = argmin(evaluation_of_merit_function)
+        new_min_x = new_sample[min_index]
+        min_x_merit = evaluation_of_merit_function[min_index]
+
+        for l in 1:d
+            diff_x[l] = norm(surr.x[l] .- new_min_x)
+        end
+        bit_x = diff_x .> dtol
+        #new_min_x has to have some distance from krig.x
+        if false in bit_x
+            #The new point is not actually that new, discard it!
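+            # (it lies within dtol of an already-sampled point; re-adding a near
+            # duplicate would make the surrogate's interpolation ill-conditioned)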
+
+            deleteat!(evaluation_of_merit_function, min_index)
+            deleteat!(new_sample, min_index)
+
+            if length(new_sample) == 0
+                println("Out of sampling points")
+                return (proposed_points_x[1:new_addition],
+                        merit_of_proposed_points[1:new_addition])
+            end
+        else
+            new_addition += 1
+            proposed_points_x[new_addition] = new_min_x
+            merit_of_proposed_points[new_addition] = min_x_merit
+
+            # Update temporary surrogate using provided strategy
+            calculate_liars(strategy, tmp_surr, surr, new_min_x)
+        end
+
+        #4) Update w
+        w, state = iterate(w_cycle, state)
+    end
+
+    return (proposed_points_x, merit_of_proposed_points)
+end
+
+# Ask SRBF 1D
+function potential_optimal_points(::SRBF, strategy, lb::Number, ub::Number, surr::AbstractSurrogate,
+                                  sample_type::SamplingAlgorithm, n_parallel;
+                                  num_new_samples = 500)
+    scale = 0.2
+    success = 0
+    w_range = [0.3, 0.5, 0.7, 0.95]
+    w_cycle = Iterators.cycle(w_range)
+
+    w, state = iterate(w_cycle)
+
+    box_size = lb - ub
+    success = 0
+    failures = 0
+    dtol = 1e-3 * norm(ub - lb)
+    num_of_iterations = 0
+
+    #1) Sample near incumbent (the scale fraction is arbitrary here)
+    incumbent_x = surr.x[argmin(surr.y)]
+
+    new_lb = incumbent_x - scale * norm(incumbent_x - lb)
+    new_ub = incumbent_x + scale * norm(incumbent_x - ub)
+    if new_lb < lb
+        new_lb = lb
+    end
+    if new_ub > ub
+        new_ub = ub
+    end
+
+    new_sample = sample(num_new_samples, new_lb, new_ub, sample_type)
+
+    #2) Create merit function
+    s = zeros(eltype(surr.x[1]), num_new_samples)
+    for j in 1:num_new_samples
+        s[j] = surr(new_sample[j])
+    end
+    s_max = maximum(s)
+    s_min = minimum(s)
+
+    d_min = box_size + 1
+    d_max = 0.0
+    for r in 1:length(surr.x)
+        for c in 1:num_new_samples
+            distance_rc = norm(surr.x[r] - new_sample[c])
+            if distance_rc > d_max
+                d_max = distance_rc
+            end
+            if distance_rc < d_min
+                d_min = distance_rc
+            end
+        end
+    end
+
+    new_addition = 0
+    proposed_points_x = zeros(eltype(new_sample[1]), n_parallel)
+    merit_of_proposed_points = zeros(eltype(new_sample[1]), n_parallel)
+
+    # Temporary surrogate for virtual points
+    tmp_surr = deepcopy(surr)
+
+    # Loop until we have n_parallel new points
+    while new_addition < n_parallel
+
+        #3) Evaluate merit function at the sampled points
+        evaluation_of_merit_function = merit_function.(new_sample, w, tmp_surr, s_max,
+                                                       s_min, d_max, d_min, box_size)
+
+        #find minimum
+        min_index = argmin(evaluation_of_merit_function)
+        new_min_x = new_sample[min_index]
+        min_x_merit = evaluation_of_merit_function[min_index]
+
+        diff_x = abs.(tmp_surr.x .- new_min_x)
+        bit_x = diff_x .> dtol
+        #new_min_x has to have some distance from krig.x
+        if false in bit_x
+            #The new point is not actually that new, discard it!
+            deleteat!(evaluation_of_merit_function, min_index)
+            deleteat!(new_sample, min_index)
+            if length(new_sample) == 0
+                println("Out of sampling points")
+                return (proposed_points_x[1:new_addition],
+                        merit_of_proposed_points[1:new_addition])
+            end
+        else
+            new_addition += 1
+            proposed_points_x[new_addition] = new_min_x
+            merit_of_proposed_points[new_addition] = min_x_merit
+
+            # Update temporary surrogate using provided strategy
+            calculate_liars(strategy, tmp_surr, surr, new_min_x)
+        end
+
+        #4) Update w
+        w, state = iterate(w_cycle, state)
+    end
+
+    return (proposed_points_x, merit_of_proposed_points)
+end
+
 
 """
 This is an implementation of Lower Confidence Bound (LCB),
 a popular acquisition function in Bayesian optimization.
@@ -569,6 +787,73 @@ function surrogate_optimize(obj::Function, ::EI, lb::Number, ub::Number, krig,
     println("Completed maximum number of iterations")
 end
 
+# Ask EI 1D & ND
+function potential_optimal_points(::EI, strategy, lb, ub, krig, sample_type::SamplingAlgorithm, n_parallel::Number;
+                                  num_new_samples = 100)
+
+    lb = krig.lb
+    ub = krig.ub
+
+    dtol = 1e-3 * norm(ub - lb)
+    eps = 0.01
+
+    tmp_krig = deepcopy(krig) # Temporary copy of the kriging model to store virtual points
+
+    new_x_max = Vector{typeof(tmp_krig.x[1])}(undef, n_parallel) # New x points
+    new_EI_max = zeros(eltype(tmp_krig.x[1]), n_parallel) # EI at the new x points
+
+    for i in 1:n_parallel
+        # Sample lots of points from the design space -- we will evaluate the EI function at these points
+        new_sample = sample(num_new_samples, lb, ub, sample_type)
+
+        # Find the best point so far
+        f_min = minimum(tmp_krig.y)
+
+        # Allocate some arrays
+        evaluations = zeros(eltype(tmp_krig.x[1]), num_new_samples) # Holds EI function evaluations
+        point_found = false # Whether we have found a new point to test
+        while point_found == false
+            # For each point in the sample set, evaluate the Expected Improvement function
+            for j in eachindex(new_sample)
+                std = std_error_at_point(tmp_krig, new_sample[j])
+                u = tmp_krig(new_sample[j])
+                if abs(std) > 1e-6
+                    z = (f_min - u - eps) / std
+                else
+                    z = 0
+                end
+                # Evaluate EI at point new_sample[j]
+                evaluations[j] = (f_min - u - eps) * cdf(Normal(), z) +
+                                 std * pdf(Normal(), z)
+            end
+            # Find the sample which maximizes the EI function
+            index_max = argmax(evaluations)
+            x_new = new_sample[index_max] # x point which maximized EI
+            y_new = maximum(evaluations) # EI at the new point
+            diff_x = [norm(prev_point .- x_new) for prev_point in tmp_krig.x]
+            bit_x = [diff_x_point .> dtol for diff_x_point in diff_x]
+            #new_min_x has to have some distance from tmp_krig.x
+            if false in bit_x
+                #The new point is not actually that new, discard it!
+                deleteat!(evaluations, index_max)
+                deleteat!(new_sample, index_max)
+                if length(new_sample) == 0
+                    println("Out of sampling points")
+                    return (new_x_max[1:(i - 1)],
+                            new_EI_max[1:(i - 1)])
+                end
+            else
+                point_found = true
+                new_x_max[i] = x_new
+                new_EI_max[i] = y_new
+                calculate_liars(strategy, tmp_krig, krig, x_new)
+            end
+        end
+    end
+
+    return (new_x_max, new_EI_max)
+end
+
 """
 This is an implementation of Expected Improvement (EI),
 arguably the most popular acquisition function in Bayesian optimization.
diff --git a/src/Surrogates.jl b/src/Surrogates.jl
old mode 100644
new mode 100755
index eaf50ed4..45df925b
--- a/src/Surrogates.jl
+++ b/src/Surrogates.jl
@@ -18,6 +18,7 @@ include("VariableFidelity.jl")
 include("Earth.jl")
 include("GEK.jl")
 include("GEKPLS.jl")
+include("VirtualStrategy.jl")
 
 current_surrogates = ["Kriging", "LinearSurrogate", "LobachevskySurrogate",
     "NeuralSurrogate",
@@ -88,6 +89,11 @@ export LobachevskyStructure, NeuralStructure, RandomForestStructure,
 export WendlandStructure
 export AbstractSurrogate, SamplingAlgorithm
 export Kriging, RadialBasis, add_point!, current_estimate, std_error_at_point
+# Parallelization Strategies
+export potential_optimal_points
+export MinimumConstantLiar, MaximumConstantLiar, MeanConstantLiar, KrigingBeliever,
+       KrigingBelieverUpperBound, KrigingBelieverLowerBound
+
 # radial basis functions
 export linearRadial, cubicRadial, multiquadricRadial, thinplateRadial
diff --git a/src/VirtualStrategy.jl b/src/VirtualStrategy.jl
new file mode 100644
index 00000000..cc63a0c1
--- /dev/null
+++ b/src/VirtualStrategy.jl
@@ -0,0 +1,35 @@
+# Minimum Constant Liar
+function calculate_liars(::MinimumConstantLiar, tmp_surr::AbstractSurrogate, surr::AbstractSurrogate, new_x)
+    new_y = minimum(surr.y)
+    add_point!(tmp_surr, new_x, new_y)
+end
+
+# Maximum Constant Liar
+function calculate_liars(::MaximumConstantLiar, tmp_surr::AbstractSurrogate, surr::AbstractSurrogate, new_x)
+    new_y = maximum(surr.y)
+    add_point!(tmp_surr, new_x, new_y)
+end
+
+# Mean Constant Liar
+function calculate_liars(::MeanConstantLiar, tmp_surr::AbstractSurrogate, surr::AbstractSurrogate, new_x)
+    new_y = mean(surr.y)
+    add_point!(tmp_surr, new_x, new_y)
+end
+
+# Kriging Believer
+function calculate_liars(::KrigingBeliever, tmp_k::Kriging, k::Kriging, new_x)
+    new_y = k(new_x)
+    add_point!(tmp_k, new_x, new_y)
+end
+
+# Kriging Believer Upper Bound
+function calculate_liars(::KrigingBelieverUpperBound, tmp_k::Kriging, k::Kriging, new_x)
+    new_y = k(new_x) + 3 * std_error_at_point(k, new_x)
+    add_point!(tmp_k, new_x, new_y)
+end
+
+# Kriging Believer Lower Bound
+function calculate_liars(::KrigingBelieverLowerBound, tmp_k::Kriging, k::Kriging, new_x)
+    new_y = k(new_x) - 3 * std_error_at_point(k, new_x)
+    add_point!(tmp_k, new_x, new_y)
+end
\ No newline at end of file
diff --git a/test/parallel.jl b/test/parallel.jl
new file mode 100755
index 00000000..7cc133e5
--- /dev/null
+++ b/test/parallel.jl
@@ -0,0 +1,62 @@
+using Surrogates
+using Test
+
+
+#1D
+lb = 0.0
+ub = 10.0
+f = x -> log(x) * exp(x)
+x = sample(5, lb, ub, SobolSample())
+y = f.(x)
+
+
+# Test lengths of new_x and EI (1D)
+
+my_k = Kriging(x, y, lb, ub)
+new_x, eis = potential_optimal_points(EI(), MeanConstantLiar(), lb, ub, my_k, SobolSample(), 3)
+
+@test length(new_x) == 3
+@test length(eis) == 3
+
+# Test lengths of new_x and SRBF (1D)
+
+my_surr = RadialBasis(x, y, lb, ub)
+new_x, eis = potential_optimal_points(SRBF(), MeanConstantLiar(), lb, ub, my_surr, SobolSample(), 3)
+@test length(new_x) == 3
+@test length(eis) == 3
+
+
+# Test lengths of new_x and EI (ND)
+
+lb = [0.0, 0.0, 1.0]
+ub = [5.0, 7.5, 10.0]
+x = sample(5, lb, ub, SobolSample())
+f = x -> x[1] + x[2] * x[3]
+y = f.(x)
+
+my_k = Kriging(x, y, lb, ub)
+
+new_x, eis = potential_optimal_points(EI(), MeanConstantLiar(), lb, ub, my_k, SobolSample(), 5)
+
+@test length(new_x) == 5
+@test length(eis) == 5
+
+@test length(new_x[1]) == 3
+
+# Test lengths of new_x and SRBF (ND)
+
+my_surr = RadialBasis(x, y, lb, ub)
+new_x, eis = potential_optimal_points(SRBF(), MeanConstantLiar(), lb, ub, my_surr, SobolSample(), 5)
+
+@test length(new_x) == 5
+@test length(eis) == 5
+
+@test length(new_x[1]) == 3
+
+# Check hyperparameter validation for potential_optimal_points
+@test_throws ArgumentError potential_optimal_points(EI(), MeanConstantLiar(), lb, ub, my_k, SobolSample(), -1)
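+
+# As an additional sanity check (a sketch, not an API guarantee): proposed points are
+# drawn from samples taken inside the box, so they should all lie within [lb, ub]
+new_x, eis = potential_optimal_points(EI(), MeanConstantLiar(), lb, ub, my_k, SobolSample(), 5)
+@test all(all(lb .<= collect(p) .<= ub) for p in new_x)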