Merge pull request #65 from JuliaNLSolvers/pkm/nonallogdogleg
Allow non-allocating dogleg
pkofod authored Jan 21, 2024
2 parents 2b12412 + 5c5f598 commit 00f3b00
Showing 14 changed files with 136 additions and 73 deletions.
16 changes: 12 additions & 4 deletions src/NLSolvers.jl
@@ -185,17 +185,25 @@ include("lsqsolve/problem_types.jl")
include("lsqsolve/optimization.jl")
export LeastSquaresProblem, LeastSquaresOptions

function negate(problem::AbstractProblem, A)
negate(mstyle(problem), A)
end
function negate(::InPlace, A::AbstractArray)
A .= .-A
return A
end
function negate(::OutOfPlace, A::AbstractArray)
-A
end

+ function _scale(::InPlace, B, A, m)
+ B .= A .* m
+ end
+ function _scale(::OutOfPlace, B, A, m)
+ A .* m
+ end
+ function _copyto(::InPlace, z, x)
+ copyto!(z, x)
+ end
+ function _copyto(::OutOfPlace, z, x)
+ copy(x)
+ end
mstyle(problem::AbstractProblem) = problem.mstyle

"""
Expand Down
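The `InPlace`/`OutOfPlace` trait dispatch above is what lets one solver loop serve both mutating and allocating workflows. A minimal standalone sketch of the pattern (the trait types are redefined here so the snippet runs on its own; in the package they live alongside `AbstractProblem`):

```julia
# Minimal sketch of the mutation-style trait pattern used above.
struct InPlace end
struct OutOfPlace end

negate(::InPlace, A::AbstractArray) = (A .= .-A; A) # mutate the buffer
negate(::OutOfPlace, A::AbstractArray) = -A         # allocate a fresh array

A = [1.0, 2.0]
negate(InPlace(), A)        # A is now [-1.0, -2.0], no allocation
B = negate(OutOfPlace(), A) # A is left as-is, B is a new array
```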
2 changes: 1 addition & 1 deletion src/globalization/linesearches/backtracking.jl
@@ -158,7 +158,7 @@ function find_steplength(mstyle, ls::Backtracking, φ::T, λ) where {T}
Tf(ls.ratio), Tf(ls.decrease), ls.maxiter, ls.verbose

#== factor in Armijo condition ==#
- t = -decrease * dφ0
+ t = decrease * dφ0

iter, α, β = 0, λ, λ # iteration variables
f_α = φ(α) # initial function value
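For context, the Armijo condition accepts a step α once φ(α) ≤ φ(0) + α · decrease · φ′(0). A hedged, self-contained sketch of a backtracking loop in that form (not the package's `find_steplength`, whose bookkeeping is more involved):

```julia
# Sketch of backtracking under the Armijo condition with t = decrease * dφ0;
# dφ0 < 0 along a descent direction, so the acceptance threshold slopes down.
function backtrack(φ, φ0, dφ0; ratio = 0.5, decrease = 1e-4, maxiter = 50)
    t = decrease * dφ0
    α = 1.0
    for _ in 1:maxiter
        φ(α) <= φ0 + α * t && return α
        α *= ratio # shrink and retry
    end
    return α
end

# φ(α) = (1 - α)^2, i.e. f(x) = x^2 probed from x = 1 along d = -1
backtrack(α -> (1 - α)^2, 1.0, -2.0)
```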
9 changes: 5 additions & 4 deletions src/globalization/trs_solvers/solvers/Dogleg.jl
@@ -20,7 +20,7 @@ struct Dogleg{T} <: TRSPSolver
end
Dogleg() = Dogleg(nothing)

- function (dogleg::Dogleg)(∇f, H, Δ, p, scheme; abstol = 1e-10, maxiter = 50)
+ function (dogleg::Dogleg)(∇f, H, Δ, p, scheme, mstyle; abstol = 1e-10, maxiter = 50)
T = eltype(p)
n = length(∇f)

@@ -32,7 +32,8 @@ function (dogleg::Dogleg)(∇f, H, Δ, p, scheme; abstol = 1e-10, maxiter = 50)
norm_d_cauchy = norm(d_cauchy)
if norm_d_cauchy ≥ Δ
shrink = Δ / norm_d_cauchy # inv(Δ/norm_d_cauchy) puts it on the border
- p .= d_cauchy .* shrink

+ p = _scale(mstyle, p, d_cauchy, shrink)
interior = false
else
# Else, calculate (Quasi-)Newton step. If this is interior, then take the
@@ -42,7 +43,6 @@ function (dogleg::Dogleg)(∇f, H, Δ, p, scheme; abstol = 1e-10, maxiter = 50)
p = find_direction!(p, H, nothing, ∇f, scheme)
norm_p = norm(p)
if norm_p ≤ Δ # fixme really need to add the 20% slack here (see TR book and NTR)
- p .= p
if norm_p < Δ
interior = true
else
@@ -75,7 +75,8 @@ function (dogleg::Dogleg)(∇f, H, Δ, p, scheme; abstol = 1e-10, maxiter = 50)
else
t = T(0)
end
- p .= d_cauchy .+ t .* p

+ p, _ = move(mstyle, p, d_cauchy, p, p, t)
interior = false
end
end
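The logic above is the classical dogleg strategy: take the scaled Cauchy point if it already leaves the trust region, take the full (Quasi-)Newton step if it is interior, and otherwise walk the segment between the two until it hits the boundary. A dense, allocating sketch of that decision, assuming a positive definite `H` (the package version additionally threads `mstyle` through `_scale` and `move` to avoid allocations):

```julia
using LinearAlgebra

# Hedged sketch of the dogleg step; not the package implementation.
function dogleg_step(∇f, H, Δ)
    # Cauchy point: model minimizer along steepest descent
    d_cauchy = -(dot(∇f, ∇f) / dot(∇f, H * ∇f)) * ∇f
    norm(d_cauchy) >= Δ && return (Δ / norm(d_cauchy)) * d_cauchy
    p_newton = -(H \ ∇f)
    norm(p_newton) <= Δ && return p_newton # interior Newton step
    # Boundary intersection: solve ‖d_cauchy + t*v‖ = Δ for t in [0, 1]
    v = p_newton - d_cauchy
    a, b, c = dot(v, v), 2 * dot(d_cauchy, v), dot(d_cauchy, d_cauchy) - Δ^2
    t = (-b + sqrt(b^2 - 4 * a * c)) / (2 * a)
    return d_cauchy + t * v
end

dogleg_step([0.2, 0.4], [0.3 0.0; 0.0 0.9], 0.5)
```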
1 change: 1 addition & 0 deletions src/globalization/trs_solvers/solvers/NTR.jl
@@ -51,6 +51,7 @@ function (ms::NTR)(
Δ::T,
s,
scheme,
+ mstyle,
λ0 = 0;
abstol = 1e-10,
maxiter = 50,
2 changes: 1 addition & 1 deletion src/globalization/trs_solvers/solvers/NWI.jl
@@ -148,7 +148,7 @@ Returns:
solved - Whether or not a solution was reached (as opposed to
terminating early due to maxiter)
"""
- function (ms::NWI)(∇f, H, Δ, p, scheme; abstol = 1e-10, maxiter = 50)
+ function (ms::NWI)(∇f, H, Δ, p, scheme, mstyle; abstol = 1e-10, maxiter = 50)
T = eltype(p)
n = length(∇f)
H = H isa UniformScaling ? Diagonal(copy(∇f) .* 0 .+ 1) : H
1 change: 1 addition & 0 deletions src/globalization/trs_solvers/solvers/TCG.jl
@@ -22,6 +22,7 @@ function (ms::TCG)(
Δ::T,
s,
scheme,
+ mstyle,
λ0 = 0;
abstol = 1e-10,
maxiter = min(5, length(s)),
31 changes: 10 additions & 21 deletions src/nlsolve/trustregions/newton.jl
@@ -12,10 +12,6 @@ has_param(::NormedResiduals) = false
function value(nr::NormedResiduals, x)
Fx = nr.F.F(nr.Fx, x)

- # this is just to grab them outside, but this should come from the convergence info perhaps?
- nr.x .= x
- nr.Fx .= Fx

f = (norm(Fx)^2) / 2
return f
end
@@ -24,32 +20,26 @@ function upto_gradient(nr::NormedResiduals, Fx, x)
Fx, Jx = nr.F.FJ(nr.Fx, nr.Fx * x', x)

- # this is just to grab them outside, but this should come from the convergence info perhaps?
- nr.x .= x
- nr.Fx .= Fx

f = (norm(Fx)^2) / 2
- Fx .= Jx' * Fx
- return f, Fx
+ JxtFx = Jx' * Fx
+ return f, JxtFx
end
function upto_hessian(nr::NormedResiduals, Fx, Jx, x) #Fx is the gradient and Jx is the Hessian
Fx, Jx = nr.F.FJ(nr.Fx, Jx, x)

- # this is just to grab them outside, but this should come from the convergence info perhaps?
- nr.x .= x
- nr.Fx .= Fx

f = (norm(Fx)^2) / 2
# this is the gradient
- Fx .= Jx' * Fx
+ JxtFx = Jx' * Fx
# As you may notice, this can be expensive... Because the gradient
# is going to be very simple. May want to create a
# special type or way to hook into trust regions here. We can exploit
# that we only need the cauchy and the newton steps, not any shifted
# systems. There is no need to get the full Hessian, because these two
# steps don't need these multiplies.
# This is the Hessian
- Jx .= Jx' * Jx
- return f, Fx, Jx
+ Jx2 = Jx' * Jx
+ return f, JxtFx, Jx2
end
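These methods implement the standard least-squares calculus: for the merit function f(x) = ‖F(x)‖²/2 the gradient is JᵀF, and JᵀJ is the Gauss-Newton surrogate for the Hessian (it drops the second-order term Σᵢ Fᵢ∇²Fᵢ, which is why the comments above call the full assembly expensive). A small standalone check of those identities on a toy residual; `F`, `J`, and `merit` below are illustrative names, not package API:

```julia
using LinearAlgebra

# f(x) = ‖F(x)‖²/2, ∇f = Jᵀ F, Gauss-Newton Hessian ≈ Jᵀ J.
F(x) = [x[1]^2 - 1, x[1] * x[2]]
J(x) = [2 * x[1] 0.0; x[2] x[1]]

function merit(x)
    Fx, Jx = F(x), J(x)
    return norm(Fx)^2 / 2, Jx' * Fx, Jx' * Jx # value, gradient, model Hessian
end

merit([2.0, 1.0])
```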

function solve(
@@ -58,7 +48,7 @@ function solve(
approach::TrustRegion{<:Union{SR1,DBFGS,BFGS,Newton},<:Any,<:Any},
options::NEqOptions,
)
- if !(mstyle(prob) === InPlace())
+ if !(mstyle(prob) === InPlace()) && !(approach.spsolve isa Dogleg)
throw(
ErrorException(
"solve() not defined for OutOfPlace() with Trustregion for NEqProblem",
@@ -75,17 +65,16 @@ function solve(
normed_residual = NormedResiduals(x_outer, Fx_outer, F)
ρ2F0 = sqrt(value(normed_residual, x_outer) * 2)
ρF0 = norm(normed_residual.Fx, Inf)
- td = OptimizationProblem(normed_residual)
+ td = OptimizationProblem(normed_residual, inplace = mstyle(prob) == InPlace())
options.maxiter
res = solve(td, x, approach, OptimizationOptions(maxiter = options.maxiter))
# normed_residual to get Fx
# f0*2 is wrong because it's 2norm
newinfo = (
solution = solution(res),
- best_residual = Fx_outer,
+ best_residual = value(F, Fx_outer, solution(res)),
ρF0 = ρF0,
ρ2F0 = ρ2F0,
time = res.info.time,
iter = res.info.iter,
)
- return ConvergenceInfo(approach, newinfo, options)
+ return ConvergenceInfo(approach, newinfo, options)
end
27 changes: 10 additions & 17 deletions src/objectives.jl
@@ -19,8 +19,16 @@ struct ScalarObjective{Tf,Tg,Tfg,Tfgh,Th,Thv,Tbf,P}
batched_f::Tbf
param::P
end
- ScalarObjective(; f = nothing, g = nothing, fg = nothing, fgh = nothing, h = nothing) =
- ScalarObjective(f, g, fg, fgh, h, nothing, nothing, nothing)
+ ScalarObjective(;
+ f = nothing,
+ g = nothing,
+ fg = nothing,
+ fgh = nothing,
+ h = nothing,
+ hv = nothing,
+ batched_f = nothing,
+ param = nothing,
+ ) = ScalarObjective(f, g, fg, fgh, h, hv, batched_f, param)
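A minimal usage sketch of the widened keyword constructor; any callback left unspecified defaults to `nothing` (the objective and gradient below are illustrative, not from the package):

```julia
# Construct a ScalarObjective supplying only a value and an in-place gradient.
f(x) = sum(abs2, x)
g!(∇f, x) = (∇f .= 2 .* x; ∇f)
obj = ScalarObjective(f = f, g = g!)
```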
has_param(so::ScalarObjective) = so.param === nothing ? false : true
function value(so::ScalarObjective, x)
if has_param(so)
@@ -177,21 +185,6 @@ function _value(mo, R::VectorObjective, Fx, x)
(f = (norm(Fx)^2) / 2, Fx = Fx)
end

- struct LsqWrapper{Tobj,TF,TJ} <: ObjWrapper
- R::Tobj
- F::TF
- J::TJ
- end
- function (lw::LsqWrapper)(x)
- F = lw.R(lw.F, x)
- sum(abs2, F) / 2
- end
- function (lw::LsqWrapper)(∇f, x)
- _F, _J = lw.R(lw.F, lw.J, x)
- copyto!(∇f, sum(_J; dims = 1))
- sum(abs2, _F), ∇f
- end

struct LeastSquaresObjective{TFx,TJx,Tf,Tfj,Td}
Fx::TFx
Jx::TJx
34 changes: 23 additions & 11 deletions src/optimize/trustregions/optimize/inplace_loop.jl
@@ -16,7 +16,7 @@ function solve(
objvars;
initial_Δ = 20.0,
)
- if !(mstyle(problem) === InPlace())
+ if !(mstyle(problem) === InPlace()) && !(approach.spsolve == Dogleg())
throw(
ErrorException("solve() not defined for OutOfPlace() with TrustRegion solvers"),
)
@@ -49,15 +49,15 @@
qnvars = QNVars(objvars.z, objvars.z)
p = copy(objvars.x)

- objvars, Δkp1, reject =
+ objvars, Δkp1, reject, qnvars =
iterate!(p, objvars, Δk, approach, problem, options, qnvars, false)

iter = 1
# Check for convergence
is_converged = converged(approach, objvars, ∇f0, options, reject, Δkp1)
while iter <= options.maxiter && !any(is_converged)
iter += 1
- objvars, Δkp1, reject =
+ objvars, Δkp1, reject, qnvars =
iterate!(p, objvars, Δkp1, approach, problem, options, qnvars, false)

# Check for convergence
@@ -113,9 +113,11 @@ function iterate!(
scheme, subproblemsolver = modelscheme(approach), algorithm(approach)
y, d, s = qnvars.y, qnvars.d, qnvars.s
fx = fz
- copyto!(x, z)
- copyto!(∇fx, ∇fz)
- spr = subproblemsolver(∇fx, B, Δk, p, scheme; abstol = 1e-10)

+ x = _copyto(mstyle(problem), x, z)
+ ∇fx = _copyto(mstyle(problem), ∇fx, ∇fz)

+ spr = subproblemsolver(∇fx, B, Δk, p, scheme, problem.mstyle; abstol = 1e-10)
Δm = -spr.mz

# Grab the model value, m. If m is zero, the solution, z, does not improve
@@ -124,14 +126,18 @@
# tive value, we may conclude that "something" is wrong. We might be at a
# ridge (positive-indefinite case) for example, or the scaling of the model
# is such that we cannot satisfy ||∇f|| < tol.
- if abs(spr.mz) < eps(T)
+ if abs(spr.mz) < eps(spr.mz)
# set flag to check for problems
end

- z = retract(problem, z, x, p)
+ z = retract(problem, z, x, spr.p)
# Update before acceptance, to keep adding information about the hessian
# even when the step is not "good" enough.

+ y, d, s = qnvars.y, qnvars.d, qnvars.s
+ fx = fz
# Should build a good code for picking update model.

fz, ∇fz, B, s, y = update_obj!(problem, spr.p, y, ∇fx, z, ∇fz, B, scheme, scale)

# Δf is often called ared or Ared for actual reduction. I prefer "change in"
@@ -144,13 +150,19 @@
Δkp1, reject_step = update_trust_region(spr, R, p)

if reject_step
- z .= x
+ z = _copyto(mstyle(problem), z, x)
+ ∇fz = _copyto(mstyle(problem), ∇fz, ∇fx)
fz = fx
- ∇fz .= ∇fx
+ # This is correct because
+ s = _scale(mstyle(problem), s, s, 0) # z - x
+ y = _scale(mstyle(problem), y, y, 0) # ∇fz - ∇fx
+ # and will cause quasinewton updates to not update
# this seems incorrect as it's already updated, should hold off here
end
return (x = x, fx = fx, ∇fx = ∇fx, z = z, fz = fz, ∇fz = ∇fz, B = B, Pg = nothing),
Δkp1,
- reject_step
+ reject_step,
+ QNVars(d, s, y)
end
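For orientation, `R` is the ratio of actual to predicted reduction, Δf/Δm, and it drives both step acceptance and the radius update in `update_trust_region` below. A hedged sketch of the textbook rule (the 0.25/0.75 thresholds and the shrink/expand factors are the classical defaults, not necessarily the package's):

```julia
# Textbook trust-region radius update driven by R = Δf / Δm.
function update_radius(R, Δ, norm_p; Δmax = 100.0)
    if R < 0.25
        Δ /= 4               # model fit poor: shrink the region
    elseif R > 0.75 && norm_p ≈ Δ
        Δ = min(2 * Δ, Δmax) # good fit on the boundary: expand
    end
    return Δ, R <= 0         # reject when there is no actual decrease
end
```

Zeroing `s` and `y` on rejection, as the diff above does through `_scale`, keeps the quasi-Newton update from ingesting a step that was rolled back.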

function update_trust_region(spr, R, p)
2 changes: 1 addition & 1 deletion src/quasinewton/approximations/newton.jl
@@ -12,7 +12,7 @@ function default_newton_linsolve(B, g)
B \ g
end
function default_newton_linsolve(d, B, g)
- d .= (B \ g)
+ d = (B \ g)
end
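Worth noting: `d .= B \ g` never avoided the allocation, since `B \ g` materializes a temporary before the broadcast copies it into `d`, while the new `d = B \ g` merely rebinds the name. A genuinely in-place solve would go through a factorization and `ldiv!`; a sketch, not the package's code path:

```julia
using LinearAlgebra

B = [4.0 1.0; 1.0 3.0]
g = [1.0, 2.0]
d = similar(g)
# cholesky! factorizes a scratch copy in place; ldiv! then writes the
# solution into d without allocating a fresh solution vector.
ldiv!(d, cholesky!(copy(B)), g)
```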

function init_f∇fB(prob, scheme::Newton, ∇fz, B, x)
10 changes: 5 additions & 5 deletions test/globalization/truncatedconjugategradient.jl
@@ -5,29 +5,29 @@ using Test, NLSolvers, LinearAlgebra
H = [0.3 0.0; 0.0 0.9]
g = [0.2, 0.4]

- m(g, H, 0.7, rand(2), 1)
+ m(g, H, 0.7, rand(2), 1, NLSolvers.InPlace())

# Since -H is negative definite the solution is guaranteed
# to be at the boundary unless g = 0
for Δ in range(0, 100; step = 0.1)
- @test norm(m(g, -H, Δ, rand(2), 1).p, 2) ≈ Δ
+ @test norm(m(g, -H, Δ, rand(2), 1, NLSolvers.InPlace()).p, 2) ≈ Δ
end

# Small gradient entries
g .= [1e-12, 1e-9]
for Δ in range(0, 100; step = 0.1)
- @test norm(m(g, -H, Δ, rand(2), 1).p, 2) ≈ Δ
+ @test norm(m(g, -H, Δ, rand(2), 1, NLSolvers.InPlace()).p, 2) ≈ Δ
end

# Mixed gradient entries
g .= [1e12, 1e-9]
for Δ in range(0, 100; step = 0.1)
- @test norm(m(g, -H, Δ, rand(2), 1).p, 2) ≈ Δ
+ @test norm(m(g, -H, Δ, rand(2), 1, NLSolvers.InPlace()).p, 2) ≈ Δ
end

# Zero case
g = [0.0, 0.0]
for Δ in range(0, 100; step = 0.1)
- @test norm(m(g, -H, Δ, rand(2), 1).p, 2) == 0
+ @test norm(m(g, -H, Δ, rand(2), 1, NLSolvers.InPlace()).p, 2) == 0
end
end

2 comments on commit 00f3b00


@pkofod pkofod commented on 00f3b00 Jan 21, 2024


@JuliaRegistrator


Error while trying to register: Version 0.4.0 already exists
