
use MeritObjective for LineSearch in NEqProblem #52

Draft: wants to merge 11 commits into base: master
2 changes: 1 addition & 1 deletion src/fixedpoints/anderson.jl
@@ -94,7 +94,7 @@ options = NEqOptions(maxiter=maxiter)

Gold = copy(Gx)
Fold = copy(Fx)
- γv0 = zeros(effective_memory)
+ γv0 = zeros(eltype(Gx), memory)
iter = 0
while iter < options.maxiter
iter += 1
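Note on the change above: zeros(n) always allocates a Vector{Float64}, so the old buffer silently fixed the precision; passing eltype(Gx) keeps the Anderson work vector in the problem's own element type. A minimal sketch (names illustrative):

    Gx = rand(Float32, 4)
    zeros(3)               # Vector{Float64}, regardless of the problem's precision
    zeros(eltype(Gx), 3)   # Vector{Float32}, matching Gx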
26 changes: 19 additions & 7 deletions src/nlsolve/linesearch/newton.jl
@@ -8,24 +8,36 @@ function solve(
options = NEqOptions(),
state = init(problem, method; x),
)
t0 = time()

- # Unpack
- scheme, linesearch = modelscheme(method), algorithm(method)
+ # Unpack important objectives
F = problem.R.F
FJ = problem.R.FJ
# Unpack state
z, d, Fx, Jx = state
z .= x
+ T = eltype(Fx)

- T = eltype(x)
# Set up MeritObjective. This defines the least squares
# objective for the line search.
merit = MeritObjective(problem, F, FJ, Fx, Jx, d)
meritproblem =
- OptimizationProblem(merit, nothing, Euclidean(0), nothing, mstyle(problem), nothing)
+ OptimizationProblem(merit, bounds(problem), _manifold(problem), nothing, mstyle(problem), nothing)

opt_options = OptimizationOptions(options)
res = solve(meritproblem, x, method, opt_options)

F0 = res.info.∇f0
ρF0, ρ2F0 = norm(F0, Inf), norm(F0, 2)
newinfo = (
solution = solution(res),
best_residual = res.info.minimum,
ρs = T(NaN),
ρF0 = ρF0,
ρ2F0 = ρ2F0,
time = res.info.time,
iter = res.info.iter,
)

return ConvergenceInfo(method, newinfo, options)

# Evaluate the residual and Jacobian
Fx, Jx = FJ(Fx, Jx, x)
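For reference, a small self-contained sketch of the merit-function idea this method now delegates to: the line search minimizes m(x) = ‖F(x)‖²/2, whose zero-valued minimizers are exactly the roots of F. F! and merit_value are illustrative names, not package API:

    # toy residual F(x) = [x1^2 - 2, x2 - 1] with root (sqrt(2), 1)
    F!(Fx, x) = (Fx[1] = x[1]^2 - 2; Fx[2] = x[2] - 1; Fx)

    function merit_value(x)
        Fx = similar(x)
        F!(Fx, x)
        return sum(abs2, Fx) / 2   # = ‖F(x)‖²/2, what the line search minimizes
    end

    merit_value([sqrt(2), 1.0])    # 0.0 exactly at the root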
85 changes: 84 additions & 1 deletion src/nlsolve/problem_types.jl
@@ -60,14 +60,34 @@ struct NEqOptions{T,Tmi,Call}
f_reltol::T
maxiter::Tmi
callback::Call
show_trace::Bool
end
NEqOptions(;
f_limit = 0.0,
f_abstol = 1e-8,
f_reltol = 1e-12,
maxiter = 10^4,
callback = nothing,
- ) = NEqOptions(f_limit, f_abstol, f_reltol, maxiter, callback)
+ show_trace = false,
+ ) = NEqOptions(f_limit, f_abstol, f_reltol, maxiter, callback, show_trace)

function OptimizationOptions(neq::NEqOptions)
return OptimizationOptions(;
x_abstol = 0.0,
x_reltol = 0.0,
x_norm = x -> norm(x, Inf),
g_abstol = 2*neq.f_abstol,
g_reltol = 2*neq.f_reltol,
g_norm = x -> norm(x, Inf),
f_limit = neq.f_limit,
f_abstol = 0.0,
f_reltol = 0.0,
nm_tol = 1e-8,
maxiter = neq.maxiter,
show_trace = neq.show_trace,
)
end
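A usage sketch of the conversion above, assuming only the constructors shown in this diff: the residual tolerances are forwarded as gradient tolerances of the inner optimizer, while the x- and f-based criteria are zeroed so convergence is judged on the residual alone:

    neq = NEqOptions(f_abstol = 1e-10, show_trace = true)
    opt = OptimizationOptions(neq)
    # opt.g_abstol == 2e-10, opt.maxiter == neq.maxiter, x/f tolerances are 0.0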

function Base.show(io::IO, ci::ConvergenceInfo{<:Any,<:Any,<:NEqOptions})
opt = ci.options
info = ci.info
@@ -131,3 +151,66 @@ function Base.show(io::IO, ci::ConvergenceInfo{<:Any,<:Any,<:NEqOptions})
println(io, " Seconds run: $(@sprintf("%.2e", info.time))")
println(io, " Iterations: $(info.iter)")
end

function upto_gradient(merit::MeritObjective, Fx, x)
upto_gradient(merit.prob, Fx, x)
#merit.F .= Fx
#return res
end
upto_gradient(neq::NEqProblem, Fx, x) =
upto_gradient(neq.R, Fx, x)

function upto_gradient(vo::VectorObjective, Fx, x)
value(vo, Fx, x)
obj = norm(Fx)^2 / 2
return obj, Fx
end

function upto_hessian(merit::MeritObjective, ∇f, ∇²f, x)
upto_hessian(merit.prob, ∇f, ∇²f, x)
end

upto_hessian(neq::NEqProblem, ∇f, ∇²f, x) =
upto_hessian(neq.R, ∇f, ∇²f, x)

function upto_hessian(vo::VectorObjective, ∇f, ∇²f, x)
Fx, Jx = vo.FJ(∇f, ∇²f, x)
obj = norm(Fx)^2/2
return obj, Fx, Jx
end
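The obj returned here is the least-squares merit m(x) = ‖F(x)‖²/2, whose gradient is J(x)ᵀF(x). A quick finite-difference sanity check on a toy residual (all names local to the sketch):

    using LinearAlgebra
    F(x) = [x[1]^2 - 2, x[1] * x[2]]
    J(x) = [2x[1] 0.0; x[2] x[1]]
    m(x) = sum(abs2, F(x)) / 2
    x = [1.3, 0.7]
    g = J(x)' * F(x)                                                 # analytic gradient
    fd = [(m(x .+ 1e-6 .* (1:2 .== i)) - m(x)) / 1e-6 for i in 1:2]  # ≈ g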

function LineObjective(obj::TP, ∇fz::T1, z::T2, x::T2, d::T2, φ0::T3, dφ0::T3) where {TP<:(NLSolvers.OptimizationProblem{<:NLSolvers.MeritObjective}),T1,T2,T3}
return LineObjective(obj.objective, ∇fz, z, x, d, φ0, dφ0)
end

function LineObjective!(obj::TP, ∇fz::T1, z::T2, x::T2, d::T2, φ0::T3, dφ0::T3) where {TP<:(NLSolvers.OptimizationProblem{<:NLSolvers.MeritObjective}),T1,T2,T3}
return LineObjective!(obj.objective, ∇fz, z, x, d, φ0, dφ0)
end

function ls_has_gradient(obj::MeritObjective)
if obj.prob.R.Jv === nothing
error("You need to provide Jv to use a line search with gradient information.")
end
return nothing
end
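The reason Jv suffices: the line-search slope is dφ(λ) = ∇m(z)⋅d = (J(z)ᵀF(z))⋅d = F(z)⋅(J(z)d), so one Jacobian-vector product per trial point is enough and the full Jacobian is never formed. A sketch of that identity (dφ is a local name):

    using LinearAlgebra
    # given the residual Fz and the Jacobian-vector product Jd = J(z) * d,
    dφ(Fz, Jd) = dot(Fz, Jd)   # equals (J(z)'Fz) ⋅ d without forming J(z)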

function ls_upto_gradient(merit::MeritObjective, Fx, x)
ls_upto_gradient(merit.prob, Fx, x)
end
ls_upto_gradient(neq::NEqProblem, Fx, x) =
ls_upto_gradient(neq.R, Fx, x)

function ls_upto_gradient(vo::VectorObjective, Fx, x)
vo.Jv(x)
value(vo, Fx, x)
obj = norm(Fx)^2 / 2
return obj, Fx
end


_manifold(merit::NLSolvers.MeritObjective) = _manifold(merit.prob)

bounds(neq::NEqProblem) = neq.bounds



34 changes: 24 additions & 10 deletions src/objectives.jl
@@ -100,6 +100,10 @@ end

## If prob is a NEqProblem, then we can just dispatch to least squares MeritObjective
# if fast JacVec exists then maybe even line searches that updates the gradient can be used???
ls_has_gradient(prob) = nothing
ls_upto_gradient(prob, ∇f, x) = upto_gradient(prob, ∇f, x)
ls_value(prob, z) = (value(prob, z), z)

struct LineObjective!{TP,T1,T2,T3}
prob::TP
∇fz::T1
@@ -109,15 +113,19 @@ struct LineObjective!{TP,T1,T2,T3}
φ0::T3
dφ0::T3
end

function (le::LineObjective!)(λ)
z = retract!(_manifold(le.prob), le.z, le.x, le.d, λ)
- ϕ = value(le.prob, z)
- (ϕ = ϕ, z = z)
+ ϕ, z = ls_value(le.prob, z)
+ return (ϕ = ϕ, z = z)
end

function (le::LineObjective!)(λ, calc_grad::Bool)
- f, g = upto_gradient(le.prob, le.∇fz, retract!(_manifold(le.prob), le.z, le.x, le.d, λ))
+ ls_has_gradient(le.prob)
+ f, g = ls_upto_gradient(le.prob, le.∇fz, retract!(_manifold(le.prob), le.z, le.x, le.d, λ))
(ϕ = f, dϕ = real(dot(g, le.d))) # because complex dot might not have exactly zero im part and it's the wrong type
end

struct LineObjective{TP,T1,T2,T3}
prob::TP
∇fz::T1
@@ -127,16 +135,16 @@ struct LineObjective{TP,T1,T2,T3}
φ0::T3
dφ0::T3
end

function (le::LineObjective)(λ)
z = retract(_manifold(le.prob), le.x, le.d, λ)
- _value = value(le.prob, z)
- if le.prob.objective isa MeritObjective
- return (ϕ = _value.ϕ, _value.Fx)
- end
- (ϕ = _value,)
+ ϕ, z = ls_value(le.prob, z)
+ return (ϕ = ϕ, z = z)
end

function (le::LineObjective)(λ, calc_grad::Bool)
- f, g = upto_gradient(le.prob, le.∇fz, retract(_manifold(le.prob), le.x, le.d, λ))
+ ls_has_gradient(le.prob)
+ f, g = ls_upto_gradient(le.prob, le.∇fz, retract(_manifold(le.prob), le.x, le.d, λ))
(ϕ = f, dϕ = real(dot(g, le.d))) # because complex dot might not have exactly zero im part and it's the wrong type
end
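Both LineObjective variants evaluate the one-dimensional restriction φ(λ) = m(retract(M, x, d, λ)), which on a flat manifold reduces to m(x + λd). A Euclidean sketch (φ is a local name):

    φ(m, x, d) = λ -> m(x .+ λ .* d)
    # a backtracking search then tests, e.g., the Armijo condition
    # φ(λ) ≤ φ0 + c1 * λ * dφ0, with φ0 and dφ0 stored on the objective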

@@ -156,7 +164,13 @@ struct MeritObjective{TP,T1,T2,T3,T4,T5}
end
function value(mo::MeritObjective, x)
Fx = mo.F(mo.Fx, x)
- (ϕ = (norm(Fx)^2) / 2, Fx = Fx)
+ return norm(Fx)^2 / 2
+ #(ϕ = (norm(Fx)^2) / 2, Fx = Fx)
end

function ls_value(mo::MeritObjective, x)
Fx = mo.F(mo.Fx, x)
return (norm(Fx)^2 / 2, Fx)
end
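The split gives two contracts: value returns the plain scalar the optimizer expects, while ls_value also hands back the residual at the trial point so an accepted step can reuse it instead of re-evaluating F. Schematically, for a MeritObjective mo and trial point x:

    ϕ = value(mo, x)          # scalar merit ‖F(x)‖²/2
    ϕ, Fx = ls_value(mo, x)   # same scalar plus the residual at x, for reuse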

struct LsqWrapper{Tobj,TF,TJ} <: ObjWrapper
20 changes: 16 additions & 4 deletions src/optimize/problem_types.jl
@@ -223,7 +223,7 @@ OptimizationOptions(;
maxiter,
show_trace,
)

#=
struct MinResults{Tr,Tc<:ConvergenceInfo,Th,Ts,To}
res::Tr
conv::Tc
@@ -251,7 +251,7 @@ function Base.show(io::IO, mr::MinResults)
println(io)
println(io, " * Convergence measures\n")
show(io, convinfo(mr))
- end
+ end =#
#=
* Work counters
Seconds run: 0 (vs limit Inf)
@@ -260,6 +260,18 @@
∇f(x) calls: 53
=#

function _identity(v::Vector{T}) where T
out = Matrix{T}(undef, length(v), length(v))
out .= false
for i in 1:length(v)
out[i,i] = true
end
return out
end

function _identity(x)
return I + x .* x' .* false
end
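The Vector method fills a dense Matrix{T}; the generic fallback builds an identity in whatever array type x .* x' broadcasts to, without assuming a constructor. A sketch (the StaticArrays lines are an assumption, left commented out):

    _identity([1.0, 2.0])     # 2×2 Matrix{Float64} identity

    # hypothetical: for a static vector the broadcast fallback keeps the array type
    # using StaticArrays; x = SVector(1.0, 2.0)
    # I + x .* x' .* false    # 2×2 SMatrix identity, no heap allocation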
function prepare_variables(prob, approach, x0, ∇fz, B)
objective = prob.objective
z = x0
@@ -278,7 +290,7 @@ function prepare_variables(prob, approach, x0, ∇fz, B)
else
# Construct a matrix on the correct form and of the correct type
# with the content of I_{n,n}
- B = I + abs.(0 * x * x')
+ B = _identity(x)
end
end
# first evaluation
@@ -303,7 +315,7 @@ end
function x_converged(x, z, options)
x_converged = false
if x !== nothing # if not calling from initial_converged
- y = x .- z
+ y = (xi - zi for (xi, zi) in zip(x, z)) # do not allocate an additional vector
ynorm = options.x_norm(y)
x_converged = x_converged || ynorm ≤ options.x_abstol
x_converged = x_converged || ynorm ≤ options.x_norm(x) * options.x_reltol
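This works because LinearAlgebra.norm accepts arbitrary iterators, so the default x_norm can consume the generator directly. A sketch:

    using LinearAlgebra
    x, z = [1.0, 2.0], [1.0, 2.5]
    norm((xi - zi for (xi, zi) in zip(x, z)), Inf)   # 0.5, no temporary vector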
4 changes: 2 additions & 2 deletions src/quasinewton/update_qn.jl
@@ -8,14 +8,14 @@ function update_obj!(problem, s, y, ∇fx, z, ∇fz, B, scheme, scale = nothing)
# Update B
if scale == nothing
if isa(scheme.approx, Direct)
- yBy = dot(y, B * y)
+ yBy = dot(y, B, y)
if !iszero(yBy)
Badj = dot(y, s) / yBy .* B
end
else
ys = dot(y, s)
if !iszero(ys)
- Badj = dot(y, B * y) / ys .* B
+ Badj = dot(y, B, y) / ys .* B
else
return fz, ∇fz, B, s, y
end
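The three-argument dot(y, B, y), available since Julia 1.4, computes yᵀBy without materializing the intermediate B*y. A sketch:

    using LinearAlgebra
    y, B = rand(3), rand(3, 3)
    dot(y, B, y) ≈ dot(y, B * y)   # true; the three-argument form skips the temporary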