diff --git a/src/fixedpoints/anderson.jl b/src/fixedpoints/anderson.jl
index 2ba5bde..18cf08a 100644
--- a/src/fixedpoints/anderson.jl
+++ b/src/fixedpoints/anderson.jl
@@ -94,7 +94,7 @@ options = NEqOptions(maxiter=maxiter)
     Gold = copy(Gx)
     Fold = copy(Fx)
-    γv0 = zeros(effective_memory)
+    γv0 = zeros(eltype(Gx),memory)
     iter = 0
     while iter < options.maxiter
         iter += 1
diff --git a/src/nlsolve/linesearch/newton.jl b/src/nlsolve/linesearch/newton.jl
index cb3a6da..0fb1300 100644
--- a/src/nlsolve/linesearch/newton.jl
+++ b/src/nlsolve/linesearch/newton.jl
@@ -8,8 +8,6 @@ function solve(
     options = NEqOptions(),
     state = init(problem, method; x),
 )
-    t0 = time()
-
     # Unpack
     scheme, linesearch = modelscheme(method), algorithm(method)
     # Unpack important objectives
@@ -17,15 +15,29 @@ function solve(
     FJ = problem.R.FJ
     # Unpack state
     z, d, Fx, Jx = state
-    z .= x
-    T = eltype(Fx)
-
-
+    T = eltype(x)

     # Set up MeritObjective. This defines the least squares
     # objective for the line search.
     merit = MeritObjective(problem, F, FJ, Fx, Jx, d)
     meritproblem =
-        OptimizationProblem(merit, nothing, Euclidean(0), nothing, mstyle(problem), nothing)
+        OptimizationProblem(merit, bounds(problem), _manifold(problem), nothing, mstyle(problem), nothing)
+
+    opt_options = OptimizationOptions(options)
+    res = solve(meritproblem, x, method, opt_options)
+
+    F0 = res.info.∇f0
+    ρF0, ρ2F0 = norm(F0, Inf), norm(F0, 2)
+    newinfo = (
+        solution = solution(res),
+        best_residual = res.info.minimum,
+        ρs = T(NaN),
+        ρF0 = ρF0,
+        ρ2F0 = ρ2F0,
+        time = res.info.time,
+        iter = res.info.iter,
+    )
+
+    return ConvergenceInfo(method, newinfo, options)
     # Evaluate the residual and Jacobian
     Fx, Jx = FJ(Fx, Jx, x)
diff --git a/src/nlsolve/problem_types.jl b/src/nlsolve/problem_types.jl
index 3bdb14d..9cd8ec5 100644
--- a/src/nlsolve/problem_types.jl
+++ b/src/nlsolve/problem_types.jl
@@ -60,6 +60,7 @@ struct NEqOptions{T,Tmi,Call}
     f_reltol::T
     maxiter::Tmi
     callback::Call
+    show_trace::Bool
 end
 NEqOptions(;
     f_limit = 0.0,
@@ -67,7 +68,26 @@ NEqOptions(;
     f_reltol = 1e-12,
     maxiter = 10^4,
     callback = nothing,
-) = NEqOptions(f_limit, f_abstol, f_reltol, maxiter, callback)
+    show_trace = false,
+) = NEqOptions(f_limit, f_abstol, f_reltol, maxiter, callback, show_trace)
+
+function OptimizationOptions(neq::NEqOptions)
+    return OptimizationOptions(;
+        x_abstol = 0.0,
+        x_reltol = 0.0,
+        x_norm = x -> norm(x, Inf),
+        g_abstol = 2*neq.f_abstol,
+        g_reltol = 2*neq.f_reltol,
+        g_norm = x -> norm(x, Inf),
+        f_limit = neq.f_limit,
+        f_abstol = 0.0,
+        f_reltol = 0.0,
+        nm_tol = 1e-8,
+        maxiter = neq.maxiter,
+        show_trace = neq.show_trace,
+    )
+end
+
 function Base.show(io::IO, ci::ConvergenceInfo{<:Any,<:Any,<:NEqOptions})
     opt = ci.options
     info = ci.info
@@ -131,3 +151,66 @@ function Base.show(io::IO, ci::ConvergenceInfo{<:Any,<:Any,<:NEqOptions})
     println(io, "  Seconds run: $(@sprintf("%.2e", info.time))")
     println(io, "  Iterations:  $(info.iter)")
 end
+
+function upto_gradient(merit::MeritObjective, Fx, x)
+    upto_gradient(merit.prob, Fx, x)
+    #merit.F .= Fx
+    #return res
+end
+upto_gradient(neq::NEqProblem, Fx, x) =
+    upto_gradient(neq.R, Fx, x)
+
+function upto_gradient(vo::VectorObjective, Fx, x)
+    value(vo,Fx,x)
+    obj = norm(Fx)^2/2
+    return obj, Fx
+end
+
+function upto_hessian(merit::MeritObjective, ∇f, ∇²f, x)
+    upto_hessian(merit.prob, ∇f, ∇²f, x)
+end
+
+upto_hessian(neq::NEqProblem, ∇f, ∇²f, x) =
+    upto_hessian(neq.R, ∇f, ∇²f, x)
+
+function upto_hessian(vo::VectorObjective, ∇f, ∇²f, x)
+    Fx, Jx = vo.FJ(∇f, ∇²f, x)
+    obj = norm(Fx)^2/2
+    return obj, Fx, Jx
+end
+
+function LineObjective(obj::TP,∇fz::T1,z::T2,x::T2,d::T2,φ0::T3,dφ0::T3) where {TP<:(NLSolvers.OptimizationProblem{<:NLSolvers.MeritObjective}),T1,T2,T3}
+    return LineObjective(obj.objective,∇fz,z,x,d,φ0,dφ0)
+end
+
+function LineObjective!(obj::TP,∇fz::T1,z::T2,x::T2,d::T2,φ0::T3,dφ0::T3) where {TP<:(NLSolvers.OptimizationProblem{<:NLSolvers.MeritObjective}),T1,T2,T3}
+    return LineObjective!(obj.objective,∇fz,z,x,d,φ0,dφ0)
+end
+
+function ls_has_gradient(obj::MeritObjective)
+    if obj.prob.R.Jv === nothing
+        throw(error("You need to provide Jv for using LineSearch with gradient information."))
+    end
+    return nothing
+end
+
+function ls_upto_gradient(merit::MeritObjective, Fx, x)
+    upto_gradient(merit.prob, Fx, x)
+end
+upto_gradient(neq::NEqProblem, Fx, x) =
+    upto_gradient(neq.R, Fx, x)
+
+function upto_gradient(vo::VectorObjective, Fx, x)
+    vo.Jv(x)
+    value(vo,Fx,x)
+    obj = norm(Fx)^2/2
+    return obj, Fx
+end
+
+
+_manifold(merit::NLSolvers.MeritObjective) = _manifold(merit.prob)
+
+bounds(neq::NEqProblem) = neq.bounds
+
+
+
diff --git a/src/objectives.jl b/src/objectives.jl
index ffff7ce..c1682c1 100644
--- a/src/objectives.jl
+++ b/src/objectives.jl
@@ -100,6 +100,10 @@ end
 ## If prob is a NEqProblem, then we can just dispatch to least squares MeritObjective
 # if fast JacVec exists then maybe even line searches that updates the gradient can be used???

+ls_has_gradient(prob) = nothing
+ls_upto_gradient(prob,∇f,x) = upto_gradient(prob,∇f,x)
+ls_value(prob,z) = (value(prob,z),z)
+
 struct LineObjective!{TP,T1,T2,T3}
     prob::TP
     ∇fz::T1
@@ -109,15 +113,19 @@ struct LineObjective!{TP,T1,T2,T3}
     φ0::T3
     dφ0::T3
 end
+
 function (le::LineObjective!)(λ)
     z = retract!(_manifold(le.prob), le.z, le.x, le.d, λ)
-    ϕ = value(le.prob, z)
-    (ϕ = ϕ, z = z)
+    ϕ,z = ls_value(le.prob, z)
+    return (ϕ = ϕ, z = z)
 end
+
 function (le::LineObjective!)(λ, calc_grad::Bool)
-    f, g = upto_gradient(le.prob, le.∇fz, retract!(_manifold(le.prob), le.z, le.x, le.d, λ))
+    ls_has_gradient(le.prob)
+    f, g = ls_upto_gradient(le.prob, le.∇fz, retract!(_manifold(le.prob), le.z, le.x, le.d, λ))
     (ϕ = f, dϕ = real(dot(g, le.d))) # because complex dot might not have exactly zero im part and it's the wrong type
 end
+
 struct LineObjective{TP,T1,T2,T3}
     prob::TP
     ∇fz::T1
@@ -127,16 +135,16 @@ struct LineObjective{TP,T1,T2,T3}
     φ0::T3
     dφ0::T3
 end
+
 function (le::LineObjective)(λ)
     z = retract(_manifold(le.prob), le.x, le.d, λ)
-    _value = value(le.prob, z)
-    if le.prob.objective isa MeritObjective
-        return (ϕ = _value.ϕ, _value.Fx)
-    end
-    (ϕ = _value,)
+    ϕ,z = ls_value(le.prob, z)
+    return (ϕ = ϕ, z = z)
 end
+
 function (le::LineObjective)(λ, calc_grad::Bool)
-    f, g = upto_gradient(le.prob, le.∇fz, retract(_manifold(le.prob), le.x, le.d, λ))
+    ls_has_gradient(le.prob)
+    f, g = ls_upto_gradient(le.prob, le.∇fz, retract(_manifold(le.prob), le.x, le.d, λ))
     (ϕ = f, dϕ = real(dot(g, le.d))) # because complex dot might not have exactly zero im part and it's the wrong type
 end

@@ -156,7 +164,13 @@ struct MeritObjective{TP,T1,T2,T3,T4,T5}
 end
 function value(mo::MeritObjective, x)
     Fx = mo.F(mo.Fx, x)
-    (ϕ = (norm(Fx)^2) / 2, Fx = Fx)
+    return norm(Fx)^2 / 2
+    #(ϕ = (norm(Fx)^2) / 2, Fx = Fx)
+end
+
+function ls_value(mo::MeritObjective, x)
+    Fx = mo.F(mo.Fx, x)
+    return (norm(Fx)^2 / 2,Fx)
 end

 struct LsqWrapper{Tobj,TF,TJ} <: ObjWrapper
diff --git a/src/optimize/problem_types.jl b/src/optimize/problem_types.jl
index 856bf1f..c674084 100644
--- a/src/optimize/problem_types.jl
+++ b/src/optimize/problem_types.jl
@@ -223,7 +223,7 @@ OptimizationOptions(;
     maxiter,
     show_trace,
 )
-
+#=
 struct MinResults{Tr,Tc<:ConvergenceInfo,Th,Ts,To}
     res::Tr
     conv::Tc
@@ -251,7 +251,7 @@ function Base.show(io::IO, mr::MinResults)
     println(io)
     println(io, " * Convergence measures\n")
     show(io, convinfo(mr))
-end
+end =#
 #=
 * Work counters
     Seconds run: 0 (vs limit Inf)
     Iterations: 53
     f(x) calls: 53
     ∇f(x) calls: 53
 =#
@@ -260,6 +260,18 @@ end

+function _identity(v::Vector{T}) where T
+    out = Matrix{T}(undef, length(v), length(v))
+    out .= false
+    for i in 1:length(v)
+        out[i,i] = true
+    end
+    return out
+end
+
+function _identity(x)
+    return I + x .* x' .* false
+end
 function prepare_variables(prob, approach, x0, ∇fz, B)
     objective = prob.objective
     z = x0
@@ -278,7 +290,7 @@ function prepare_variables(prob, approach, x0, ∇fz, B)
         else
             # Construct a matrix on the correct form and of the correct type
             # with the content of I_{n,n}
-            B = I + abs.(0 * x * x')
+            B = _identity(x)
         end
     end
     # first evaluation
@@ -303,7 +315,7 @@ end
 function x_converged(x, z, options)
     x_converged = false
     if x !== nothing # if not calling from initial_converged
-        y = x .- z
+        y = (xi-zi for (xi,zi) in zip(x,z)) # do not allocate an additional vector
         ynorm = options.x_norm(y)
         x_converged = x_converged || ynorm ≤ options.x_abstol
         x_converged = x_converged || ynorm ≤ options.x_norm(x) * options.x_reltol
diff --git a/src/quasinewton/update_qn.jl b/src/quasinewton/update_qn.jl
index 3129ba4..7371e6f 100644
--- a/src/quasinewton/update_qn.jl
+++ b/src/quasinewton/update_qn.jl
@@ -8,14 +8,14 @@ function update_obj!(problem, s, y, ∇fx, z, ∇fz, B, scheme, scale = nothing)
     # Update B
     if scale == nothing
         if isa(scheme.approx, Direct)
-            yBy = dot(y, B * y)
+            yBy = dot(y, B, y)
             if !iszero(yBy)
                 Badj = dot(y, s) / yBy .* B
             end
         else
             ys = dot(y, s)
            if !iszero(ys)
-                Badj = dot(y, B * y) / ys .* B
+                Badj = dot(y, B, y) / ys .* B
             else
                 return fz, ∇fz, B, s, y
             end
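
Note on the nlsolve/linesearch/newton.jl change: solve now wraps the residual in the least-squares MeritObjective, hands the resulting OptimizationProblem to the optimization driver, and repackages the optimizer's result as a ConvergenceInfo; in other words a root of F is found by minimizing ‖F(x)‖²/2, and the line search sees that merit value through ls_value/ls_upto_gradient. The sketch below is a minimal, self-contained illustration of that idea in plain Julia, not the NLSolvers API; F, J, φ and newton_merit are made-up names for a toy system. Along the Newton direction d = -J(x)\F(x) the merit's directional derivative is ∇φ(x)'d = -‖F(x)‖², so d is always a descent direction for the merit and plain backtracking suffices.

    using LinearAlgebra

    F(x) = [x[1]^2 + x[2] - 11, x[1] + x[2]^2 - 7]   # toy residual, F(x) = 0 at (3, 2)
    J(x) = [2*x[1] 1.0; 1.0 2*x[2]]                  # its Jacobian

    φ(x)  = norm(F(x))^2 / 2                         # least-squares merit value
    ∇φ(x) = J(x)' * F(x)                             # merit gradient

    function newton_merit(x; maxiter = 50, tol = 1e-10)
        for _ in 1:maxiter
            Fx = F(x)
            norm(Fx, Inf) ≤ tol && break
            d = -(J(x) \ Fx)                         # Newton direction for F
            λ = 1.0
            # backtracking (Armijo) line search on the merit value
            while φ(x + λ * d) > φ(x) + 1e-4 * λ * dot(∇φ(x), d) && λ > 1e-8
                λ /= 2
            end
            x += λ * d
        end
        return x
    end

    newton_merit([1.0, 1.0])   # ≈ [3.0, 2.0]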
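
Note on the smaller hunks: they are allocation and clarity tweaks that can be checked directly in the REPL. The three-argument dot(y, B, y) from LinearAlgebra (Julia 1.4 and later) computes y' * B * y without materializing B * y; _identity builds an I_{n,n} matrix in the element type of the input instead of the I + abs.(0 * x * x') trick; and the generator in x_converged works because norm accepts any iterator. The @test lines below are illustrative checks, not part of the patch.

    using LinearAlgebra, Test

    y = randn(5)
    B = randn(5, 5)
    @test dot(y, B, y) ≈ dot(y, B * y)       # same value, no B * y temporary

    # the helpers added in src/optimize/problem_types.jl
    function _identity(v::Vector{T}) where T
        out = Matrix{T}(undef, length(v), length(v))
        out .= false                         # zero fill; false is the additive identity
        for i in 1:length(v)
            out[i, i] = true                 # ones on the diagonal
        end
        return out
    end
    _identity(x) = I + x .* x' .* false      # generic fallback kept from the old construction

    @test _identity([1.0, 2.0, 3.0]) == Matrix{Float64}(I, 3, 3)

    # norm works on the non-allocating generator used in x_converged
    x, z = randn(5), randn(5)
    @test norm((xi - zi for (xi, zi) in zip(x, z)), Inf) ≈ norm(x .- z, Inf)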