From caf28dc18023d2e3732acd462aa4eef7f4ecbfd9 Mon Sep 17 00:00:00 2001
From: Patrick Kofod Mogensen
Date: Thu, 27 Jul 2023 22:57:22 -0400
Subject: [PATCH] Update nleq-solving docs and adjust code to support the written text.

---
 docs/src/mkdocs.yml              |  59 ----------------
 docs/src/nonlineareq.md          | 118 ++++++++++++++++++++++++++++++-
 src/NLSolvers.jl                 |   2 +-
 src/nlsolve/linesearch/newton.jl |  16 +++--
 src/nlsolve/problem_types.jl     |  21 +++---
 src/nlsolve/spectral/dfsane.jl   |   2 +-
 src/objectives.jl                |  27 +++++--
 test/nlsolve/interface.jl        |  20 ++++++
 8 files changed, 179 insertions(+), 86 deletions(-)
 delete mode 100644 docs/src/mkdocs.yml

diff --git a/docs/src/mkdocs.yml b/docs/src/mkdocs.yml
deleted file mode 100644
index d674b0e..0000000
--- a/docs/src/mkdocs.yml
+++ /dev/null
@@ -1,59 +0,0 @@
-ite_name: NLSolvers.jl
-repo_url: https://github.com/pkofod/NLSolvers.jl/
-site_description: Pure Julia implementations of optimization algorithms.
-site_author: Patrick Kofod Mogensen
-
-theme: windmill
-
-markdown_extensions:
-# - codehilite
-  - extra
-  - tables
-  - fenced_code
-  - mdx_math
-  - admonition
-
-extra_javascript:
-  - https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS_HTML
-  - assets/mathjaxhelper.js
-
-extra_css:
-  - assets/Documenter.css
-
-docs_dir: 'build'
-
-pages:
-  - Home: 'index.md'
-  - Tutorials:
-#    - Minimizing a function: 'user/minimization.md'
-#    - Gradients and Hessians: 'user/gradientsandhessians.md'
-#    - Configurable Options: 'user/config.md'
-#    - Linesearch: 'algo/linesearch.md'
-#    - Algorithm choice: 'user/algochoice.md'
-    - Preconditioners: 'tutorials/precondition.md'
-#    - Complex optimization: 'algo/complex.md'
-#    - Manifolds: 'algo/manifolds.md'
-#    - Tips and tricks: 'user/tipsandtricks.md'
-#    - Interior point Newton: 'examples/generated/ipnewton_basics.md'
-#    - Maximum likelihood estimation: 'examples/generated/maxlikenlm.md'
-#    - Conditional maximum likelihood estimation: 'examples/generated/rasch.md'
-#  - Algorithms:
-#    - Gradient Free:
-#      - Nelder Mead: 'algo/nelder_mead.md'
-#      - Simulated Annealing: 'algo/simulated_annealing.md'
-#      - Simulated Annealing w/ bounds: 'algo/samin.md'
-#      - Particle Swarm: 'algo/particle_swarm.md'
-#    - Univariate:
-#      - Brent's Method: 'algo/brent.md'
-#      - Golden Section: 'algo/goldensection.md'
-#    - Gradient Required:
-#      - 'Conjugate Gradient': 'algo/cg.md'
-#      - 'Gradient Descent': 'algo/gradientdescent.md'
-#      - '(L-)BFGS': 'algo/lbfgs.md'
-#      - 'Acceleration': 'algo/ngmres.md'
-#    - Hessian Required:
-#      - Newton: 'algo/newton.md'
-#      - Newton with Trust Region: 'algo/newton_trust_region.md'
-#      - Interior point Newton: 'algo/ipnewton.md'
-#  - Contributing: 'dev/contributing.md'
-#  - License: 'LICENSE.md'
\ No newline at end of file
diff --git a/docs/src/nonlineareq.md b/docs/src/nonlineareq.md
index 13ccf07..d256ee9 100644
--- a/docs/src/nonlineareq.md
+++ b/docs/src/nonlineareq.md
@@ -1 +1,117 @@
-# Solving non-linear systems of equations
\ No newline at end of file
+# Solving non-linear systems of equations
+Non-linear systems of equations arise in many different applications. As with optimization, different situations call for different inputs and implementations: scalar problems, static arrays, mutating (in-place) functions, iterative solvers, and so on. Below, we cover some important algorithms and special cases.
+
+## Scalar solving with 1st order derivatives
+Assume that you have a residual system that you want to solve.
+This means that if your original system to solve is `F(x) = K` for some real number `K`, you have made sure to provide a residual function `G(x) = F(x) - K` that we can solve for `G(x) = 0` instead. Then, you can use Newton's method for root finding as follows:
+
+```
+function F(x)
+    x^2
+end
+
+# Returns the residual and its derivative as a tuple
+function FJ(Jx, x)
+    x^2, 2x
+end
+
+prob_obj = NLSolvers.ScalarObjective(
+    f=F,
+    fg=FJ,
+)
+
+prob = NEqProblem(prob_obj; inplace = false)
+
+x0 = 0.3
+res = solve(prob, x0, LineSearch(Newton(), Backtracking()))
+```
+
+with output
+
+```
+julia> res = solve(prob, x0, LineSearch(Newton(), Backtracking()))
+Results of solving non-linear equations
+
+* Algorithm:
+  Newton's method with default linsolve with backtracking (no interp)
+
+* Candidate solution:
+  Final residual 2-norm:      5.36e-09
+  Final residual Inf-norm:    5.36e-09
+
+  Initial residual 2-norm:    9.00e-02
+  Initial residual Inf-norm:  9.00e-02
+
+* Stopping criteria
+  |F(x')| = 5.36e-09 <= 1.00e-08 (true)
+
+* Work counters
+  Seconds run:   1.41e-05
+  Iterations:    12
+```
+The output reports the initial and final residual norms, the convergence check that was applied, and work counters for the time spent and the number of iterations.
+
+## Multivariate non-linear equation solving
+Multivariate non-linear equation solving requires writing a `VectorObjective` instead of a `ScalarObjective` as above. For the example problem below, the `VectorObjective` is built from an in-place residual function and a combined residual-and-Jacobian function based on `ForwardDiff`:
+
+```
+using NLSolvers, ForwardDiff
+function theta(x)
+    if x[1] > 0
+        return atan(x[2] / x[1]) / (2.0 * pi)
+    else
+        return (pi + atan(x[2] / x[1])) / (2.0 * pi)
+    end
+end
+
+function F_powell!(Fx, x)
+    if (x[1]^2 + x[2]^2 == 0)
+        dtdx1 = 0
+        dtdx2 = 0
+    else
+        dtdx1 = -x[2] / (2 * pi * (x[1]^2 + x[2]^2))
+        dtdx2 = x[1] / (2 * pi * (x[1]^2 + x[2]^2))
+    end
+    Fx[1] =
+        -2000.0 * (x[3] - 10.0 * theta(x)) * dtdx1 +
+        200.0 * (sqrt(x[1]^2 + x[2]^2) - 1) * x[1] / sqrt(x[1]^2 + x[2]^2)
+    Fx[2] =
+        -2000.0 * (x[3] - 10.0 * theta(x)) * dtdx2 +
+        200.0 * (sqrt(x[1]^2 + x[2]^2) - 1) * x[2] / sqrt(x[1]^2 + x[2]^2)
+    Fx[3] = 200.0 * (x[3] - 10.0 * theta(x)) + 2.0 * x[3]
+    Fx
+end
+
+# Computes the residual and the Jacobian in one call
+function F_jacobian_powell!(Fx, Jx, x)
+    ForwardDiff.jacobian!(Jx, F_powell!, Fx, x)
+    Fx, Jx
+end
+
+prob_obj = VectorObjective(F=F_powell!, FJ=F_jacobian_powell!)
+prob = NEqProblem(prob_obj)
+
+x0 = [-1.0, 0.0, 0.0]
+res = solve(prob, copy(x0), LineSearch(Newton(), Backtracking()))
+```
+with result
+```
+julia> res = solve(prob, copy(x0), LineSearch(Newton(), Backtracking()))
+Results of solving non-linear equations
+
+* Algorithm:
+  Newton's method with default linsolve with backtracking (no interp)
+
+* Candidate solution:
+  Final residual 2-norm:      8.57e-16
+  Final residual Inf-norm:    6.24e-16
+
+  Initial residual 2-norm:    1.88e+03
+  Initial residual Inf-norm:  1.59e+03
+
+* Stopping criteria
+  |F(x')| = 6.24e-16 <= 1.00e-08 (true)
+
+* Work counters
+  Seconds run:   2.10e-04
+  Iterations:    33
+```
+We again see the initial and final residual norms. This time the 2-norm and Inf-norm differ, because the system has more than one unknown. The final residual norm again satisfies the required threshold, and the run time and number of iterations are reported.
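+
+## Derivative-free solving
+If you cannot or do not want to provide derivatives, the same in-place problem can be handed to the spectral residual solver `DFSANE` instead of a Newton-based method. The snippet below is only a sketch: it assumes that `DFSANE()` constructs the solver with usable defaults and that `NEqOptions()` provides the default convergence options, and it reuses `prob` and `x0` from the multivariate example above.
+
+```
+# Sketch: derivative-free solve of the same in-place problem.
+# `DFSANE()` and `NEqOptions()` defaults are assumed to be available here.
+res_dfsane = solve(prob, copy(x0), DFSANE(), NEqOptions())
+```
+
+The returned result carries the same information as above: initial and final residual norms, the stopping criterion, and the work counters.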
\ No newline at end of file
diff --git a/src/NLSolvers.jl b/src/NLSolvers.jl
index f5f5241..2aad83a 100644
--- a/src/NLSolvers.jl
+++ b/src/NLSolvers.jl
@@ -109,7 +109,7 @@ include("Manifolds.jl")
 include("objectives.jl")
 export LeastSquaresObjective
 include("linearalgebra.jl")
-export NonDiffed, OnceDiffed, TwiceDiffed, ScalarObjective
+export NonDiffed, OnceDiffed, TwiceDiffed, ScalarObjective, VectorObjective
 
 # make this struct that has scheme and approx
 abstract type QuasiNewton{T1} end
diff --git a/src/nlsolve/linesearch/newton.jl b/src/nlsolve/linesearch/newton.jl
index b22433f..c420c38 100644
--- a/src/nlsolve/linesearch/newton.jl
+++ b/src/nlsolve/linesearch/newton.jl
@@ -12,23 +12,25 @@ function solve(
     # Unpack
     scheme, linesearch = modelscheme(method), algorithm(method)
-    # Unpack important objectives
-    F = problem.R.F
-    FJ = problem.R.FJ
+    # Unpack state
     z, d, Fx, Jx = state
-    z .= x
+    if mstyle(problem) isa InPlace
+        z .= x
+    else
+        z = copy(x)
+    end
     T = eltype(Fx)
 
     # Set up MeritObjective. This defines the least squares
     # objective for the line search.
-    merit = MeritObjective(problem, F, FJ, Fx, Jx, d)
+    merit = MeritObjective(problem, Fx, d)
     meritproblem =
         OptimizationProblem(merit, nothing, Euclidean(0), nothing, mstyle(problem), nothing)
 
     # Evaluate the residual and Jacobian
-    Fx, Jx = FJ(Fx, Jx, x)
+    Fx, Jx = value_jacobian(problem, Fx, Jx, x)
     if !(scheme.reset_age === nothing)
         JFx = factorize(Jx)
     else
@@ -160,7 +162,7 @@ function update_model(
 )
     # Update F, J, JF, and age as necessary
     if scheme.reset_age === nothing
-        F, J = problem.R.FJ(F, J, z)
+        F, J = value_jacobian(problem, F, J, z)
         if scheme.factorizer === nothing
             JF = nothing
         else
diff --git a/src/nlsolve/problem_types.jl b/src/nlsolve/problem_types.jl
index 3bdb14d..748f06a 100644
--- a/src/nlsolve/problem_types.jl
+++ b/src/nlsolve/problem_types.jl
@@ -30,20 +30,21 @@ is_inplace(problem::NEqProblem) = mstyle(problem) === InPlace()
 #function value(nleq::NEqProblem, x)
 #    nleq.R(x)
 #end
-function value(nleq::NEqProblem, x, F)
-    value(nleq.R, x, F)
+function value(nleq::NEqProblem, F, x)
+    value(nleq.R, F, x)
 end
-struct VectorObjective{TF,TJ,TFJ,TJv}
-    F::TF
-    J::TJ
-    FJ::TFJ
-    Jv::TJv
+function jacobian(nleq::NEqProblem{<:ScalarObjective}, J, x)
+    gradient(nleq.R, J, x)
 end
+function value_jacobian(nleq::NEqProblem{<:ScalarObjective, <:Any, <:Any, OutOfPlace}, F, J, x)
+    nleq.R.fg(J, x)
+end
+
 function value(nleq::VectorObjective, F, x)
     nleq.F(F, x)
 end
-function value_jacobian!(nleq::NEqProblem{<:VectorObjective,<:Any,<:Any}, F, J, x)
-    nleq.FJ(F, J, x)
+function value_jacobian(nleq::NEqProblem{<:VectorObjective,<:Any,<:Any,<:Any}, F, J, x)
+    nleq.R.FJ(F, J, x)
 end
 
 """
@@ -100,7 +101,7 @@ function Base.show(io::IO, ci::ConvergenceInfo{<:Any,<:Any,<:NEqOptions})
         ρF = norm(info.best_residual, Inf)
         println(
             io,
-            "  |F(x')| = $(@sprintf("%.2e", ρF)) <= $(@sprintf("%.2e", opt.f_limit)) ($(ρF<=opt.f_limit))",
+            "  |F(x')| = $(@sprintf("%.2e", ρF)) <= $(@sprintf("%.2e", opt.f_abstol)) ($(ρF<=opt.f_abstol))",
         )
     end
     if haskey(info, :fx)
diff --git a/src/nlsolve/spectral/dfsane.jl b/src/nlsolve/spectral/dfsane.jl
index cf15d4f..2599618 100644
--- a/src/nlsolve/spectral/dfsane.jl
+++ b/src/nlsolve/spectral/dfsane.jl
@@ -70,7 +70,7 @@ function solve(
     prob::NEqProblem,
     x0,
     method::DFSANE,
-    options::NEqOptions,
+    options::NEqOptions = NEqOptions(),
     state = init(prob, method; x = copy(x0)),
 )
     if !(mstyle(prob) === InPlace())
diff --git a/src/objectives.jl b/src/objectives.jl
index b2d59f4..4b4ed8e 100644
--- a/src/objectives.jl
+++ b/src/objectives.jl
@@ -103,6 +103,15 @@ function batched_value(so::ScalarObjective, F, X)
     end
 end
 
+
+struct VectorObjective{TF,TJ,TFJ,TJv}
+    F::TF
+    J::TJ
+    FJ::TFJ
+    Jv::TJv
+end
+VectorObjective(; F=nothing, J=nothing, FJ=nothing, Jv=nothing) = VectorObjective(F, J, FJ, Jv)
+
 ## If prob is a NEqProblem, then we can just dispatch to least squares MeritObjective
 # if fast JacVec exists then maybe even line searches that updates the gradient can be used???
 struct LineObjective!{TP,T1,T2,T3}
@@ -151,16 +160,20 @@ _lineobjective(mstyle::InPlace, prob::AbstractProblem, ∇fz, z, x, d, φ0, dφ0
 _lineobjective(mstyle::OutOfPlace, prob::AbstractProblem, ∇fz, z, x, d, φ0, dφ0) =
     LineObjective(prob, ∇fz, z, x, d, φ0, real(dφ0))
 
-struct MeritObjective{TP,T1,T2,T3,T4,T5}
+struct MeritObjective{TP,T1,T2}
     prob::TP
-    F::T1
-    FJ::T2
-    Fx::T3
-    Jx::T4
-    d::T5
+    Fx::T1
+    d::T2
 end
 function value(mo::MeritObjective, x)
-    Fx = mo.F(mo.Fx, x)
+    _value(mo, mo.prob.R, mo.Fx, x)
+end
+function _value(mo, R::ScalarObjective, Fx, x)
+    Fx = R.f(x)
+    (ϕ = (norm(Fx)^2) / 2, Fx = Fx)
+end
+function _value(mo, R::VectorObjective, Fx, x)
+    Fx = mo.prob.R.F(mo.Fx, x)
     (ϕ = (norm(Fx)^2) / 2, Fx = Fx)
 end
diff --git a/test/nlsolve/interface.jl b/test/nlsolve/interface.jl
index ea01aa8..6996400 100644
--- a/test/nlsolve/interface.jl
+++ b/test/nlsolve/interface.jl
@@ -448,3 +448,23 @@ end
 
     @test norm(res.info.best_residual) < 1e-10
 end
+
+@testset "scalar nlsolves" begin
+    function ff(x)
+        x^2
+    end
+
+    function fgg(Jx, x)
+        x^2, 2x
+    end
+
+    prob_obj = NLSolvers.ScalarObjective(
+        f=ff,
+        fg=fgg,
+    )
+
+    prob = NEqProblem(prob_obj; inplace = false)
+
+    x0 = 0.3
+    res = solve(prob, x0, LineSearch(Newton(), Backtracking()))
+end
\ No newline at end of file
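
The new "scalar nlsolves" testset above runs the solver but does not assert anything about the result. A possible convergence check is sketched below; it assumes that `res.info.best_residual` is populated for scalar, out-of-place problems in the same way as for the vector problems asserted earlier in this test file.

```
using Test, NLSolvers

@testset "scalar nlsolves converge" begin
    ff(x) = x^2
    fgg(Jx, x) = (x^2, 2x)
    prob = NEqProblem(NLSolvers.ScalarObjective(f = ff, fg = fgg); inplace = false)
    res = solve(prob, 0.3, LineSearch(Newton(), Backtracking()))
    # Assumption: `info.best_residual` holds the final residual for scalar problems too.
    @test abs(res.info.best_residual) <= 1e-8
end
```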