docs updates #819

Merged: 5 commits, Sep 20, 2024
4 changes: 2 additions & 2 deletions .github/workflows/Documentation.yml
@@ -16,15 +16,15 @@ jobs:
with:
version: '1'
- name: Install dependencies
run: julia --project=docs/ -e 'using Pkg; Pkg.develop(vcat(PackageSpec(path = pwd()), [PackageSpec(path = joinpath("lib", dir)) for dir in readdir("lib") if dir !== "OptimizationQuadDIRECT"])); Pkg.instantiate()'
run: julia --project=docs/ -e 'using Pkg; Pkg.develop(vcat(PackageSpec(path = pwd()), [PackageSpec(path = joinpath("lib", dir)) for dir in readdir("lib") if (dir !== "OptimizationQuadDIRECT" && dir !== "OptimizationMultistartOptimization")])); Pkg.instantiate()'
- name: Build and deploy
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key
run: julia --project=docs/ --code-coverage=user docs/make.jl
- uses: julia-actions/julia-processcoverage@v1
with:
directories: src,lib/OptimizationBBO/src,lib/OptimizationCMAEvolutionStrategy/src,lib/OptimizationEvolutionary/src,lib/OptimizationFlux/src,lib/OptimizationGCMAES/src,lib/OptimizationMOI/src,lib/OptimizationMetaheuristics/src,lib/OptimizationMultistartOptimization/src,lib/OptimizationNLopt/src,lib/OptimizationNOMAD/src,lib/OptimizationOptimJL/src,lib/OptimizationOptimisers/src,lib/OptimizationPolyalgorithms/src,lib/OptimizationQuadDIRECT/src,lib/OptimizationSpeedMapping/src
directories: src,lib/OptimizationBBO/src,lib/OptimizationCMAEvolutionStrategy/src,lib/OptimizationEvolutionary/src,lib/OptimizationGCMAES/src,lib/OptimizationMOI/src,lib/OptimizationMetaheuristics/src,lib/OptimizationMultistartOptimization/src,lib/OptimizationNLopt/src,lib/OptimizationNOMAD/src,lib/OptimizationOptimJL/src,lib/OptimizationOptimisers/src,lib/OptimizationPolyalgorithms/src,lib/OptimizationQuadDIRECT/src,lib/OptimizationSpeedMapping/src
- uses: codecov/codecov-action@v4
with:
file: lcov.info
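
The dense one-liner in the `Install dependencies` step above develops the root package plus every subpackage under `lib/`, skipping the two packages that cannot be installed in the docs environment. A rough, untangled sketch of the same logic is shown below (an illustrative expansion, not part of the workflow itself; it assumes it runs from the repository root):

```julia
using Pkg

# Develop the root package and every lib/ subpackage except the excluded ones,
# then instantiate the docs environment.
excluded = ("OptimizationQuadDIRECT", "OptimizationMultistartOptimization")
specs = [PackageSpec(path = pwd())]
for dir in readdir("lib")
    dir in excluded && continue
    push!(specs, PackageSpec(path = joinpath("lib", dir)))
end
Pkg.develop(specs)
Pkg.instantiate()
```
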
50 changes: 27 additions & 23 deletions docs/Project.toml
@@ -1,16 +1,18 @@
[deps]
AmplNLWriter = "7c4d4715-977e-5154-bfe0-e096adeac482"
ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
Ipopt_jll = "9cc047cb-c261-5740-88fc-0cf96f7bdcc7"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
Juniper = "2ddba703-00a4-53a7-87a5-e8b9971dde84"
Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e"
Manopt = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5"
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78"
NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
NLPModelsTest = "7998695d-6960-4d3a-85c4-e1bceb8cd856"
@@ -20,12 +22,10 @@ OptimizationBBO = "3e6eede4-6085-4f62-9a71-46d9bc1eb92b"
OptimizationBase = "bca83a33-5cc9-4baa-983d-23429ab6bcbb"
OptimizationCMAEvolutionStrategy = "bd407f91-200f-4536-9381-e4ba712f53f8"
OptimizationEvolutionary = "cb963754-43f6-435e-8d4b-99009ff27753"
OptimizationFlux = "253f991c-a7b2-45f8-8852-8b9a9df78a86"
OptimizationGCMAES = "6f0a0517-dbc2-4a7a-8a20-99ae7f27e911"
OptimizationMOI = "fd9f6733-72f4-499f-8506-86b2bdd0dea1"
OptimizationManopt = "e57b7fff-7ee7-4550-b4f0-90e9476e9fb6"
OptimizationMetaheuristics = "3aafef2f-86ae-4776-b337-85a36adf0b55"
OptimizationMultistartOptimization = "e4316d97-8bbb-4fd3-a7d8-3851d2a72823"
OptimizationNLPModels = "064b21be-54cf-11ef-1646-cdfee32b588f"
OptimizationNLopt = "4e6fcdb7-1186-4e1f-a706-475e75c168bb"
OptimizationNOMAD = "2cab0595-8222-4775-b714-9828e6a9e01b"
@@ -35,6 +35,8 @@ OptimizationPRIMA = "72f8369c-a2ea-4298-9126-56167ce9cbc2"
OptimizationPolyalgorithms = "500b13db-7e66-49ce-bda4-eed966be6282"
OptimizationSpeedMapping = "3d669222-0d7d-4eb9-8a9f-d8528b0d9b91"
OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1"
@@ -45,40 +47,42 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[compat]
AmplNLWriter = "1"
ComponentArrays = "0.15"
Documenter = "1"
FiniteDiff = ">= 2.8.1"
Flux = "0.13, 0.14"
ForwardDiff = ">= 0.10.19"
HiGHS = "1"
Ipopt = "1"
IterTools = "1"
Juniper = "0.9"
Lux = "1"
Manifolds = "0.9"
Manopt = "0.4"
MLUtils = "0.4.4"
ModelingToolkit = "9"
NLPModels = "0.21"
NLPModelsTest = "0.10"
NLopt = "0.6, 1"
Optimization = "3"
OptimizationBBO = "0.1, 0.2, 0.3"
OptimizationBase = "0.0.5, 0.0.6, 0.0.7, 1"
OptimizationCMAEvolutionStrategy = "0.1, 0.2"
OptimizationEvolutionary = "0.1, 0.2, 0.3"
OptimizationFlux = "0.2.1"
OptimizationGCMAES = "0.1, 0.2"
OptimizationMOI = "0.1, 0.2, 0.3, 0.4"
OptimizationManopt = "0.0.2, 0.0.3"
OptimizationMetaheuristics = "0.1, 0.2"
OptimizationMultistartOptimization = "0.1, 0.2"
OptimizationNLPModels = "0.0.1"
OptimizationNLopt = "0.1, 0.2"
OptimizationNOMAD = "0.1, 0.2"
OptimizationOptimJL = "0.1, 0.2, 0.3"
OptimizationOptimisers = "0.1, 0.2"
OptimizationPRIMA = "0.1.0, 0.2"
OptimizationPolyalgorithms = "0.1, 0.2"
OptimizationSpeedMapping = "0.1, 0.2"
Optimization = "4"
OptimizationBBO = "0.4"
OptimizationBase = "2"
OptimizationCMAEvolutionStrategy = "0.3"
OptimizationEvolutionary = "0.4"
OptimizationGCMAES = "0.3"
OptimizationMOI = "0.5"
OptimizationManopt = "0.0.4"
OptimizationMetaheuristics = "0.3"
OptimizationNLPModels = "0.0.2"
OptimizationNLopt = "0.3"
OptimizationNOMAD = "0.3"
OptimizationOptimJL = "0.4"
OptimizationOptimisers = "0.3"
OptimizationPRIMA = "0.3"
OptimizationPolyalgorithms = "0.3"
OptimizationSpeedMapping = "0.3"
OrdinaryDiffEq = "6"
Plots = "1"
Random = "1"
ReverseDiff = ">= 1.9.0"
SciMLBase = "2.30.0"
SciMLSensitivity = "7"
4 changes: 2 additions & 2 deletions docs/src/optimization_packages/multistartoptimization.md
@@ -31,7 +31,7 @@ constraint equations. However, lower and upper constraints set by `lb` and `ub`

The Rosenbrock function can be optimized using `MultistartOptimization.TikTak()` with 100 initial points and the local method `NLopt.LD_LBFGS()` as follows:

```@example MultiStart
```julia
using Optimization, OptimizationMultistartOptimization, OptimizationNLopt
rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2
x0 = zeros(2)
@@ -43,7 +43,7 @@ sol = solve(prob, MultistartOptimization.TikTak(100), NLopt.LD_LBFGS())

You can use any `Optimization.jl` optimizer you like. The global method of `MultistartOptimization` is passed as a positional argument, followed by the local method. For example, we can perform a multistart optimization with LBFGS as the local optimizer using either the `NLopt.jl` or `Optim.jl` implementation as follows. Moreover, this interface allows you to access and adjust all the optimizer settings as you normally would:

```@example MultiStart
```julia
using OptimizationOptimJL
f = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff())
prob = Optimization.OptimizationProblem(f, x0, p, lb = [-1.0, -1.0], ub = [1.0, 1.0])
23 changes: 2 additions & 21 deletions docs/src/optimization_packages/optimisers.md
@@ -12,27 +12,7 @@ Pkg.add("OptimizationOptimisers");
In addition to the optimisation algorithms provided by the Optimisers.jl package, this subpackage
also provides the Sophia optimisation algorithm.

## Local Unconstrained Optimizers

- `Sophia`: Based on the recent paper https://arxiv.org/abs/2305.14342. It incorporates second order information
in the form of the diagonal of the Hessian matrix hence avoiding the need to compute the complete hessian. It has been shown to converge faster than other first order methods such as Adam and SGD.

+ `solve(problem, Sophia(; η, βs, ϵ, λ, k, ρ))`

+ `η` is the learning rate
+ `βs` are the decay of momentums
+ `ϵ` is the epsilon value
+ `λ` is the weight decay parameter
+ `k` is the number of iterations to re-compute the diagonal of the Hessian matrix
+ `ρ` is the momentum
+ Defaults:

* `η = 0.001`
* `βs = (0.9, 0.999)`
* `ϵ = 1e-8`
* `λ = 0.1`
* `k = 10`
* `ρ = 0.04`
## List of optimizers

- [`Optimisers.Descent`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.Descent): **Classic gradient descent optimizer with learning rate**

@@ -42,6 +22,7 @@ also provides the Sophia optimisation algorithm.
+ Defaults:

* `η = 0.1`

- [`Optimisers.Momentum`](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.Momentum): **Classic gradient descent optimizer with learning rate and momentum**

+ `solve(problem, Momentum(η, ρ))`
55 changes: 54 additions & 1 deletion docs/src/optimization_packages/optimization.md
@@ -4,15 +4,35 @@ There are some solvers that are available in the Optimization.jl package directly

## Methods

`LBFGS`: The popular quasi-Newton method that leverages limited memory BFGS approximation of the inverse of the Hessian. Through a wrapper over the [L-BFGS-B](https://users.iems.northwestern.edu/%7Enocedal/lbfgsb.html) fortran routine accessed from the [LBFGSB.jl](https://github.com/Gnimuc/LBFGSB.jl/) package. It directly supports box-constraints.
- `LBFGS`: The popular quasi-Newton method that leverages a limited-memory BFGS approximation of the inverse of the Hessian. It is provided through a wrapper over the [L-BFGS-B](https://users.iems.northwestern.edu/%7Enocedal/lbfgsb.html) Fortran routine accessed from the [LBFGSB.jl](https://github.com/Gnimuc/LBFGSB.jl/) package. It directly supports box constraints.

This can also handle arbitrary nonlinear constraints through an Augmented Lagrangian method with bound constraints, as described in Section 17.4 of Numerical Optimization by Nocedal and Wright, thus serving as a general-purpose nonlinear optimization solver available directly in Optimization.jl.

- `Sophia`: Based on the recent paper https://arxiv.org/abs/2305.14342. It incorporates second-order information in the form of the diagonal of the Hessian matrix, hence avoiding the need to compute the complete Hessian. It has been shown to converge faster than first-order methods such as Adam and SGD. A minimal call sketch is given after this list.

+ `solve(problem, Sophia(; η, βs, ϵ, λ, k, ρ))`

+ `η` is the learning rate
+ `βs` are the decay rates of the momentum estimates
+ `ϵ` is the epsilon value
+ `λ` is the weight decay parameter
+ `k` is the number of iterations to re-compute the diagonal of the Hessian matrix
+ `ρ` is the momentum
+ Defaults:

* `η = 0.001`
* `βs = (0.9, 0.999)`
* `ϵ = 1e-8`
* `λ = 0.1`
* `k = 10`
* `ρ = 0.04`
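
As a quick illustration of these parameters, a minimal call sketch is shown below (a hypothetical toy objective, not taken from the package docs; the keyword values simply restate the defaults listed above):

```julia
using Optimization, Zygote

# Hypothetical smooth objective used only for illustration.
obj(x, p) = sum(abs2, x .- p)

optf = OptimizationFunction(obj, Optimization.AutoZygote())
prob = OptimizationProblem(optf, zeros(2), [1.0, 2.0])

# The keyword arguments restate the defaults; any of them can be overridden.
sol = solve(prob, Optimization.Sophia(η = 0.001, βs = (0.9, 0.999), ϵ = 1e-8,
        λ = 0.1, k = 10, ρ = 0.04); maxiters = 1000)
```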

## Examples

### Unconstrained rosenbrock problem

```@example L-BFGS

using Optimization, Zygote

rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2
@@ -27,6 +47,7 @@ sol = solve(prob, Optimization.LBFGS())
### With nonlinear and bounds constraints

```@example L-BFGS

function con2_c(res, x, p)
res .= [x[1]^2 + x[2]^2, (x[2] * sin(x[1]) + x[1]) - 5]
end
@@ -37,3 +58,35 @@ prob = OptimizationProblem(optf, x0, p, lcons = [1.0, -Inf],
ub = [1.0, 1.0])
res = solve(prob, Optimization.LBFGS(), maxiters = 100)
```

### Train NN with Sophia

```@example Sophia

using Optimization, Lux, Zygote, MLUtils, Statistics, Plots, Random, ComponentArrays, Printf

x = rand(10000)
y = sin.(x)
data = MLUtils.DataLoader((x, y), batchsize = 100)

# Define the neural network
model = Chain(Dense(1, 32, tanh), Dense(32, 1))
ps, st = Lux.setup(Random.default_rng(), model)
ps_ca = ComponentArray(ps)
smodel = StatefulLuxLayer{true}(model, nothing, st)

function callback(state, l)
state.iter % 25 == 1 && @printf "Iteration: %5d, Loss: %.6e\n" state.iter l
return l < 1e-1 ## Terminate if loss is small
end

function loss(ps, data)
ypred = [smodel([data[1][i]], ps)[1] for i in eachindex(data[1])]
return sum(abs2, ypred .- data[2])
end

optf = OptimizationFunction(loss, AutoZygote())
prob = OptimizationProblem(optf, ps_ca, data)

res = Optimization.solve(prob, Optimization.Sophia(), callback = callback)
```
36 changes: 20 additions & 16 deletions docs/src/tutorials/minibatch.md
@@ -1,14 +1,16 @@
# Data Iterators and Minibatching

It is possible to solve an optimization problem with batches using a `Flux.Data.DataLoader`, which is passed to `Optimization.solve` with `ncycles`. All data for the batches need to be passed as a tuple of vectors.
It is possible to solve an optimization problem with batches using an `MLUtils.DataLoader`, which is passed as the parameter object of the `OptimizationProblem`; the number of passes over the data is then set with the `epochs` keyword of `Optimization.solve`. All data for the batches need to be passed as a tuple of vectors.

!!! note

This example uses the OptimizationOptimisers.jl package. See the
[Optimisers.jl page](@ref optimisers) for details on the installation and usage.

```@example
using Flux, Optimization, OptimizationOptimisers, OrdinaryDiffEq, SciMLSensitivity
```@example minibatch

using Lux, Optimization, OptimizationOptimisers, OrdinaryDiffEq, SciMLSensitivity, MLUtils,
Random, ComponentArrays

function newtons_cooling(du, u, p, t)
temp = u[1]
@@ -21,14 +23,16 @@ function true_sol(du, u, p, t)
newtons_cooling(du, u, true_p, t)
end

ann = Chain(Dense(1, 8, tanh), Dense(8, 1, tanh))
pp, re = Flux.destructure(ann)
model = Chain(Dense(1, 32, tanh), Dense(32, 1))
ps, st = Lux.setup(Random.default_rng(), model)
ps_ca = ComponentArray(ps)
smodel = StatefulLuxLayer{true}(model, nothing, st)

function dudt_(u, p, t)
re(p)(u) .* u
smodel(u, p) .* u
end

callback = function (state, l, pred; doplot = false) #callback function to observe training
function callback(state, l, pred; doplot = false) #callback function to observe training
display(l)
# plot current prediction against data
if doplot
@@ -47,30 +51,30 @@ t = range(tspan[1], tspan[2], length = datasize)
true_prob = ODEProblem(true_sol, u0, tspan)
ode_data = Array(solve(true_prob, Tsit5(), saveat = t))

prob = ODEProblem{false}(dudt_, u0, tspan, pp)
prob = ODEProblem{false}(dudt_, u0, tspan, ps_ca)

function predict_adjoint(fullp, time_batch)
Array(solve(prob, Tsit5(), p = fullp, saveat = time_batch))
end

function loss_adjoint(fullp, batch, time_batch)
function loss_adjoint(fullp, data)
batch, time_batch = data
pred = predict_adjoint(fullp, time_batch)
sum(abs2, batch .- pred), pred
end

k = 10
# Pass the data for the batches as separate vectors wrapped in a tuple
train_loader = Flux.Data.DataLoader((ode_data, t), batchsize = k)
train_loader = MLUtils.DataLoader((ode_data, t), batchsize = k)

numEpochs = 300
l1 = loss_adjoint(pp, train_loader.data[1], train_loader.data[2])[1]
l1 = loss_adjoint(ps_ca, train_loader.data)[1]

optfun = OptimizationFunction(
(θ, p, batch, time_batch) -> loss_adjoint(θ, batch,
time_batch),
loss_adjoint,
Optimization.AutoZygote())
optprob = OptimizationProblem(optfun, pp)
optprob = OptimizationProblem(optfun, ps_ca, train_loader)
using IterTools: ncycle
res1 = Optimization.solve(optprob, Optimisers.ADAM(0.05), ncycle(train_loader, numEpochs),
callback = callback)
res1 = Optimization.solve(
optprob, Optimisers.ADAM(0.05); callback = callback, epochs = 1000)
```
2 changes: 1 addition & 1 deletion lib/OptimizationOptimisers/src/OptimizationOptimisers.jl
@@ -52,7 +52,7 @@ function SciMLBase.__solve(cache::OptimizationCache{
cache.solver_args.epochs
end

maxiters = Optimization._check_and_convert_maxiters(cache.solver_args.maxiters)
maxiters = Optimization._check_and_convert_maxiters(maxiters)
if maxiters === nothing
throw(ArgumentError("The number of epochs must be specified as the epochs or maxiters kwarg."))
end
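
For context, the one-line change above makes the validation use the `maxiters` value already resolved from the `epochs`/`maxiters` keywords instead of re-reading `cache.solver_args.maxiters`. A paraphrased sketch of the resolved flow (an approximation based on the visible context lines, not the verbatim source):

```julia
# Resolve maxiters from whichever keyword was supplied (fall back to epochs
# when maxiters is absent), then validate the resolved value.
maxiters = cache.solver_args.maxiters === nothing ? cache.solver_args.epochs :
           cache.solver_args.maxiters
maxiters = Optimization._check_and_convert_maxiters(maxiters)
maxiters === nothing &&
    throw(ArgumentError("The number of epochs must be specified as the epochs or maxiters kwarg."))
```
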
4 changes: 2 additions & 2 deletions lib/OptimizationPolyalgorithms/Project.toml
@@ -11,8 +11,8 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"

[compat]
Optimization = "4"
OptimizationOptimJL = "0.1, 0.2, 0.3"
OptimizationOptimisers = "0.1, 0.2"
OptimizationOptimJL = "0.4"
OptimizationOptimisers = "0.3"
Reexport = "1.2"
julia = "1.6"

9 changes: 4 additions & 5 deletions test/minibatch.jl
@@ -58,11 +58,10 @@ optfun = OptimizationFunction(loss_adjoint,
Optimization.AutoZygote())
optprob = OptimizationProblem(optfun, pp, train_loader)

# res1 = Optimization.solve(optprob,
# Optimization.Sophia(; η = 0.5,
# λ = 0.0), callback = callback,
# maxiters = 1000)
# @test 10res1.objective < l1
res1 = Optimization.solve(optprob,
Optimization.Sophia(), callback = callback,
maxiters = 1000)
@test 10res1.objective < l1

optfun = OptimizationFunction(loss_adjoint,
Optimization.AutoForwardDiff())