diff --git a/Project.toml b/Project.toml index 11c359df95..50d2996c17 100644 --- a/Project.toml +++ b/Project.toml @@ -24,6 +24,7 @@ LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" Lux = "b2108857-7c20-44ae-9111-449ecde12c47" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" +MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca" Optim = "429524aa-4258-5aef-a3af-852621145aeb" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" @@ -87,4 +88,4 @@ SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "CUDA", "SafeTestsets", "OptimizationOptimisers", "OptimizationOptimJL", "Pkg", "OrdinaryDiffEq", "IntegralsCuba"] +test = ["Test", "CUDA", "SafeTestsets", "OptimizationOptimisers", "OptimizationOptimJL", "Pkg", "OrdinaryDiffEq", "IntegralsCuba"] \ No newline at end of file diff --git a/docs/src/examples/Lotka_Volterra_BPINNs.md b/docs/src/examples/Lotka_Volterra_BPINNs.md new file mode 100644 index 0000000000..937ea9b588 --- /dev/null +++ b/docs/src/examples/Lotka_Volterra_BPINNs.md @@ -0,0 +1,145 @@ +# Bayesian Physics informed Neural Network ODEs Solvers + +Bayesian inference for PINNs provides an approach to ODE solution finding and parameter estimation with quantified uncertainty. + +## The Lotka-Volterra Model + +The Lotka–Volterra equations, also known as the predator–prey equations, are a pair of first-order nonlinear differential equations. +These differential equations are frequently used to describe the dynamics of biological systems in which two species interact, one as a predator and the other as prey. +The populations change through time according to the pair of equations + +$$ +\begin{aligned} +\frac{\mathrm{d}x}{\mathrm{d}t} &= (\alpha - \beta y(t))x(t), \\ +\frac{\mathrm{d}y}{\mathrm{d}t} &= (\delta x(t) - \gamma)y(t) +\end{aligned} +$$ + +where $x(t)$ and $y(t)$ denote the populations of prey and predator at time $t$, respectively, and $\alpha, \beta, \gamma, \delta$ are positive parameters. + +We implement the Lotka-Volterra model and simulate it with ideal parameters $\alpha = 1.5$, $\beta = 1$, $\gamma = 3$, and $\delta = 1$ and initial conditions $x(0) = y(0) = 1$. + +We then solve the equations and estimate the parameters of the model with priors for $\alpha$, $\beta$, $\gamma$ and $\delta$ as Normal(1,2), Normal(2,2), Normal(2,2) and Normal(0,2) using a Flux.jl Neural Network, chain_flux. + +And also solve the equations for the constructed ODEProblem's provided ideal `p` values using a Lux.jl Neural Network, chain_lux. + +```julia +function lotka_volterra(u, p, t) + # Model parameters. + α, β, γ, δ = p + # Current state. + x, y = u + + # Evaluate differential equations. + dx = (α - β * y) * x # prey + dy = (δ * x - γ) * y # predator + + return [dx, dy] +end + +# initial-value problem. +u0 = [1.0, 1.0] +p = [1.5, 1.0, 3.0, 1.0] +tspan = (0.0, 6.0) +prob = ODEProblem(lotka_volterra, u0, tspan, p) + +# Plot simulation. +``` +With the [`saveat` argument](https://docs.sciml.ai/latest/basics/common_solver_opts/) we can specify that the solution is stored only at `saveat` time units(default saveat=1 / 50.0). + +```julia +solution = solve(prob, Tsit5(); saveat = 0.05) +plot(solve(prob, Tsit5())) + +``` + +We generate noisy observations to use for the parameter estimation tasks in this tutorial. +To make the example more realistic we add random normally distributed noise to the simulation. + + +```julia +# Dataset creation for parameter estimation +time = solution.t +u = hcat(solution.u...) +x = u[1, :] + 0.5 * randn(length(u[1, :])) +y = u[2, :] + 0.5 * randn(length(u[1, :])) +dataset = [x, y, time] + +# Neural Networks must have 2 outputs as u -> [dx,dy] in function lotka_volterra() +chainflux = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), Flux.Dense(6, 2)) |> Flux.f64 + +chainlux = Lux.Chain(Lux.Dense(1, 6, Lux.tanh), Lux.Dense(6, 6, Lux.tanh), Lux.Dense(6, 2)) +``` +A Dataset is required as parameter estimation is being done using provided priors in `param` keyword argument for BNNODE. + +```julia +alg1 = NeuralPDE.BNNODE(chainflux, + dataset = dataset, + draw_samples = 1000, + l2std = [ + 0.05, + 0.05, + ], + phystd = [ + 0.05, + 0.05, + ], + priorsNNw = (0.0, + 3.0), + param = [ + Normal(1, + 2), + Normal(2, + 2), + Normal(2, + 2), + Normal(0, + 2), + ], + n_leapfrog = 30, progress = true) + +sol_flux_pestim = solve(prob, alg1) + +# Dataset not needed as we are solving the equation with ideal parameters +alg2 = NeuralPDE.BNNODE(chainlux, + draw_samples = 1000, + l2std = [ + 0.05, + 0.05, + ], + phystd = [ + 0.05, + 0.05, + ], + priorsNNw = (0.0, + 3.0), + n_leapfrog = 30, progress = true) + +sol_lux = solve(prob, alg2) + +#testing timepoints must match keyword arg `saveat`` timepoints of solve() call +t=collect(Float64,prob.tspan[1]:1/50.0:prob.tspan[2]) + +``` + +the solution for the ODE is retured as a nested vector sol_flux_pestim.ensemblesol. +here, [$x$ , $y$] would be returned +All estimated ode parameters are returned as a vector sol_flux_pestim.estimated_ode_params. +here, [$\alpha$, $\beta$, $\gamma$, $\delta$] + +```julia +# plotting solution for x,y for chain_flux +plot(t,sol_flux_pestim.ensemblesol[1]) +plot!(t,sol_flux_pestim.ensemblesol[2]) + +# estimated ODE parameters by .estimated_ode_params, weights and biases by .estimated_nn_params +sol_flux_pestim.estimated_nn_params +sol_flux_pestim.estimated_ode_params + +# plotting solution for x,y for chain_lux +plot(t,sol_lux_pestim.ensemblesol[1]) +plot!(t,sol_lux_pestim.ensemblesol[2]) + +# estimated weights and biases by .estimated_nn_params for chain_lux +sol_lux_pestim.estimated_nn_params +``` diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl new file mode 100644 index 0000000000..f79f5208f2 --- /dev/null +++ b/src/BPINN_ode.jl @@ -0,0 +1,298 @@ +# HIGH level API for BPINN ODE solver + +""" +```julia +BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000, + priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05], + phystd = [0.05], dataset = [nothing], + init_params = nothing, physdt = 1 / 20.0, nchains = 1, + autodiff = false, Integrator = Leapfrog, + Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, + Metric = DiagEuclideanMetric, jitter_rate = 3.0, + tempering_rate = 3.0, max_depth = 10, Δ_max = 1000, + n_leapfrog = 20, δ = 0.65, λ = 0.3, progress = false, + verbose = false) +``` + +Algorithm for solving ordinary differential equations using a Bayesian neural network. This is a specialization +of the physics-informed neural network which is used as a solver for a standard `ODEProblem`. + +!!! warn + + Note that BNNODE only supports ODEs which are written in the out-of-place form, i.e. + `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the BNNODE + will exit with an error. + +## Positional Arguments + +* `chain`: A neural network architecture, defined as either a `Flux.Chain` or a `Lux.AbstractExplicitLayer`. +* `Kernel`: Choice of MCMC Sampling Algorithm. Defaults to `AdvancedHMC.HMC` + +## Keyword Arguments +(refer ahmc_bayesian_pinn_ode() keyword arguments.) + +## Example + +```julia +linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t) +tspan = (0.0, 10.0) +u0 = 0.0 +p = [5.0, -5.0] +prob = ODEProblem(linear, u0, tspan, p) +linear_analytic = (u0, p, t) -> exp(-t / 5) * (u0 + sin(t)) + +sol = solve(prob, Tsit5(); saveat = 0.05) +u = sol.u[1:100] +time = sol.t[1:100] +x̂ = u .+ (u .* 0.2) .* randn(size(u)) +dataset = [x̂, time] + +chainlux = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1)) + +alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2000, + l2std = [0.05], phystd = [0.05], + priorsNNw = (0.0, 3.0), + n_leapfrog = 30, progress = true) + +sol_lux = solve(prob, alg) + +# with parameter estimation +alg = NeuralPDE.BNNODE(chainlux,dataset = dataset, + draw_samples = 2000,l2std = [0.05], + phystd = [0.05],priorsNNw = (0.0, 10.0), + param = [Normal(6.5, 0.5), Normal(-3, 0.5)], + n_leapfrog = 30, progress = true) + +sol_lux_pestim = solve(prob, alg) +``` + +## Solution Notes + +Note that the solution is evaluated at fixed time points according to the strategy chosen. +ensemble solution is evaluated and given at steps of `saveat`. +Dataset should only be provided when ODE parameter Estimation is being done. +The neural network is a fully continuous solution so `BPINNsolution` +is an accurate interpolation (up to the neural network training result). In addition, the +`BPINNstats` is returned as `sol.fullsolution` for further analysis. + +## References + +Liu Yanga, Xuhui Menga, George Em Karniadakis. "B-PINNs: Bayesian Physics-Informed Neural Networks for +Forward and Inverse PDE Problems with Noisy Data" + +Kevin Linka, Amelie Schäfer, Xuhui Meng, Zongren Zou, George Em Karniadakis, Ellen Kuhl. +"Bayesian Physics Informed Neural Networks for real-world nonlinear dynamical systems" + +""" +struct BNNODE{C, K, ST <: Union{Nothing, AbstractTrainingStrategy}, IT, A, M, + I <: Union{Nothing, Vector{<:AbstractFloat}}, + P <: Union{Vector{Nothing}, Vector{<:Distribution}}, + D <: + Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}} <: + NeuralPDEAlgorithm + chain::C + Kernel::K + strategy::ST + draw_samples::Int64 + priorsNNw::Tuple{Float64, Float64} + param::P + l2std::Vector{Float64} + phystd::Vector{Float64} + dataset::D + init_params::I + physdt::Float64 + nchains::Int64 + autodiff::Bool + Integrator::IT + Adaptor::A + targetacceptancerate::Float64 + Metric::M + jitter_rate::Float64 + tempering_rate::Float64 + max_depth::Int64 + Δ_max::Int64 + n_leapfrog::Int64 + δ::Float64 + λ::Float64 + progress::Bool + verbose::Bool + + function BNNODE(chain, Kernel = HMC; strategy = nothing, + draw_samples = 2000, + priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05], + phystd = [0.05], dataset = [nothing], + init_params = nothing, + physdt = 1 / 20.0, nchains = 1, + autodiff = false, Integrator = Leapfrog, + Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, + Metric = DiagEuclideanMetric, jitter_rate = 3.0, + tempering_rate = 3.0, max_depth = 10, Δ_max = 1000, + n_leapfrog = 20, δ = 0.65, λ = 0.3, progress = false, + verbose = false) + new{typeof(chain), typeof(Kernel), typeof(strategy), typeof(Integrator), + typeof(Adaptor), + typeof(Metric), typeof(init_params), typeof(param), + typeof(dataset)}(chain, Kernel, strategy, draw_samples, + priorsNNw, param, l2std, + phystd, dataset, init_params, + physdt, nchains, autodiff, Integrator, + Adaptor, targetacceptancerate, + Metric, jitter_rate, tempering_rate, + max_depth, Δ_max, n_leapfrog, + δ, λ, progress, verbose) + end +end + +""" +Contains ahmc_bayesian_pinn_ode() function output: +1> a MCMCChains.jl chain object for sampled parameters +2> The set of all sampled parameters +3> statistics like: + > n_steps + > acceptance_rate + > log_density + > hamiltonian_energy + > hamiltonian_energy_error + > numerical_error + > step_size + > nom_step_size +""" +struct BPINNstats{MC, S, ST} + mcmc_chain::MC + samples::S + statistics::ST +end + +""" +BPINN Solution contains the original solution from AdvancedHMC.jl sampling(BPINNstats contains fields related to that) +> ensemblesol is the Probabilistic Estimate(MonteCarloMeasurements.jl Particles type) of Ensemble solution from All Neural Network's(made using all sampled parameters) output's. +> estimated_nn_params - Probabilistic Estimate of NN params from sampled weights,biases +> estimated_ode_params - Probabilistic Estimate of ODE params from sampled unknown ode paramters +""" +struct BPINNsolution{O <: BPINNstats, E, + NP <: Vector{<:MonteCarloMeasurements.Particles{<:Float64}}, + OP <: Union{Vector{Nothing}, + Vector{<:MonteCarloMeasurements.Particles{<:Float64}}}} + original::O + ensemblesol::E + estimated_nn_params::NP + estimated_ode_params::OP + + function BPINNsolution(original, ensemblesol, estimated_nn_params, estimated_ode_params) + new{typeof(original), typeof(ensemblesol), typeof(estimated_nn_params), + typeof(estimated_ode_params)}(original, ensemblesol, estimated_nn_params, + estimated_ode_params) + end +end + +function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem, + alg::BNNODE, + args...; + dt = nothing, + timeseries_errors = true, + save_everystep = true, + adaptive = false, + abstol = 1.0f-6, + reltol = 1.0f-3, + verbose = false, + saveat = 1 / 50.0, + maxiters = nothing, + numensemble = floor(Int, alg.draw_samples / 3)) + @unpack chain, l2std, phystd, param, priorsNNw, Kernel, strategy, + draw_samples, dataset, init_params, Integrator, Adaptor, Metric, + nchains, max_depth, Δ_max, n_leapfrog, physdt, targetacceptancerate, + jitter_rate, tempering_rate, δ, λ, autodiff, progress, verbose = alg + + # ahmc_bayesian_pinn_ode needs param=[] for easier vcat operation for full vector of parameters + param = param == [nothing] ? [] : param + strategy = strategy === nothing ? GridTraining : strategy + + if draw_samples < 0 + throw(error("Number of samples to be drawn has to be >=0.")) + end + + mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(prob, chain, + strategy = strategy, dataset = dataset, + draw_samples = draw_samples, + init_params = init_params, + physdt = physdt, l2std = l2std, + phystd = phystd, + priorsNNw = priorsNNw, + param = param, + nchains = nchains, + autodiff = autodiff, + Kernel = Kernel, + Integrator = Integrator, + Adaptor = Adaptor, + targetacceptancerate = targetacceptancerate, + Metric = Metric, + jitter_rate = jitter_rate, + tempering_rate = tempering_rate, + max_depth = max_depth, + Δ_max = Δ_max, + n_leapfrog = n_leapfrog, δ = δ, + λ = λ, progress = progress, + verbose = verbose) + + fullsolution = BPINNstats(mcmcchain, samples, statistics) + ninv = length(param) + t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2]) + + if chain isa Lux.AbstractExplicitLayer + θinit, st = Lux.setup(Random.default_rng(), chain) + θ = [vector_to_parameters(samples[i][1:(end - ninv)], θinit) + for i in (draw_samples - numensemble):draw_samples] + luxar = [chain(t', θ[i], st)[1] for i in 1:numensemble] + # only need for size + θinit = collect(ComponentArrays.ComponentArray(θinit)) + elseif chain isa Flux.Chain + θinit, re1 = Flux.destructure(chain) + out = re1.([samples[i][1:(end - ninv)] + for i in (draw_samples - numensemble):draw_samples]) + luxar = collect(out[i](t') for i in eachindex(out)) + else + throw(error("Only Lux.AbstractExplicitLayer and Flux.Chain neural networks are supported")) + end + + # contructing ensemble predictions + ensemblecurves = Vector{}[] + # check if NN output is more than 1 + numoutput = size(luxar[1])[1] + if numoutput > 1 + # Initialize a vector to store the separated outputs for each output dimension + output_matrices = [Vector{Vector{Float32}}() for _ in 1:numoutput] + + # Loop through each element in `luxar` + for element in luxar + for i in 1:numoutput + push!(output_matrices[i], element[i, :]) # Append the i-th output (i-th row) to the i-th output_matrices + end + end + + for r in 1:numoutput + ensem_r = hcat(output_matrices[r]...)' + ensemblecurve_r = prob.u0[r] .+ + [Particles(ensem_r[:, i]) for i in 1:length(t)] .* + (t .- prob.tspan[1]) + push!(ensemblecurves, ensemblecurve_r) + end + + else + ensemblecurve = prob.u0 .+ + [Particles(reduce(vcat, luxar)[:, i]) for i in 1:length(t)] .* + (t .- prob.tspan[1]) + push!(ensemblecurves, ensemblecurve) + end + + nnparams = length(θinit) + estimnnparams = [Particles(reduce(hcat, samples)[i, :]) for i in 1:nnparams] + + if ninv == 0 + estimated_params = [nothing] + else + estimated_params = [Particles(reduce(hcat, samples[(end - ninv + 1):end])[i, :]) + for i in (nnparams + 1):(nnparams + ninv)] + end + + BPINNsolution(fullsolution, ensemblecurves, estimnnparams, estimated_params) +end \ No newline at end of file diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl index c5cddfddff..945093ea04 100644 --- a/src/NeuralPDE.jl +++ b/src/NeuralPDE.jl @@ -23,6 +23,7 @@ using Symbolics using Symbolics: wrap, unwrap, arguments, operation using SymbolicUtils using AdvancedHMC, LogDensityProblems, LinearAlgebra, Functors, MCMCChains +using MonteCarloMeasurements import ModelingToolkit: value, nameof, toexpr, build_expr, expand_derivatives import DomainSets: Domain, ClosedInterval @@ -50,6 +51,7 @@ include("transform_inf_integral.jl") include("discretize.jl") include("neural_adapter.jl") include("advancedHMC_MCMC.jl") +include("BPINN_ode.jl") export NNODE, TerminalPDEProblem, NNPDEHan, NNPDENS, NNRODE, KolmogorovPDEProblem, NNKolmogorov, NNStopping, ParamKolmogorovPDEProblem, @@ -63,6 +65,6 @@ export NNODE, TerminalPDEProblem, NNPDEHan, NNPDENS, NNRODE, build_symbolic_equation, build_symbolic_loss_function, symbolic_discretize, AbstractAdaptiveLoss, NonAdaptiveLoss, GradientScaleAdaptiveLoss, MiniMaxAdaptiveLoss, - LogOptions, ahmc_bayesian_pinn_ode + LogOptions, ahmc_bayesian_pinn_ode, BNNODE end # module diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl index 6b7a1cfce3..9bd4243cf6 100644 --- a/src/advancedHMC_MCMC.jl +++ b/src/advancedHMC_MCMC.jl @@ -1,10 +1,13 @@ -mutable struct LogTargetDensity{C, S, I, P <: Vector{<:Distribution}, - D <: Vector{<:Vector{<:AbstractFloat}} - } +mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I, + P <: Vector{<:Distribution}, + D <: + Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}, +} dim::Int prob::DiffEqBase.ODEProblem chain::C st::S + strategy::ST dataset::D priors::P phystd::Vector{Float64} @@ -14,70 +17,56 @@ mutable struct LogTargetDensity{C, S, I, P <: Vector{<:Distribution}, extraparams::Int init_params::I - function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, dataset, - priors, phystd, l2std, autodiff, physdt, extraparams, - init_params::AbstractVector) - new{typeof(chain), Nothing, typeof(init_params), typeof(priors), typeof(dataset)}(dim, - prob, - chain, - nothing, - dataset, - priors, - phystd, - l2std, - autodiff, - physdt, - extraparams, - init_params) + function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy, + dataset, + priors, phystd, l2std, autodiff, physdt, extraparams, + init_params::AbstractVector) + new{ + typeof(chain), + Nothing, + typeof(strategy), + typeof(init_params), + typeof(priors), + typeof(dataset), + }(dim, + prob, + chain, + nothing, strategy, + dataset, + priors, + phystd, + l2std, + autodiff, + physdt, + extraparams, + init_params) end - function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, dataset, - priors, phystd, l2std, autodiff, physdt, extraparams, - init_params::NamedTuple) - new{typeof(chain), typeof(st), typeof(init_params), typeof(priors), typeof(dataset) - }(dim, - prob, - chain, st, - dataset, priors, - phystd, l2std, - autodiff, - physdt, - extraparams, - init_params) + function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy, + dataset, + priors, phystd, l2std, autodiff, physdt, extraparams, + init_params::NamedTuple) + new{ + typeof(chain), + typeof(st), + typeof(strategy), + typeof(init_params), + typeof(priors), + typeof(dataset), + }(dim, + prob, + chain, st, strategy, + dataset, priors, + phystd, l2std, + autodiff, + physdt, + extraparams, + init_params) end end -function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ) - return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ) -end - -LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim - -function LogDensityProblems.capabilities(::LogTargetDensity) - LogDensityProblems.LogDensityOrder{1}() -end - -function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params) - θ, st = Lux.setup(Random.default_rng(), chain) - return init_params, chain, st -end - -function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing) - θ, st = Lux.setup(Random.default_rng(), chain) - return θ, chain, st -end - -function generate_Tar(chain::Flux.Chain, init_params) - θ, re = Flux.destructure(chain) - return init_params, re, nothing -end - -function generate_Tar(chain::Flux.Chain, init_params::Nothing) - θ, re = Flux.destructure(chain) - # find_good_stepsize,phasepoint takes only float64 - return θ, re, nothing -end - -# For vector of samples to Lux ComponentArrays +""" +cool function to convert parameter's vector to ComponentArray of parameters (for Lux Chain: vector of samples -> Lux ComponentArrays) +""" function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple) @assert length(ps_new) == Lux.parameterlength(ps) i = 1 @@ -89,56 +78,49 @@ function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple) return Functors.fmap(get_ps, ps) end -# nn OUTPUT AT t -function (f::LogTargetDensity{C, S})(t::AbstractVector, - θ) where {C <: Optimisers.Restructure, S} - f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* f.chain(θ)(adapt(parameterless_type(θ), t')) -end - -function (f::LogTargetDensity{C, S})(t::AbstractVector, - θ) where {C <: Lux.AbstractExplicitLayer, S} - θ = vector_to_parameters(θ, f.init_params) - y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st) - ChainRulesCore.@ignore_derivatives f.st = st - f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y +function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ) + return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ) end -function (f::LogTargetDensity{C, S})(t::Number, - θ) where {C <: Optimisers.Restructure, S} - # must handle paired odes hence u0 broadcasted - f.prob.u0 .+ (t - f.prob.tspan[1]) * f.chain(θ)(adapt(parameterless_type(θ), [t])) -end +LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim -function (f::LogTargetDensity{C, S})(t::Number, - θ) where {C <: Lux.AbstractExplicitLayer, S} - θ = vector_to_parameters(θ, f.init_params) - y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st) - ChainRulesCore.@ignore_derivatives f.st = st - f.prob.u0 .+ (t .- f.prob.tspan[1]) .* y +function LogDensityProblems.capabilities(::LogTargetDensity) + LogDensityProblems.LogDensityOrder{1}() end -# ODE DU/DX -function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool) - if autodiff - hcat(ForwardDiff.derivative.(ti -> phi(ti, θ), t)...) +""" +L2 loss loglikelihood(needed for ODE parameter estimation) +""" +function L2LossData(Tar::LogTargetDensity, θ) + # check if dataset is provided + if Tar.dataset isa Vector{Nothing} || Tar.extraparams == 0 + return 0 else - (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t))) + # matrix(each row corresponds to vector u's rows) + nn = Tar(Tar.dataset[end], θ[1:(length(θ) - Tar.extraparams)]) + + L2logprob = 0 + for i in 1:length(Tar.prob.u0) + # for u[i] ith vector must be added to dataset,nn[1,:] is the dx in lotka_volterra + L2logprob += logpdf(MvNormal(nn[i, :], + LinearAlgebra.Diagonal(map(abs2, + Tar.l2std[i] .* + ones(length(Tar.dataset[i]))))), + Tar.dataset[i]) + end + return L2logprob end end -# physics loglikelihood over problem timespan +""" +physics loglikelihood over problem timespan + dataset timepoints +""" function physloglikelihood(Tar::LogTargetDensity, θ) f = Tar.prob.f p = Tar.prob.p - dt = Tar.physdt - - # Timepoints to enforce Physics - if isempty(Tar.dataset[end]) - t = collect(eltype(dt), Tar.prob.tspan[1]:dt:Tar.prob.tspan[2]) - else - t = vcat(collect(eltype(dt), Tar.prob.tspan[1]:dt:Tar.prob.tspan[2]), - Tar.dataset[end]) - end + tspan = Tar.prob.tspan + autodiff = Tar.autodiff + strategy = Tar.strategy # parameter estimation chosen or not if Tar.extraparams > 0 @@ -149,13 +131,95 @@ function physloglikelihood(Tar::LogTargetDensity, θ) ode_params = p == SciMLBase.NullParameters() ? [] : p end - # train for NN deriative upon dataset as well as beyond but within timespan - autodiff = Tar.autodiff + return getlogpdf(strategy, Tar, f, autodiff, tspan, ode_params, θ) +end - # compare derivatives(matrix) +function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::Bool, + tspan, + ode_params, θ) + if Tar.dataset isa Vector{Nothing} + t = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2]) + else + t = vcat(collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2]), + Tar.dataset[end]) + end + + sum(innerdiff(Tar, f, autodiff, t, θ, + ode_params)) +end + +function getlogpdf(strategy::StochasticTraining, + Tar::LogTargetDensity, + f, + autodiff::Bool, + tspan, + ode_params, + θ) + if Tar.dataset isa Vector{Nothing} + t = [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)] + else + t = vcat([(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)], + Tar.dataset[end]) + end + + sum(innerdiff(Tar, f, autodiff, t, θ, + ode_params)) +end + +function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f, + autodiff::Bool, + tspan, + ode_params, θ) + function integrand(t::Number, θ) + innerdiff(Tar, f, autodiff, [t], θ, ode_params) + end + intprob = IntegralProblem(integrand, tspan[1], tspan[2], θ; nout = length(Tar.prob.u0)) + sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol) + sum(sol.u) +end + +function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f, + autodiff::Bool, + tspan, + ode_params, θ) + minT = tspan[1] + maxT = tspan[2] + + weights = strategy.weights ./ sum(strategy.weights) + + N = length(weights) + points = strategy.points + + difference = (maxT - minT) / N + + data = Float64[] + for (index, item) in enumerate(weights) + temp_data = rand(1, trunc(Int, points * item)) .* difference .+ minT .+ + ((index - 1) * difference) + data = append!(data, temp_data) + end + + if Tar.dataset isa Vector{Nothing} + t = data + else + t = vcat(data, + Tar.dataset[end]) + end + + sum(innerdiff(Tar, f, autodiff, t, θ, + ode_params)) +end + +""" +MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ +""" +function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ, + ode_params) + + # Tar used for phi and LogTargetDensity object attributes access out = Tar(t, θ[1:(length(θ) - Tar.extraparams)]) - # reject samples case + # # reject samples case(write clear reason why) if any(isinf, out[:, 1]) || any(isinf, ode_params) return -Inf end @@ -163,55 +227,32 @@ function physloglikelihood(Tar::LogTargetDensity, θ) # this is a vector{vector{dx,dy}}(handle case single u(float passed)) if length(out[:, 1]) == 1 physsol = [f(out[:, i][1], - ode_params, - t[i]) + ode_params, + t[i]) for i in 1:length(out[1, :])] else physsol = [f(out[:, i], - ode_params, - t[i]) + ode_params, + t[i]) for i in 1:length(out[1, :])] end physsol = reduce(hcat, physsol) - # convert to matrix as nnsol nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff) - physlogprob = 0 - for i in 1:length(Tar.prob.u0) - # can add phystd[i] for u[i] - physlogprob += logpdf(MvNormal(nnsol[i, :], - LinearAlgebra.Diagonal(map(abs2, - Tar.phystd[i] .* - ones(length(physsol[i, :]))))), - physsol[i, :]) - end - return physlogprob -end - -# L2 losses loglikelihood(needed mainly for ODE parameter estimation) -function L2LossData(Tar::LogTargetDensity, θ) - # check if dataset is provided - if isempty(Tar.dataset[end]) || Tar.extraparams == 0 - return 0 - else - # matrix(each row corresponds to vector u's rows) - nn = Tar(Tar.dataset[end], θ[1:(length(θ) - Tar.extraparams)]) + vals = nnsol .- physsol - L2logprob = 0 - for i in 1:length(Tar.prob.u0) - # for u[i] ith vector must be added to dataset,nn[1,:] is the dx in lotka_volterra - L2logprob += logpdf(MvNormal(nn[i, :], - LinearAlgebra.Diagonal(map(abs2, - Tar.l2std[i] .* - ones(length(Tar.dataset[i]))))), - Tar.dataset[i]) - end - return L2logprob - end + # N dimensional vector if N outputs for NN(each row has logpdf of i[i] where u is vector of dependant variables) + return [logpdf(MvNormal(vals[i, :], + LinearAlgebra.Diagonal(map(abs2, + Tar.phystd[i] .* + ones(length(vals[i, :]))))), + zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)] end -# priors for NN parameters + ODE constants +""" +prior logpdf for NN parameters + ODE constants +""" function priorweights(Tar::LogTargetDensity, θ) allparams = Tar.priors # nn weights @@ -232,6 +273,68 @@ function priorweights(Tar::LogTargetDensity, θ) end end +function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params) + θ, st = Lux.setup(Random.default_rng(), chain) + return init_params, chain, st +end + +function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing) + θ, st = Lux.setup(Random.default_rng(), chain) + return θ, chain, st +end + +function generate_Tar(chain::Flux.Chain, init_params) + θ, re = Flux.destructure(chain) + return init_params, re, nothing +end + +function generate_Tar(chain::Flux.Chain, init_params::Nothing) + θ, re = Flux.destructure(chain) + # find_good_stepsize,phasepoint takes only float64 + return θ, re, nothing +end + +""" +nn OUTPUT AT t,θ ~ phi(t,θ) +""" +function (f::LogTargetDensity{C, S})(t::AbstractVector, + θ) where {C <: Optimisers.Restructure, S} + f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* f.chain(θ)(adapt(parameterless_type(θ), t')) +end + +function (f::LogTargetDensity{C, S})(t::AbstractVector, + θ) where {C <: Lux.AbstractExplicitLayer, S} + θ = vector_to_parameters(θ, f.init_params) + y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st) + ChainRulesCore.@ignore_derivatives f.st = st + f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y +end + +function (f::LogTargetDensity{C, S})(t::Number, + θ) where {C <: Optimisers.Restructure, S} + # must handle paired odes hence u0 broadcasted + f.prob.u0 .+ (t - f.prob.tspan[1]) * f.chain(θ)(adapt(parameterless_type(θ), [t])) +end + +function (f::LogTargetDensity{C, S})(t::Number, + θ) where {C <: Lux.AbstractExplicitLayer, S} + θ = vector_to_parameters(θ, f.init_params) + y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st) + ChainRulesCore.@ignore_derivatives f.st = st + f.prob.u0 .+ (t .- f.prob.tspan[1]) .* y +end + +""" +similar to ode_dfdx() in NNODE/ode_solve.jl +""" +function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool) + if autodiff + hcat(ForwardDiff.derivative.(ti -> phi(ti, θ), t)...) + else + (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t))) + end +end + function kernelchoice(Kernel, max_depth, Δ_max, n_leapfrog, δ, λ) if Kernel == HMC Kernel(n_leapfrog) @@ -243,7 +346,7 @@ function kernelchoice(Kernel, max_depth, Δ_max, n_leapfrog, δ, λ) end function integratorchoice(Integrator, initial_ϵ, jitter_rate, - tempering_rate) + tempering_rate) if Integrator == JitteredLeapfrog Integrator(initial_ϵ, jitter_rate) elseif Integrator == TemperedLeapfrog @@ -263,8 +366,8 @@ end """ ```julia -ahmc_bayesian_pinn_ode(prob, chain; - dataset = [[]],init_params = nothing, +ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining, + dataset = [nothing],init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05], phystd = [0.05], priorsNNw = (0.0, 2.0), param = [],nchains = 1,autodiff = false, Kernel = HMC, @@ -274,6 +377,11 @@ ahmc_bayesian_pinn_ode(prob, chain; Δ_max = 1000, n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = false,verbose = false) ``` +!!! warn + + Note that ahmc_bayesian_pinn_ode() only supports ODEs which are written in the out-of-place form, i.e. + `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the ahmc_bayesian_pinn_ode() + will exit with an error. ## Example linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t) @@ -315,64 +423,68 @@ Dataset is required for accurate Parameter estimation + solving equations Incase you are only solving the Equations for solution, do not provide dataset ## Positional Arguments -prob -> DEProblem(out of place and the function signature should be f(u,p,t) -chain -> Lux/Flux Neural Netork which would be made the Bayesian PINN -dataset -> Vector containing Vectors of corresponding u,t values -init_params -> intial parameter values for BPINN (ideally for multiple chains different initializations preferred) -nchains -> number of chains you want to sample (random initialisation of params by default) -draw_samples -> number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples) -l2std -> standard deviation of BPINN predicition against L2 losses/Dataset -phystd -> standard deviation of BPINN predicition against Chosen Underlying ODE System -priorsNNw -> Vector of [mean, std] for BPINN parameter. Weights and Biases of BPINN are Normal Distributions by default -param -> Vector of chosen ODE parameters Distributions in case of Inverse problems. -autodiff -> Boolean Value for choice of Derivative Backend(default is numerical) -physdt -> Timestep for approximating ODE in it's Time domain. (1/20.0 by default) - -# AHMC still developing convenience structs so might need changes on new releases. -Kernel -> Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implemenations HMC/NUTS/HMCDA) -targetacceptancerate -> Target percentage(in decimal) of iterations in which the proposals were accepted(0.8 by default) -Integrator(jitter_rate, tempering_rate), Metric, Adaptor -> https://turinglang.org/AdvancedHMC.jl/stable/ -max_depth -> Maximum doubling tree depth (NUTS) -Δ_max -> Maximum divergence during doubling tree (NUTS) -n_leapfrog -> number of leapfrog steps for HMC -δ -> target acceptance probability for NUTS/HMCDA -λ -> target trajectory length for HMCDA -progress -> controls whether to show the progress meter or not. -verbose -> controls the verbosity. (Sample call args in AHMC) - -## References +* `prob`: DEProblem(out of place and the function signature should be f(u,p,t) +* `chain`: Lux/Flux Neural Netork which would be made the Bayesian PINN + +## Keyword Arguments +* `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with given physdt discretization. +* `dataset`: Vector containing Vectors of corresponding u,t values +* `init_params`: intial parameter values for BPINN (ideally for multiple chains different initializations preferred) +* `nchains`: number of chains you want to sample (random initialisation of params by default) +* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples) +* `l2std`: standard deviation of BPINN predicition against L2 losses/Dataset +* `phystd`: standard deviation of BPINN predicition against Chosen Underlying ODE System +* `priorsNNw`: Vector of [mean, std] for BPINN parameter. Weights and Biases of BPINN are Normal Distributions by default +* `param`: Vector of chosen ODE parameters Distributions in case of Inverse problems. +* `autodiff`: Boolean Value for choice of Derivative Backend(default is numerical) +* `physdt`: Timestep for approximating ODE in it's Time domain. (1/20.0 by default) + +# AHMC.jl is still developing convenience structs so might need changes on new releases. +* `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implemenations HMC/NUTS/HMCDA) +* `targetacceptancerate`: Target percentage(in decimal) of iterations in which the proposals were accepted(0.8 by default) +* `Integrator(jitter_rate, tempering_rate), Metric, Adaptor`: https://turinglang.org/AdvancedHMC.jl/stable/ +* `max_depth`: Maximum doubling tree depth (NUTS) +* `Δ_max`: Maximum divergence during doubling tree (NUTS) +* `n_leapfrog`: number of leapfrog steps for HMC +* `δ`: target acceptance probability for NUTS/HMCDA +* `λ`: target trajectory length for HMCDA +* `progress`: controls whether to show the progress meter or not. +* `verbose`: controls the verbosity. (Sample call args in AHMC) """ -# dataset would be (x̂,t) -# priors: pdf for W,b + pdf for ODE params -function ahmc_bayesian_pinn_ode(prob::DiffEqBase.DEProblem, chain; - dataset = [[]], - init_params = nothing, draw_samples = 1000, - physdt = 1 / 20.0, l2std = [0.05], - phystd = [0.05], priorsNNw = (0.0, 2.0), - param = [], nchains = 1, - autodiff = false, - Kernel = HMC, Integrator = Leapfrog, - Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, - Metric = DiagEuclideanMetric, jitter_rate = 3.0, - tempering_rate = 3.0, max_depth = 10, Δ_max = 1000, - n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = false, - verbose = false) +""" +dataset would be (x̂,t) +priors: pdf for W,b + pdf for ODE params +""" +function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain; + strategy = GridTraining, dataset = [nothing], + init_params = nothing, draw_samples = 1000, + physdt = 1 / 20.0, l2std = [0.05], + phystd = [0.05], priorsNNw = (0.0, 2.0), + param = [], nchains = 1, autodiff = false, + Kernel = HMC, Integrator = Leapfrog, + Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, + Metric = DiagEuclideanMetric, jitter_rate = 3.0, + tempering_rate = 3.0, max_depth = 10, Δ_max = 1000, + n_leapfrog = 10, δ = 0.65, λ = 0.3, progress = false, + verbose = false) # NN parameter prior mean and variance(PriorsNN must be a tuple) if isinplace(prob) throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t).")) end - if dataset != [] && + strategy = strategy == GridTraining ? strategy(physdt) : strategy + + if dataset != [nothing] && (length(dataset) < 2 || !(typeof(dataset) <: Vector{<:Vector{<:AbstractFloat}})) throw(error("Invalid dataset. dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}")) end - if dataset != [] && param == [] + if dataset != [nothing] && param == [] println("Dataset is only needed for Parameter Estimation + Forward Problem, not in only Forward Problem case.") - elseif dataset == [] && param != [] + elseif dataset == [nothing] && param != [] throw(error("Dataset Required for Parameter Estimation.")) end @@ -393,7 +505,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.DEProblem, chain; if chain isa Lux.AbstractExplicitLayer # Lux chain(using component array later as vector_to_parameter need namedtuple) initial_θ = collect(eltype(physdt), - vcat(ComponentArrays.ComponentArray(initial_nnθ))) + vcat(ComponentArrays.ComponentArray(initial_nnθ))) else initial_θ = collect(eltype(physdt), initial_nnθ) end @@ -403,7 +515,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.DEProblem, chain; ninv = length(param) priors = [ MvNormal(priorsNNw[1] * ones(nparameters), - LinearAlgebra.Diagonal(map(abs2, priorsNNw[2] .* ones(nparameters)))), + LinearAlgebra.Diagonal(map(abs2, priorsNNw[2] .* ones(nparameters)))), ] # append Ode params to all paramvector @@ -416,9 +528,8 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.DEProblem, chain; t0 = prob.tspan[1] # dimensions would be total no of params,initial_nnθ for Lux namedTuples - ℓπ = LogTargetDensity(nparameters, prob, recon, st, dataset, priors, - phystd, l2std, autodiff, physdt, ninv, - initial_nnθ) + ℓπ = LogTargetDensity(nparameters, prob, recon, st, strategy, dataset, priors, + phystd, l2std, autodiff, physdt, ninv, initial_nnθ) try ℓπ(t0, initial_θ[1:(nparameters - ninv)]) @@ -444,16 +555,16 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.DEProblem, chain; Threads.@threads for i in 1:nchains # each chain has different initial NNparameter values(better posterior exploration) initial_θ = vcat(randn(nparameters - ninv), - initial_θ[(nparameters - ninv + 1):end]) + initial_θ[(nparameters - ninv + 1):end]) initial_ϵ = find_good_stepsize(hamiltonian, initial_θ) integrator = integratorchoice(Integrator, initial_ϵ, jitter_rate, - tempering_rate) + tempering_rate) adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric), - StepSizeAdaptor(targetacceptancerate, integrator)) + StepSizeAdaptor(targetacceptancerate, integrator)) Kernel = AdvancedHMC.make_kernel(kernelchoice(Kernel, max_depth, Δ_max, - n_leapfrog, δ, λ), integrator) + n_leapfrog, δ, λ), integrator) samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor; - progress = progress, verbose = verbose) + progress = progress, verbose = verbose) samplesc[i] = samples statsc[i] = stats @@ -466,11 +577,11 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.DEProblem, chain; initial_ϵ = find_good_stepsize(hamiltonian, initial_θ) integrator = integratorchoice(Integrator, initial_ϵ, jitter_rate, tempering_rate) adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric), - StepSizeAdaptor(targetacceptancerate, integrator)) + StepSizeAdaptor(targetacceptancerate, integrator)) Kernel = AdvancedHMC.make_kernel(kernelchoice(Kernel, max_depth, Δ_max, n_leapfrog, - δ, λ), integrator) + δ, λ), integrator) samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, - adaptor; progress = progress, verbose = verbose) + adaptor; progress = progress, verbose = verbose) # return a chain(basic chain),samples and stats matrix_samples = hcat(samples...) diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl index 3fabbdfc3b..b04483015b 100644 --- a/test/BPINN_Tests.jl +++ b/test/BPINN_Tests.jl @@ -1,9 +1,12 @@ -# Testing Code +# # Testing Code using Test, MCMCChains using ForwardDiff, Distributions, OrdinaryDiffEq -using NeuralPDE, Flux, OptimizationOptimisers, AdvancedHMC, Lux +using Flux, OptimizationOptimisers, AdvancedHMC, Lux using Statistics, Random, Functors, ComponentArrays +using NeuralPDE, MonteCarloMeasurements +# note that current testing bounds can be easily further tightened but have been inflated for support for Julia build v1 +# on latest Julia version it performs much better for below tests Random.seed!(100) # for sampled params->lux ComponentArray @@ -24,57 +27,74 @@ linear = (u, p, t) -> cos(2 * π * t) tspan = (0.0, 2.0) u0 = 0.0 prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan) +p = prob.p -# Numerical and Analytical Solutions +# Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode() ta = range(tspan[1], tspan[2], length = 300) u = [linear_analytic(u0, nothing, ti) for ti in ta] -sol1 = solve(prob, Tsit5()) - -# BPINN AND TRAINING DATASET CREATION, NN create, Reconstruct x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u))) time = vec(collect(Float64, ta)) -dataset = [x̂[1:100], time[1:100]] +physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)] + +# testing points for solve() call must match saveat(1/50.0) arg +ta0 = range(tspan[1], tspan[2], length = 101) +u1 = [linear_analytic(u0, nothing, ti) for ti in ta0] +x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1))) +time1 = vec(collect(Float64, ta0)) +physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] -# Call BPINN, create chain -chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> f64 +chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64 chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1)) +init1, re1 = destructure(chainflux) +θinit, st = Lux.setup(Random.default_rng(), chainlux) fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux, - dataset = dataset, - draw_samples = 2500, - n_leapfrog = 30) + draw_samples = 2500, + n_leapfrog = 30) fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux, - dataset = dataset, - draw_samples = 2500, - n_leapfrog = 30) + draw_samples = 2500, + n_leapfrog = 30) -init1, re1 = destructure(chainflux) -θinit, st = Lux.setup(Random.default_rng(), chainlux) +# can change training strategies by adding this to call (Quadratuer and GridTraining show good results but stochastics sampling techniques perform bad) +# strategy = QuadratureTraining(; quadrature_alg = QuadGKJL(), +# reltol = 1e-6, +# abstol = 1e-3, maxiters = 1000, +# batch = 0) -# TESTING TIMEPOINTS TO PLOT ON,Actual Sols and actual data -t = time -p = prob.p -physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)] -physsol2 = [linear(physsol1[i], p, t[i]) for i in eachindex(t)] +alg = NeuralPDE.BNNODE(chainflux, draw_samples = 2500, + n_leapfrog = 30) +sol1flux = solve(prob, alg) -# Mean of last 1000 sampled parameter's curves(flux and lux chains)[Ensemble predictions] +alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2500, + n_leapfrog = 30) +sol1lux = solve(prob, alg) + +# testing points +t = time +# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions] out = re1.(fhsamples1[(end - 500):end]) yu = collect(out[i](t') for i in eachindex(out)) fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)] meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean -θ = [vector_to_parameters(fhsamples2[i], θinit) for i in 2000:2500] +θ = [vector_to_parameters(fhsamples1[i], θinit) for i in 2000:2500] luxar = [chainlux(t', θ[i], st)[1] for i in 1:500] luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean +# --------------------- ahmc_bayesian_pinn_ode() call @test mean(abs.(x̂ .- meanscurve1)) < 0.05 @test mean(abs.(physsol1 .- meanscurve1)) < 0.005 @test mean(abs.(x̂ .- meanscurve2)) < 0.05 @test mean(abs.(physsol1 .- meanscurve2)) < 0.005 -println("now parameter estimation problem 1") +#--------------------- solve() call +@test mean(abs.(x̂1 .- sol1flux.ensemblesol[1])) < 0.05 +@test mean(abs.(physsol0_1 .- sol1flux.ensemblesol[1])) < 0.05 +@test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05 +@test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05 + ## PROBLEM-1 (WITH PARAMETER ESTIMATION) linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p) linear = (u, p, t) -> cos(p * t) @@ -88,45 +108,74 @@ sol1 = solve(prob, Tsit5(); saveat = 0.01) u = sol1.u time = sol1.t -# BPINN AND TRAINING DATASET CREATION -ta = range(tspan[1], tspan[2], length = 200) +# BPINN AND TRAINING DATASET CREATION(dataset must be defined only inside problem timespan!) +ta = range(tspan[1], tspan[2], length = 100) u = [linear_analytic(u0, p, ti) for ti in ta] -x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u))) +x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u))) time = vec(collect(Float64, ta)) -dataset = [x̂[1:50], time[1:50]] +dataset = [x̂, time] +physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)] -# comparing how diff NNs capture non-linearity -chainflux1 = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> f64 +# testing points for solve call(saveat=1/50.0 ∴ at t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2] internally estimates) +ta0 = range(tspan[1], tspan[2], length = 101) +u1 = [linear_analytic(u0, p, ti) for ti in ta0] +x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1))) +time1 = vec(collect(Float64, ta0)) +physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] + +chainflux1 = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64 chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1)) +init1, re1 = destructure(chainflux1) +θinit, st = Lux.setup(Random.default_rng(), chainlux1) fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux1, - dataset = dataset, - draw_samples = 2500, - physdt = 1 / 50.0f0, - priorsNNw = (0.0, 3.0), - param = [LogNormal(9, 0.5)], - Metric = DiagEuclideanMetric, - n_leapfrog = 30) + dataset = dataset, + draw_samples = 2500, + physdt = 1 / 50.0f0, + priorsNNw = (0.0, + 3.0), + param = [ + LogNormal(9, + 0.5), + ], + Metric = DiagEuclideanMetric, + n_leapfrog = 30) fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux1, - dataset = dataset, - draw_samples = 2500, - physdt = 1 / 50.0f0, - priorsNNw = (0.0, 3.0), - param = [LogNormal(9, 0.5)], - Metric = DiagEuclideanMetric, - n_leapfrog = 30) - -init1, re1 = destructure(chainflux1) -θinit, st = Lux.setup(Random.default_rng(), chainlux1) - -# PLOT testing points + dataset = dataset, + draw_samples = 2500, + physdt = 1 / 50.0f0, + priorsNNw = (0.0, 3.0), + param = [LogNormal(9, 0.5)], + Metric = DiagEuclideanMetric, + n_leapfrog = 30) + +alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset, + draw_samples = 2500, physdt = 1 / 50.0f0, + priorsNNw = (0.0, 3.0), + param = [LogNormal(9, 0.5)], + Metric = DiagEuclideanMetric, + n_leapfrog = 30) + +sol2flux = solve(prob, alg) + +alg = NeuralPDE.BNNODE(chainlux1, dataset = dataset, + draw_samples = 2500, + physdt = 1 / 50.0f0, + priorsNNw = (0.0, + 3.0), + param = [ + LogNormal(9, + 0.5), + ], + Metric = DiagEuclideanMetric, + n_leapfrog = 30) + +sol2lux = solve(prob, alg) + +# testing points t = time -p = prob.p -physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)] -physsol2 = [linear(physsol1[i], p, t[i]) for i in eachindex(t)] - -# Mean of last 1000 sampled parameter's curves(flux and lux chains)[Ensemble predictions] +# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions] out = re1.([fhsamples1[i][1:22] for i in 2000:2500]) yu = collect(out[i](t') for i in eachindex(out)) fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)] @@ -137,142 +186,181 @@ luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500] luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean -@test mean(abs.(x̂ .- meanscurve1)) < 5e-1 -@test mean(abs.(physsol1 .- meanscurve1)) < 5e-1 -@test mean(abs.(x̂ .- meanscurve2)) < 5e-2 -@test mean(abs.(physsol1 .- meanscurve2)) < 5e-2 +# --------------------- ahmc_bayesian_pinn_ode() call +@test mean(abs.(physsol1 .- meanscurve1)) < 0.15 +@test mean(abs.(physsol1 .- meanscurve2)) < 0.15 # ESTIMATED ODE PARAMETERS (NN1 AND NN2) -@test abs(p - mean([fhsamples2[i][23] for i in 2000:2500])) < abs(0.2 * p) -@test abs(p - mean([fhsamples1[i][23] for i in 2000:2500])) < abs(0.2 * p) +@test abs(p - mean([fhsamples2[i][23] for i in 2000:2500])) < abs(0.25 * p) +@test abs(p - mean([fhsamples1[i][23] for i in 2000:2500])) < abs(0.25 * p) + +#-------------------------- solve() call +@test mean(abs.(physsol1_1 .- sol2flux.ensemblesol[1])) < 8e-2 +@test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2 + +# ESTIMATED ODE PARAMETERS (NN1 AND NN2) +@test abs(p - sol2flux.estimated_ode_params[1]) < abs(0.15 * p) +@test abs(p - sol2lux.estimated_ode_params[1]) < abs(0.15 * p) ## PROBLEM-2 -linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t) +linear = (u, p, t) -> u / p + exp(t / p) * cos(t) tspan = (0.0, 10.0) u0 = 0.0 -p = [5.0, -5.0] +p = -5.0 prob = ODEProblem(linear, u0, tspan, p) +linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t)) -# PROBLEM-2 -linear_analytic = (u0, p, t) -> exp(-t / 5) * (u0 + sin(t)) - -# PLOT SOLUTION AND CREATE DATASET -sol = solve(prob, Tsit5(); saveat = 0.05) -u = sol.u[1:100] -time = sol.t[1:100] - -# dataset and BPINN create -x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u))) +# SOLUTION AND CREATE DATASET +sol = solve(prob, Tsit5(); saveat = 0.1) +u = sol.u +time = sol.t +x̂ = u .+ (u .* 0.2) .* randn(size(u)) dataset = [x̂, time] +t = sol.t +physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)] + +ta0 = range(tspan[1], tspan[2], length = 501) +u1 = [linear_analytic(u0, p, ti) for ti in ta0] +time1 = vec(collect(Float64, ta0)) +physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] chainflux12 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), - Flux.Dense(6, 1)) |> f64 + Flux.Dense(6, 1)) |> Flux.f64 chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1)) +init1, re1 = destructure(chainflux12) +θinit, st = Lux.setup(Random.default_rng(), chainlux12) fh_mcmc_chainflux12, fhsamplesflux12, fhstatsflux12 = ahmc_bayesian_pinn_ode(prob, - chainflux12, - dataset = dataset, - draw_samples = 2000, - l2std = [0.05], - phystd = [ - 0.05, - ], - priorsNNw = (0.0, - 3.0), - n_leapfrog = 30) + chainflux12, + draw_samples = 1500, + l2std = [0.03], + phystd = [ + 0.03], + priorsNNw = (0.0, + 10.0), + n_leapfrog = 30) fh_mcmc_chainflux22, fhsamplesflux22, fhstatsflux22 = ahmc_bayesian_pinn_ode(prob, - chainflux12, - dataset = dataset, - draw_samples = 2000, - l2std = [0.05], - phystd = [ - 0.05, - ], - priorsNNw = (0.0, - 3.0), - param = [ - Normal(6.5, - 0.5), - Normal(-3, - 0.5), - ], - n_leapfrog = 30) + chainflux12, + dataset = dataset, + draw_samples = 1500, + l2std = [0.03], + phystd = [ + 0.03, + ], + priorsNNw = (0.0, + 10.0), + param = [ + Normal(-7, + 4), + ], + n_leapfrog = 30) fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12, - dataset = dataset, - draw_samples = 2000, - l2std = [0.05], - phystd = [0.05], - priorsNNw = (0.0, - 3.0), - n_leapfrog = 30) + draw_samples = 1500, + l2std = [0.03], + phystd = [0.03], + priorsNNw = (0.0, + 10.0), + n_leapfrog = 30) fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, chainlux12, - dataset = dataset, - draw_samples = 2000, - l2std = [0.05], - phystd = [0.05], - priorsNNw = (0.0, - 3.0), - param = [ - Normal(6.5, - 0.5), - Normal(-3, - 0.5), - ], - n_leapfrog = 30) - -init1, re1 = destructure(chainflux12) -θinit, st = Lux.setup(Random.default_rng(), chainlux12) - -# PLOT testing points + dataset = dataset, + draw_samples = 1500, + l2std = [0.03], + phystd = [0.03], + priorsNNw = (0.0, + 10.0), + param = [ + Normal(-7, + 4), + ], + n_leapfrog = 30) + +alg = NeuralPDE.BNNODE(chainflux12, + dataset = dataset, + draw_samples = 1500, + l2std = [0.03], + phystd = [ + 0.03, + ], + priorsNNw = (0.0, + 10.0), + param = [ + Normal(-7, + 4), + ], + n_leapfrog = 30) + +sol3flux_pestim = solve(prob, alg) + +alg = NeuralPDE.BNNODE(chainlux12, + dataset = dataset, + draw_samples = 1500, + l2std = [0.03], + phystd = [0.03], + priorsNNw = (0.0, + 10.0), + param = [ + Normal(-7, + 4), + ], + n_leapfrog = 30) + +sol3lux_pestim = solve(prob, alg) + +# testing timepoints t = sol.t -p = prob.p -physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)] - +#------------------------------ ahmc_bayesian_pinn_ode() call # Mean of last 500 sampled parameter's curves(flux chains)[Ensemble predictions] -out = re1.([fhsamplesflux12[i][1:61] for i in 1500:2000]) +out = re1.([fhsamplesflux12[i][1:61] for i in 1000:1500]) yu = [out[i](t') for i in eachindex(out)] fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)] meanscurve1_1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean -@test mean(abs.(sol.u .- meanscurve1_1)) < 1e-2 -@test mean(abs.(physsol1 .- meanscurve1_1)) < 1e-2 - -out = re1.([fhsamplesflux22[i][1:61] for i in 1500:2000]) +out = re1.([fhsamplesflux22[i][1:61] for i in 1000:1500]) yu = [out[i](t') for i in eachindex(out)] fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)] meanscurve1_2 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean +@test mean(abs.(sol.u .- meanscurve1_1)) < 1e-2 +@test mean(abs.(physsol1 .- meanscurve1_1)) < 1e-2 @test mean(abs.(sol.u .- meanscurve1_2)) < 5e-2 @test mean(abs.(physsol1 .- meanscurve1_2)) < 5e-2 # estimated parameters(flux chain) -param1 = mean(i[62] for i in fhsamplesflux22[1500:2000]) -param2 = mean(i[63] for i in fhsamplesflux22[1500:2000]) -@test abs(param1 - p[1]) < abs(0.3 * p[1]) -@test abs(param2 - p[2]) < abs(0.3 * p[2]) +param1 = mean(i[62] for i in fhsamplesflux22[1000:1500]) +@test abs(param1 - p) < abs(0.3 * p) # Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions] -θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1500:2000] +θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:1500] luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500] luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean -@test mean(abs.(sol.u .- meanscurve2_1)) < 1e-2 -@test mean(abs.(physsol1 .- meanscurve2_1)) < 1e-2 - -θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 2)], θinit) for i in 1500:2000] +θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit) for i in 1000:1500] luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500] luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean +@test mean(abs.(sol.u .- meanscurve2_1)) < 1e-1 +@test mean(abs.(physsol1 .- meanscurve2_1)) < 1e-1 @test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2 @test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2 # estimated parameters(lux chain) -param1 = mean(i[62] for i in fhsampleslux22[1500:2000]) -param2 = mean(i[63] for i in fhsampleslux22[1500:2000]) -@test abs(param1 - p[1]) < abs(0.3 * p[1]) -@test abs(param2 - p[2]) < abs(0.3 * p[2]) \ No newline at end of file +param1 = mean(i[62] for i in fhsampleslux22[1000:1500]) +@test abs(param1 - p) < abs(0.3 * p) + +#-------------------------- solve() call +# (flux chain) +@test mean(abs.(physsol2 .- sol3flux_pestim.ensemblesol[1])) < 0.15 +# estimated parameters(flux chain) +param1 = sol3flux_pestim.estimated_ode_params[1] +@test abs(param1 - p) < abs(0.45 * p) + +# (lux chain) +@test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15 +# estimated parameters(lux chain) +param1 = sol3lux_pestim.estimated_ode_params[1] +@test abs(param1 - p) < abs(0.45 * p) \ No newline at end of file