diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml
index 9c79359112..320e0c0737 100644
--- a/.JuliaFormatter.toml
+++ b/.JuliaFormatter.toml
@@ -1,2 +1,3 @@
 style = "sciml"
-format_markdown = true
\ No newline at end of file
+format_markdown = true
+annotate_untyped_fields_with_any = false
diff --git a/Project.toml b/Project.toml
index dbf147696f..f6c0064ff0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -9,6 +9,7 @@ AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d"
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
+ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
 Cubature = "667455a9-e2ce-5579-9412-b964f529a492"
 DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
@@ -20,6 +21,7 @@ Integrals = "de52edbc-65ea-441a-8357-d3a637375a31"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
+LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
 MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
 ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78"
 MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca"
@@ -35,6 +37,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 SymbolicUtils = "d1185830-fcd6-423d-90d6-eec64667417b"
 Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
 UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
+WeightInitializers = "d49dbf32-c5c2-4618-8acc-27bb2598ef2d"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

 [compat]
@@ -45,6 +48,7 @@ ArrayInterface = "7.11"
 CUDA = "5.5.2"
 ChainRulesCore = "1.24"
 ComponentArrays = "0.15.16"
+ConcreteStructs = "0.2.3"
 Cubature = "1.5"
 DiffEqNoiseProcess = "5.20"
 Distributions = "0.25.107"
@@ -81,6 +85,7 @@ SymbolicUtils = "3.7"
 Symbolics = "6.14"
 Test = "1.10"
 UnPack = "1"
+WeightInitializers = "1.0.3"
 Zygote = "0.6.71"
 julia = "1.10"
diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl
index 45b99ede27..259e3f3cb2 100644
--- a/src/NeuralPDE.jl
+++ b/src/NeuralPDE.jl
@@ -30,9 +30,16 @@ using DomainSets: Domain, ClosedInterval, AbstractInterval, leftendpoint, righte
     ProductDomain
 using SciMLBase: @add_kwonly, parameterless_type
 using UnPack: @unpack
-import ChainRulesCore, Lux, ComponentArrays
+import ChainRulesCore, ComponentArrays
 using Lux: FromFluxAdaptor, recursive_eltype
+
 using ChainRulesCore: @non_differentiable
+using ConcreteStructs: @concrete
+using Lux: Lux, Chain, Dense, SkipConnection
+using LuxCore: AbstractLuxLayer, AbstractLuxWrapperLayer, AbstractLuxContainerLayer
+using WeightInitializers: glorot_uniform, zeros32
+
+import LuxCore: initialparameters, initialstates, parameterlength, statelength

 RuntimeGeneratedFunctions.init(@__MODULE__)
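The `src/NeuralPDE.jl` hunk above replaces `import Lux: ...` with `import LuxCore: initialparameters, initialstates, parameterlength, statelength`, so the package now extends the lightweight interface package directly instead of going through `Lux`. For orientation, a minimal sketch of that interface contract; the `Scale` layer here is a made-up example, not part of this PR:

```julia
using Random: AbstractRNG, default_rng
using LuxCore: AbstractLuxLayer
import LuxCore: initialparameters, parameterlength

# Hypothetical toy layer implementing the same LuxCore interface as DGMLSTMLayer below
struct Scale <: AbstractLuxLayer
    dims::Int
end

# Parameters live in a NamedTuple returned by `initialparameters`
initialparameters(rng::AbstractRNG, l::Scale) = (; w = randn(rng, l.dims))
parameterlength(l::Scale) = l.dims

# Lux layers are called as (x, ps, st) -> (y, st)
(l::Scale)(x, ps, st::NamedTuple) = (ps.w .* x, st)

ps = initialparameters(default_rng(), Scale(4))
y, _ = Scale(4)(ones(4), ps, (;))
```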
diff --git a/src/dgm.jl b/src/dgm.jl
index 44e60665de..f4d1d4a20e 100644
--- a/src/dgm.jl
+++ b/src/dgm.jl
@@ -1,22 +1,19 @@
-struct dgm_lstm_layer{F1, F2} <: Lux.AbstractLuxLayer
-    activation1::Function
-    activation2::Function
+@concrete struct DGMLSTMLayer <: AbstractLuxLayer
+    activation1
+    activation2
     in_dims::Int
     out_dims::Int
-    init_weight::F1
-    init_bias::F2
+    init_weight
+    init_bias
 end

-function dgm_lstm_layer(in_dims::Int, out_dims::Int, activation1, activation2;
-        init_weight = Lux.glorot_uniform, init_bias = Lux.zeros32)
-    return dgm_lstm_layer{typeof(init_weight), typeof(init_bias)}(
-        activation1, activation2, in_dims, out_dims, init_weight, init_bias)
+function DGMLSTMLayer(in_dims::Int, out_dims::Int, activation1, activation2;
+        init_weight = glorot_uniform, init_bias = zeros32)
+    return DGMLSTMLayer(activation1, activation2, in_dims, out_dims, init_weight, init_bias)
 end

-import Lux: initialparameters, initialstates, parameterlength, statelength
-
-function Lux.initialparameters(rng::AbstractRNG, l::dgm_lstm_layer)
-    return (
+function initialparameters(rng::AbstractRNG, l::DGMLSTMLayer)
+    return (;
         Uz = l.init_weight(rng, l.out_dims, l.in_dims),
         Ug = l.init_weight(rng, l.out_dims, l.in_dims),
         Ur = l.init_weight(rng, l.out_dims, l.in_dims),
@@ -32,75 +29,43 @@ function Lux.initialparameters(rng::AbstractRNG, l::dgm_lstm_layer)
     )
 end

-Lux.initialstates(::AbstractRNG, ::dgm_lstm_layer) = NamedTuple()
-function Lux.parameterlength(l::dgm_lstm_layer)
-    4 * (l.out_dims * l.in_dims + l.out_dims * l.out_dims + l.out_dims)
-end
-Lux.statelength(l::dgm_lstm_layer) = 0
-
-function (layer::dgm_lstm_layer)(
-        S::AbstractVecOrMat{T}, x::AbstractVecOrMat{T}, ps, st::NamedTuple) where {T}
-    @unpack Uz, Ug, Ur, Uh, Wz, Wg, Wr, Wh, bz, bg, br, bh = ps
-    Z = layer.activation1.(Uz * x + Wz * S .+ bz)
-    G = layer.activation1.(Ug * x + Wg * S .+ bg)
-    R = layer.activation1.(Ur * x + Wr * S .+ br)
-    H = layer.activation2.(Uh * x + Wh * (S .* R) .+ bh)
-    S_new = (1.0 .- G) .* H .+ Z .* S
-    return S_new, st
-end
-
-struct dgm_lstm_block{L <: NamedTuple} <: Lux.AbstractLuxContainerLayer{(:layers,)}
-    layers::L
-end
-
-function dgm_lstm_block(l...)
-    names = ntuple(i -> Symbol("dgm_lstm_$i"), length(l))
-    layers = NamedTuple{names}(l)
-    return dgm_lstm_block(layers)
+function parameterlength(l::DGMLSTMLayer)
+    return 4 * (l.out_dims * l.in_dims + l.out_dims * l.out_dims + l.out_dims)
 end

-dgm_lstm_block(xs::AbstractVector) = dgm_lstm_block(xs...)
-
-@generated function apply_dgm_lstm_block(layers::NamedTuple{fields}, S::AbstractVecOrMat,
-        x::AbstractVecOrMat, ps, st::NamedTuple) where {fields}
-    N = length(fields)
-    S_symbols = vcat([:S], [gensym() for _ in 1:N])
-    x_symbol = :x
-    st_symbols = [gensym() for _ in 1:N]
-    calls = [:(($(S_symbols[i + 1]), $(st_symbols[i])) = layers.$(fields[i])(
-                 $(S_symbols[i]), $(x_symbol), ps.$(fields[i]), st.$(fields[i])))
-             for i in 1:N]
-    push!(calls, :(st = NamedTuple{$fields}((($(Tuple(st_symbols)...),)))))
-    push!(calls, :(return $(S_symbols[N + 1]), st))
-    return Expr(:block, calls...)
-end
-
-function (L::dgm_lstm_block)(
-        S::AbstractVecOrMat{T}, x::AbstractVecOrMat{T}, ps, st::NamedTuple) where {T}
-    return apply_dgm_lstm_block(L.layers, S, x, ps, st)
+# TODO: use more optimized versions from LuxLib
+# XXX: Why not use the one from Lux?
+function (layer::DGMLSTMLayer)((S, x), ps, st::NamedTuple)
+    (; Uz, Ug, Ur, Uh, Wz, Wg, Wr, Wh, bz, bg, br, bh) = ps
+    Z = layer.activation1.(Uz * x .+ Wz * S .+ bz)
+    G = layer.activation1.(Ug * x .+ Wg * S .+ bg)
+    R = layer.activation1.(Ur * x .+ Wr * S .+ br)
+    H = layer.activation2.(Uh * x .+ Wh * (S .* R) .+ bh)
+    S_new = (1 .- G) .* H .+ Z .* S
+    return S_new, st
 end

-struct dgm{S, L, E} <: Lux.AbstractLuxContainerLayer{(:d_start, :lstm, :d_end)}
-    d_start::S
-    lstm::L
-    d_end::E
+dgm_lstm_block_rearrange(mx, (S, x)) = (mx, x)
+
+function DGMLSTMBlock(layers...)
+    blocks = AbstractLuxLayer[]
+    for (i, layer) in enumerate(layers)
+        if i == length(layers)
+            push!(blocks, layer)
+        else
+            push!(blocks, SkipConnection(layer, dgm_lstm_block_rearrange))
+        end
+    end
+    return Chain(blocks...)
 end

-function (l::dgm)(x::AbstractVecOrMat{T}, ps, st::NamedTuple) where {T}
-    S, st_start = l.d_start(x, ps.d_start, st.d_start)
-    S, st_lstm = l.lstm(S, x, ps.lstm, st.lstm)
-    y, st_end = l.d_end(S, ps.d_end, st.d_end)
-
-    st_new = (
-        d_start = st_start,
-        lstm = st_lstm,
-        d_end = st_end
-    )
-    return y, st_new
+@concrete struct DGM <: AbstractLuxWrapperLayer{:model}
+    model
 end

 """
-    dgm(in_dims::Int, out_dims::Int, modes::Int, L::Int, activation1, activation2, out_activation= Lux.identity)
+    DGM(in_dims::Int, out_dims::Int, modes::Int, L::Int, activation1, activation2,
+        out_activation=identity)

 returns the architecture defined for Deep Galerkin method.
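The rewrite above drops the `@generated` unrolling: `DGMLSTMBlock` now threads the `(S, x)` pair through a plain `Chain`, relying on `SkipConnection`'s `connection` argument (called as `connection(layer_output, block_input)`) to re-pair each new state with the original input. A toy sketch of that mechanism; `cell` below is a stand-in for `DGMLSTMLayer`, not the real layer (the `src/dgm.jl` diff continues after this sketch):

```julia
using Lux, Random

# Stand-in for DGMLSTMLayer: consumes an (S, x) pair, returns only the new S
cell = Lux.WrappedFunction(((S, x),) -> S .+ x)
rearrange(S_new, (S, x)) = (S_new, x)   # same role as dgm_lstm_block_rearrange

block = Chain(SkipConnection(cell, rearrange), cell)  # last cell returns S alone

ps, st = Lux.setup(Random.default_rng(), block)
S0, x = zeros(3), ones(3)
y, _ = block((S0, x), ps, st)
@assert y == [2.0, 2.0, 2.0]   # (0 + 1) + 1: x is re-injected at each step
```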
@@ -127,14 +92,13 @@ f(t, x, \\theta) &= \\sigma_{out}(W S^{L+1} + b).

 - `out_activation`: activation fn used for the output of the network.
 - `kwargs`: additional arguments to be splatted into [`PhysicsInformedNN`](@ref).
 """
-function dgm(in_dims::Int, out_dims::Int, modes::Int, layers::Int,
-        activation1, activation2, out_activation)
-    dgm(
-        Lux.Dense(in_dims, modes, activation1),
-        dgm_lstm_block([dgm_lstm_layer(in_dims, modes, activation1, activation2)
-                        for i in 1:layers]),
-        Lux.Dense(modes, out_dims, out_activation)
-    )
+function DGM(in_dims::Int, out_dims::Int, modes::Int, layers::Int,
+        activation1, activation2, out_activation)
+    return DGM(Chain(
+        SkipConnection(Dense(in_dims => modes, activation1; init_bias = zeros32),
+            DGMLSTMBlock([DGMLSTMLayer(in_dims, modes, activation1, activation2)
+                          for _ in 1:layers]...)),
+        Dense(modes => out_dims, out_activation; init_bias = zeros32)))
 end

 """
@@ -168,8 +132,8 @@ function DeepGalerkin(
         in_dims::Int, out_dims::Int, modes::Int, L::Int, activation1::Function,
         activation2::Function, out_activation::Function,
         strategy::NeuralPDE.AbstractTrainingStrategy; kwargs...)
-    PhysicsInformedNN(
-        dgm(in_dims, out_dims, modes, L, activation1, activation2, out_activation),
+    return PhysicsInformedNN(
+        DGM(in_dims, out_dims, modes, L, activation1, activation2, out_activation),
         strategy; kwargs...
     )
 end
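That closes out the `src/dgm.jl` changes. Since `DGM` now subclasses `AbstractLuxWrapperLayer`, it behaves like any other Lux model; a quick smoke-test sketch, assuming the refactored constructor above is available (adjust the qualification if `DGM` is not exported):

```julia
using NeuralPDE, Lux, Random

# 2 inputs (e.g. t and x), 1 output, 50 modes, 3 LSTM-style layers
model = NeuralPDE.DGM(2, 1, 50, 3, tanh, tanh, identity)
ps, st = Lux.setup(Random.default_rng(), model)

x = rand(Float32, 2, 16)        # batch of 16 sample points
y, _ = model(x, ps, st)
@assert size(y) == (1, 16)
```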
diff --git a/test/dgm_test.jl b/test/dgm_test.jl
index de29888f96..5da01a0282 100644
--- a/test/dgm_test.jl
+++ b/test/dgm_test.jl
@@ -3,7 +3,6 @@ using NeuralPDE, Test
 using ModelingToolkit, Optimization, OptimizationOptimisers, Distributions, MethodOfLines,
       OrdinaryDiffEq
 import ModelingToolkit: Interval, infimum, supremum
-import Lux: tanh, identity

 @testset "Poisson's equation" begin
     @parameters x y
@@ -35,9 +34,9 @@ import Lux: tanh, identity
         return false
     end

-    res = Optimization.solve(prob, Adam(0.01); callback = callback, maxiters = 500)
+    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.01); callback, maxiters = 500)
     prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(prob, Adam(0.001); callback = callback, maxiters = 200)
+    res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.001); callback, maxiters = 200)

     phi = discretization.phi
     xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
@@ -47,7 +46,7 @@ import Lux: tanh, identity
                         (length(xs), length(ys)))
     u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys],
                      (length(xs), length(ys)))
-    @test u_predict≈u_real atol=0.1
+    @test u_predict≈u_real rtol=0.1
 end

 @testset "Black-Scholes PDE: European Call Option" begin
@@ -87,9 +86,9 @@ end
         return false
     end

-    res = Optimization.solve(prob, Adam(0.1); callback = callback, maxiters = 100)
+    res = Optimization.solve(prob, Adam(0.1); callback, maxiters = 500)
     prob = remake(prob, u0 = res.u)
-    res = Optimization.solve(prob, Adam(0.01); callback = callback, maxiters = 500)
+    res = Optimization.solve(prob, Adam(0.01); callback, maxiters = 500)

     phi = discretization.phi
     function analytical_soln(t, x, K, σ, T)
diff --git a/test/runtests.jl b/test/runtests.jl
index 7bd4194dc3..562a453e4d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -12,6 +12,7 @@ end

 @time begin
     if GROUP == "All" || GROUP == "QA"
+        # Failure is due to ModelingToolkit exporting
         @time @safetestset "Quality Assurance" include("qa.jl")
     end
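Two test tweaks worth flagging: `callback = callback` becomes the Julia keyword shorthand `callback`, and the Poisson check moves from `atol = 0.1` to `rtol = 0.1`, a 10% relative error budget that scales with the solution's magnitude instead of a fixed absolute one. Roughly what that assertion expands to, on made-up numbers:

```julia
using Test, LinearAlgebra

u_real = [1000.0, 2000.0]
u_predict = [1050.0, 1900.0]

# `u_predict ≈ u_real rtol = 0.1` is isapprox with a relative tolerance:
@assert norm(u_predict - u_real) <= 0.1 * max(norm(u_predict), norm(u_real))
@test u_predict≈u_real rtol=0.1
```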