From 98e0a94409381fecfed5d1b160bd5828a3fd3de4 Mon Sep 17 00:00:00 2001
From: Astitva Aggarwal
Date: Thu, 23 Nov 2023 12:14:11 +0530
Subject: [PATCH] include dataset points into physics loglikelihood

---
 src/PDE_BPINN.jl           | 114 +++++++--
 src/discretize.jl          |  33 ++-
 src/training_strategies.jl |  29 +++
 test/BPINN_PDE_tests.jl    | 485 +++++++++++++++++++++++++++----------
 4 files changed, 499 insertions(+), 162 deletions(-)

diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl
index 652cfe1952..501b08de04 100644
--- a/src/PDE_BPINN.jl
+++ b/src/PDE_BPINN.jl
@@ -1,6 +1,6 @@
 mutable struct PDELogTargetDensity{
     ST <: AbstractTrainingStrategy,
-    D <: Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}},
+    D <: Union{Vector{Nothing}, Vector{<:Matrix{<:Real}}},
     P <: Vector{<:Distribution},
     I,
     F,
@@ -12,7 +12,7 @@ mutable struct PDELogTargetDensity{
     priors::P
     allstd::Vector{Vector{Float64}}
     names::Tuple
-    physdt::Float64
+    physdt::Vector{Float64}
     extraparams::Int
     init_params::I
     full_loglikelihood::F
@@ -42,7 +42,8 @@ mutable struct PDELogTargetDensity{
     end
     function PDELogTargetDensity(dim, strategy, dataset,
             priors, allstd, names, physdt, extraparams,
-            init_params::NamedTuple, full_loglikelihood, Phi)
+            init_params::Union{NamedTuple, ComponentArrays.ComponentVector},
+            full_loglikelihood, Phi)
         new{
             typeof(strategy),
             typeof(dataset),
@@ -126,22 +127,48 @@ end
 function L2loss2(Tar::PDELogTargetDensity, θ)
     return logpdf(MvNormal(pde(phi, Tar.dataset[end], θ)), zeros(length(pde_eqs)))
 end
+
 # L2 losses loglikelihood (needed mainly for ODE parameter estimation)
 function L2LossData(Tar::PDELogTargetDensity, θ)
+    Phi = Tar.Phi
+    init_params = Tar.init_params
+    dataset = Tar.dataset
+    sumt = 0
+    L2stds = Tar.allstd[3]
+    # each dep var has a different dataset, depending on its indep vars and their domains
+    # these datasets are matrices: col 1 is the dep var, the remaining cols are all indep vars
+    # Tar.init_params is needed to construct a ComponentVector out of a vector of parameters
+
+    # dataset is of the form Vector[matrix_x, matrix_y, matrix_z]
+    # matrix_i is of the form [i, indvar1, indvar2, ..] (needed in case of heterogeneous domains)
+
+    # Phi is the trial solution for each NN in the chain array
+    # Creating logpdf( MvNormal(Phi(t,θ), std), dataset[i] )
+    # dataset[i][:, 2:end] -> indepvar cols of a particular depvar's dataset
+    # dataset[i][:, 1] -> depvar col of that depvar's dataset
+
     if Tar.extraparams > 0
         if Tar.init_params isa ComponentArrays.ComponentVector
-            return sum([logpdf(MvNormal(Tar.Phi[i](Tar.dataset[end]',
-                    vector_to_parameters(θ[1:(end - Tar.extraparams)],
-                        Tar.init_params)[Tar.names[i]])[1,
-                    :], ones(length(Tar.dataset[end])) .* Tar.allstd[3][i]), Tar.dataset[i])
-                        for i in eachindex(Tar.Phi)])
+            for i in eachindex(Phi)
+                sumt += logpdf(MvNormal(Phi[i](dataset[i][:, 2:end]',
+                            vector_to_parameters(θ[1:(end - Tar.extraparams)],
+                                init_params)[Tar.names[i]])[1,
+                            :],
+                        ones(size(dataset[i])[1]) .* L2stds[i]),
+                    dataset[i][:, 1])
+            end
+            sumt
         else
             # Flux case needs subindexing wrt Tar.names indices (hence stored in Tar.names)
-            return sum([logpdf(MvNormal(Tar.Phi[i](Tar.dataset[end]',
-                    vector_to_parameters(θ[1:(end - Tar.extraparams)],
-                        Tar.init_params)[Tar.names[2][i]])[1,
-                    :], ones(length(Tar.dataset[end])) .* Tar.allstd[3][i]), Tar.dataset[i])
-                        for i in eachindex(Tar.Phi)])
+            for i in eachindex(Phi)
+                sumt += logpdf(MvNormal(Phi[i](dataset[i][:, 2:end]',
+                            vector_to_parameters(θ[1:(end - Tar.extraparams)],
+                                init_params)[Tar.names[2][i]])[1,
+                            :],
+                        ones(size(dataset[i])[1]) .* L2stds[i]),
+                    dataset[i][:, 1])
+            end
+            sumt
         end
     else
         return 0
@@ -204,21 +231,58 @@ function adaptorchoice(Adaptor, mma, ssa)
     end
 end
 
-# dataset would be (x̂,t)
+# function inference(samples, discretization, saveat, numensemble, ℓπ)
+#     ranges = []
+#     for i in eachindex(domains)
+#         push!(ranges, [infimum(domains[i].domain), supremum(domains[i].domain)])
+#     end
+#     ranges = map(ranges) do x
+#         collect(x[1]:saveat:x[2])
+#     end
+#     samples = samples[(end - numensemble):end]
+#     chain = discretization.chain
+
+#     if discretization.multioutput && chain[1] isa Lux.AbstractExplicitLayer
+#         temp = [setparameters(ℓπ, samples[i]) for i in eachindex(samples)]
+
+#         luxar = map(temp) do x
+#             chain(t', x, st[i])
+#         end
+
+#     elseif discretization.multioutput && chain[1] isa Flux.Chain
+
+#     elseif chain isa Flux.Chain
+#         re = Flux.destructure(chain)[2]
+#         out1 = re.([sample for sample in samples])
+#         luxar = [collect(out1[i](t') for t in ranges)]
+#         fluxmean = map(luxar) do x
+#             [mean(vcat(x...)[:, i]) for i in eachindex(x)]
+#         end
+#     else
+#         transsamples = [vector_to_parameters(sample, initl) for sample in samples]
+#         luxar2 = [chainl(t1', transsamples[i], st)[1] for i in 800:1000]
+#         luxmean = [mean(vcat(luxar2...)[:, i]) for i in eachindex(t1)]
+#     end
+# end
 
 # priors: pdf for W,b + pdf for ODE params
-# lotka specific kwargs here
 function ahmc_bayesian_pinn_pde(pde_system, discretization;
     strategy = GridTraining, dataset = [nothing],
-    init_params = nothing, draw_samples = 1000,
-    physdt = 1 / 20.0, bcstd = [0.01], l2std = [0.05],
+    draw_samples = 1000, physdt = [1 / 20.0],
+    bcstd = [0.01], l2std = [0.05],
     phystd = [0.05], priorsNNw = (0.0, 2.0),
     param = [], nchains = 1, Kernel = HMC,
     Adaptorkwargs = (Adaptor = StanHMCAdaptor,
        Metric = DiagEuclideanMetric, targetacceptancerate = 0.8),
     Integratorkwargs = (Integrator = Leapfrog,),
-    MCMCkwargs = (n_leapfrog = 30,),
+    MCMCkwargs = (n_leapfrog = 30,), saveat = 1 / 50.0,
+    numensemble = 100,
+    # floor(Int, alg.draw_samples / 3),
    progress = false, verbose = false)
-    pinnrep = symbolic_discretize(pde_system, discretization, bayesian = true)
+    pinnrep = symbolic_discretize(pde_system,
+        discretization,
+        bayesian = true,
+        dataset_given = dataset)
 
     # for physics loglikelihood
     full_weighted_loglikelihood = pinnrep.loss_functions.full_loss_function
@@ -245,7 +309,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         # converting vector of parameters to ComponentArray for runtimegenerated functions
         names = ntuple(i -> pinnrep.depvars[i], length(chain))
     else
-        # this case is for Flux multioutput
+        # Flux multioutput
         i = 0
         temp = []
         for j in eachindex(initial_nnθ)
@@ -270,7 +334,8 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         nparameters += ninv
     end
 
-    strategy = strategy(physdt)
+    # physdt vector in case of N-dimensional domains
+    strategy = discretization.strategy
 
     # dimensions would be total no of params,initial_nnθ for Lux namedTuples
     ℓπ = PDELogTargetDensity(nparameters,
@@ -318,10 +383,13 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
         Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator)
         samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor;
             progress = progress, verbose = verbose)
+        mcmc_chain = Chains(hcat(samples...)')
+
+        fullsolution = BPINNstats(mcmc_chain, samples, stats)
+        # estimsol = inference(samples, discretization, saveat, numensemble, ℓπ)  # `inference` is still commented out above
 
         samplesc[i] = samples
         statsc[i] = stats
-        mcmc_chain = Chains(hcat(samples...)')
         chains[i] = mcmc_chain
     end
 
@@ -348,7 +416,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization;
     println("Current Prior Log-likelihood : ", priorlogpdf(ℓπ, samples[end]))
     println("Current MSE against dataset Log-likelihood : ",
         L2LossData(ℓπ, samples[end]))
-
+    fullsolution = BPINNstats(mcmc_chain, samples, stats)
     return mcmc_chain, samples, stats
 end
 end
\ No newline at end of file
diff --git a/src/discretize.jl b/src/discretize.jl
index f9fb86d5eb..809e9f3327 100644
--- a/src/discretize.jl
+++ b/src/discretize.jl
@@ -401,7 +401,7 @@ to the PDE. For more information, see `discretize` and `PINNRepresentation`.
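+
+Keyword arguments (Bayesian extension; a short sketch, see the code below for
+the authoritative behaviour):
+
+  * `bayesian`: when `true`, builds `full_loss_function` as a weighted
+    loglikelihood for `ahmc_bayesian_pinn_pde` instead of the standard loss.
+  * `dataset_given`: `[nothing]`, or a vector with one matrix per dependent
+    variable, holding the dependent variable in column 1 and the independent
+    variables in the remaining columns. The physics loglikelihood is then also
+    evaluated at these dataset points (`GridTraining` only).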
""" function SciMLBase.symbolic_discretize(pde_system::PDESystem, - discretization::PhysicsInformedNN; bayesian::Bool = false) + discretization::PhysicsInformedNN; bayesian::Bool = false,dataset_given=[nothing]) eqs = pde_system.eqs bcs = pde_system.bcs chain = discretization.chain @@ -567,7 +567,6 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, strategy, datafree_pde_loss_functions, datafree_bc_loss_functions) - # setup for all adaptive losses num_pde_losses = length(pde_loss_functions) num_bc_losses = length(bc_loss_functions) @@ -587,14 +586,34 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, bc_loss_functions) if bayesian + # required as Physics loss also needed on dataset domain points + pde_loss_functions1, bc_loss_functions1 = if !(dataset_given[1] isa Nothing) + if !(strategy isa GridTraining) + println("only GridTraining strategy allowed") + else + merge_strategy_with_loglikelihood_function(pinnrep, + strategy, + datafree_pde_loss_functions, + datafree_bc_loss_functions, train_sets_L2loss2 = dataset_given) + end + end + function full_likelihood_function(θ, allstd) stdpdes, stdbcs, stdextra = allstd # the aggregation happens on cpu even if the losses are gpu, probably fine since it's only a few of them pde_loglikelihoods = [logpdf(Normal(0, stdpdes[i]), pde_loss_function(θ)) - for (i, pde_loss_function) in enumerate(pde_loss_functions)] + for (i, pde_loss_function) in enumerate(pde_loss_functions)] bc_loglikelihoods = [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ)) - for (j, bc_loss_function) in enumerate(bc_loss_functions)] + for (j, bc_loss_function) in enumerate(bc_loss_functions)] + + if !(dataset_given[1] isa Nothing) + pde_loglikelihoods += [logpdf(Normal(0, stdpdes[j]), pde_loss_function1(θ)) + for (j, pde_loss_function1) in enumerate(pde_loss_functions1)] + + bc_loglikelihoods += [logpdf(Normal(0, stdbcs[j]), bc_loss_function1(θ)) + for (j, bc_loss_function1) in enumerate(bc_loss_functions1)] + end # this is kind of a hack, and means that whenever the outer function is evaluated the increment goes up, even if it's not being optimized # that's why we prefer the user to maintain the increment in the outer loop callback during optimization @@ -634,8 +653,8 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, return additional_loss(phi, θ_, p_) end - _additional_loglikelihood = logpdf(Normal(0, stdextra), - _additional_loss(phi, θ)) + _additional_loglikelihood = logpdf(Normal(0, stdextra) _additional_loss(phi, θ)) + weighted_additional_loglikelihood = adaloss.additional_loss_weights[1] * _additional_loglikelihood @@ -645,7 +664,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, end pinnrep.loss_functions = PINNLossFunctions(bc_loss_functions, pde_loss_functions, - full_likelihood_function, additional_loss, + full_likelihood_function, additional_loss, datafree_pde_loss_functions, datafree_bc_loss_functions) else diff --git a/src/training_strategies.jl b/src/training_strategies.jl index 8af7358753..8f39ee04ac 100644 --- a/src/training_strategies.jl +++ b/src/training_strategies.jl @@ -16,6 +16,35 @@ struct GridTraining{T} <: AbstractTrainingStrategy dx::T end +# include dataset points in pde_residual loglikelihood +function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation, + strategy::GridTraining, + datafree_pde_loss_function, + datafree_bc_loss_function; train_sets_L2loss2 = nothing) + @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep + dx = strategy.dx + 
eltypeθ = eltype(pinnrep.flat_init_params) + + train_sets = generate_training_sets(domains, dx, eqs, bcs, eltypeθ, + dict_indvars, dict_depvars) + + bcs_train_sets = train_sets[2] + pde_train_sets = [train_set[:, 2:end] for train_set in train_sets_L2loss2] + # the points in the domain and on the boundary + pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)), + pde_train_sets) + bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)), + bcs_train_sets) + pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ, strategy) + for (_loss, _set) in zip(datafree_pde_loss_function, + pde_train_sets)] + + bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ, strategy) + for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)] + + pde_loss_functions, bc_loss_functions +end + function merge_strategy_with_loss_function(pinnrep::PINNRepresentation, strategy::GridTraining, datafree_pde_loss_function, diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl index df2e11555c..192165d2b0 100644 --- a/test/BPINN_PDE_tests.jl +++ b/test/BPINN_PDE_tests.jl @@ -1,132 +1,353 @@ -using Test, MCMCChains, Lux, ModelingToolkit -import ModelingToolkit: Interval, infimum, supremum -using ForwardDiff, Distributions, OrdinaryDiffEq -using Flux, AdvancedHMC, Statistics, Random, Functors -using NeuralPDE, MonteCarloMeasurements - -# Forward solving example -@parameters t -@variables u(..) - -Dt = Differential(t) -linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π) -linear = (u, p, t) -> cos(2 * π * t) - -Dt = Differential(t) -eqs = Dt(u(t)) - cos(2 * π * t) ~ 0 -bcs = [u(0) ~ 0.0] -domains = [t ∈ Interval(0.0, 4.0)] - -chainf = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 1)) -init1, re1 = destructure(chainf) -chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1)) -initl, st = Lux.setup(Random.default_rng(), chainl) - -@named pde_system = PDESystem(eqs, bcs, domains, [t], [u(t)]) - -# non adaptive case -discretization = NeuralPDE.PhysicsInformedNN(chainf, GridTraining([0.01])) -mcmc_chain, samples, stats = ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 1000, - bcstd = [0.02], - phystd = [0.01], - priorsNNw = (0.0, 1.0), - progress = true) - -discretization = NeuralPDE.PhysicsInformedNN(chainl, GridTraining([0.01])) -mcmc_chain, samples, stats = ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 1000, - bcstd = [0.02], - phystd = [0.01], - priorsNNw = (0.0, 1.0), - progress = true) - -tspan = (0.0, 4.0) -t1 = collect(tspan[1]:0.01:tspan[2]) - -out1 = re.([samples[i] for i in 800:1000]) -luxar1 = collect(out1[i](t1') for i in eachindex(out1)) -fluxmean = [mean(vcat(luxar1...)[:, i]) for i in eachindex(t1)] - -transsamples = [vector_to_parameters(sample, initl) for sample in samples] -luxar2 = [chainl(t1', transsamples[i], st)[1] for i in 800:1000] -luxmean = [mean(vcat(luxar2...)[:, i]) for i in eachindex(t1)] - -u = [linear_analytic(0, nothing, t) for t in t1] - -@test mean(abs.(u .- fluxmean)) < 5e-2 -@test mean(abs.(u .- luxmean)) < 5e-2 - -# Parameter estimation example -@parameters t p -@variables u(..) 
- -Dt = Differential(t) -linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π) -linear = (u, p, t) -> cos(2 * π * t) -eqs = Dt(u(t)) - cos(p * t) ~ 0 -bcs = [u(0) ~ 0.0] -domains = [t ∈ Interval(0.0, 4.0)] -p = 2 * π - -chainf = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 1)) -init1, re1 = destructure(chainf) -chainl = Lux.Chain(Lux.Dense(1, 8, tanh), Lux.Dense(8, 1)) -initl, st = Lux.setup(Random.default_rng(), chainl) - -@named pde_system = PDESystem(eqs, bcs, domains, [t], [u(t)], [p], - defaults = Dict(p => 3)) - -ta = range(0.0, 4.0, length = 50) -u = [linear_analytic(0.0, p, ti) for ti in ta] -x̂ = collect(Float64, Array(u) + 0.2 .* Array(u) .* randn(size(u))) -time = vec(collect(Float64, ta)) -dataset = [x̂, time] - -discretization = NeuralPDE.PhysicsInformedNN([chainf], - GridTraining(0.01), - param_estim = true) - -mcmc_chain, samples, stats = ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 1500, physdt = 1 / 20.0, - bcstd = [1.0], - phystd = [0.01], l2std = [0.01], - param = [Normal(9, 2)], - priorsNNw = (0.0, 10.0), - dataset = dataset, - progress = true) - -discretization = NeuralPDE.PhysicsInformedNN([chainl], - GridTraining(0.01), - param_estim = true) - -mcmc_chain, samples, stats = ahmc_bayesian_pinn_pde(pde_system, - discretization; - draw_samples = 1500, - bcstd = [0.1], - phystd = [0.01], l2std = [0.01], param = [LogNormal(4, 2)], - priorsNNw = (0.0, 10.0), - dataset = dataset, - progress = true) - -tspan = (0.0, 4.0) -t1 = collect(tspan[1]:0.01:tspan[2]) - -out1 = re.([samples[i][1:(end - 1)] for i in 1300:1500]) -luxar1 = collect(out1[i](t1') for i in eachindex(out1)) -fluxmean = [mean(vcat(luxar1...)[:, i]) for i in eachindex(t1)] - -transsamples = [vector_to_parameters(sample, initl) for sample[1:(end - 1)] in samples] -luxar2 = [chainl(t1', transsamples[i], st)[1] for i in 1300:1500] -luxmean = [mean(vcat(luxar2...)[:, i]) for i in eachindex(t1)] - -u = [linear_analytic(0, nothing, t) for t in t1] - -@test mean(abs.(u .- fluxmean)) < 5e-2 -@test mean(abs.(u .- luxmean)) < 5e-2 - -@test mean(p .- [samples[i][end] for i in 1300:1500]) < 0.4 * p -@test mean(p .- [samples[i][end] for i in 1300:1500]) < 0.4 * p \ No newline at end of file +# using Test, MCMCChains, Lux, ModelingToolkit +# import ModelingToolkit: Interval, infimum, supremum +# using ForwardDiff, Distributions, OrdinaryDiffEq +# using Flux, AdvancedHMC, Statistics, Random, Functors +# using NeuralPDE, MonteCarloMeasurements +# using ComponentArrays + +# # Forward solving example +# @parameters t +# @variables u(..) 
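+# (forward problem sketch: du/dt = cos(2πt) on t ∈ [0, 4] with u(0) = 0,
+#  solved below with both a Flux and a Lux chain)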
+ +# Dt = Differential(t) +# linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π) +# linear = (u, p, t) -> cos(2 * π * t) + +# Dt = Differential(t) +# eqs = Dt(u(t)) - cos(2 * π * t) ~ 0 +# bcs = [u(0) ~ 0.0] +# domains = [t ∈ Interval(0.0, 4.0)] + +# chainf = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 1)) +# init1, re1 = Flux.destructure(chainf) +# chainl = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 1)) +# initl, st = Lux.setup(Random.default_rng(), chainl) + +# @named pde_system = PDESystem(eqs, bcs, domains, [t], [u(t)]) + +# # non adaptive case +# discretization = NeuralPDE.PhysicsInformedNN([chainf], GridTraining([0.01])) +# mcmc_chain, samples, stats = ahmc_bayesian_pinn_pde(pde_system, +# discretization; +# draw_samples = 1000, +# bcstd = [0.02], +# phystd = [0.01], +# priorsNNw = (0.0, 1.0), +# progress = true) + +# discretization = NeuralPDE.PhysicsInformedNN([chainl], GridTraining([0.01])) +# mcmc_chain, samples, stats = ahmc_bayesian_pinn_pde(pde_system, +# discretization; +# draw_samples = 1000, +# bcstd = [0.02], +# phystd = [0.01], +# priorsNNw = (0.0, 1.0), +# progress = true) + +# tspan = (0.0, 4.0) +# t1 = collect(tspan[1]:0.01:tspan[2]) + +# out1 = re.([samples[i] for i in 800:1000]) +# luxar1 = collect(out1[i](t1') for i in eachindex(out1)) +# fluxmean = [mean(vcat(luxar1...)[:, i]) for i in eachindex(t1)] + +# transsamples = [vector_to_parameters(sample, initl) for sample in samples] +# luxar2 = [chainl(t1', transsamples[i], st)[1] for i in 800:1000] +# luxmean = [mean(vcat(luxar2...)[:, i]) for i in eachindex(t1)] + +# u = [linear_analytic(0, nothing, t) for t in t1] + +# @test mean(abs.(u .- fluxmean)) < 5e-2 +# @test mean(abs.(u .- luxmean)) < 5e-2 + +# # Parameter estimation example +# @parameters t p +# @variables u(..) + +# Dt = Differential(t) +# # linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π) +# # linear = (u, p, t) -> cos(2 * π * t) +# eqs = Dt(u(t)) - cos(p * t) ~ 0 +# bcs = [u(0.0) ~ 0.0] +# domains = [t ∈ Interval(0.0, 4.0)] + +# p = 2 * π + +# chainf = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 1)) +# init1, re = Flux.destructure(chainf) +# chainl = Lux.Chain(Lux.Dense(1, 8, tanh), Lux.Dense(8, 1)) +# initl, st = Lux.setup(Random.default_rng(), chainl) + +# # chainl([1, 2], initl, st) + +# # using ComponentArrays +# # c = ComponentArrays.ComponentVector(a = [1, 2, 3], b = [1, 2, 3]) + +# # @parameters x y +# # @variables p(..) q(..) r(..) s(..) 
+# # Dx = Differential(x) +# # Dy = Differential(y) + +# # # 2D PDE +# # eq = p(x) + q(y) + Dx(r(x, y)) + Dy(s(y, x)) ~ 0 + +# # # Initial and boundary conditions +# # bcs = [p(1) ~ 0.0f0, q(-1) ~ 0.0f0, +# # r(x, -1) ~ 0.0f0, r(1, y) ~ 0.0f0, +# # s(y, 1) ~ 0.0f0, s(-1, x) ~ 0.0f0] + +# # # Space and time domains +# # domains = [x ∈ Interval(0.0, 1.0), +# # y ∈ Interval(0.0, 1.0)] + +# # numhid = 3 +# # chains = [[Lux.Chain(Lux.Dense(1, numhid, Lux.σ), Lux.Dense(numhid, numhid, Lux.σ), +# # Lux.Dense(numhid, 1)) for i in 1:2] +# # [Lux.Chain(Lux.Dense(2, numhid, Lux.σ), Lux.Dense(numhid, numhid, Lux.σ), +# # Lux.Dense(numhid, 1)) for i in 1:2]] +# # discretization = NeuralPDE.PhysicsInformedNN(chains, QuadratureTraining()) + +# # @named pde_system = PDESystem(eq, bcs, domains, [x, y], [p(x), q(y), r(x, y), s(y, x)]) + +# # de = [:x, :y] +# # a[de[1]] +# # a = ComponentArrays.ComponentVector(x = 1) +# # a[:x] + +# # pde_system.indvars +# # SymbolicUtils.istree(pde_system.depvars[3]) +# # Symbolics.value(pde_system.depvars[3]) + +# @named pde_system = PDESystem(eqs, bcs, domains, [t], [u(t)], [p], +# defaults = Dict(p => 3)) + +# ta = range(0.0, 4.0, length = 50) +# u = [linear_analytic(0.0, p, ti) for ti in ta] +# x̂ = collect(Float64, Array(u) + 0.2 .* Array(u) .* randn(size(u))) +# time = vec(collect(Float64, ta)) +# # x = time .* 2.0 +# dataset = [hcat(x̂, time)] +# hcat(datase[:, 2:end] for datase in dataset) + +# discretization = NeuralPDE.PhysicsInformedNN([chainf], +# GridTraining([0.01]), +# param_estim = true) +# # println() + +# mcmc_chain, samples, stats = ahmc_bayesian_pinn_pde(pde_system, +# discretization; +# draw_samples = 1500, physdt = [1 / 20.0], +# bcstd = [0.5], +# phystd = [0.01], l2std = [0.02], +# param = [Normal(9, 2)], +# priorsNNw = (0.0, 1.0), +# dataset = dataset, +# progress = true) + +# discretization = NeuralPDE.PhysicsInformedNN([chainl], +# GridTraining(0.01), +# param_estim = true) + +# mcmc_chain, samples, stats = ahmc_bayesian_pinn_pde(pde_system, +# discretization; +# draw_samples = 1500, +# bcstd = [0.1], +# phystd = [0.01], l2std = [0.01], param = [LogNormal(4, 2)], +# priorsNNw = (0.0, 10.0), +# dataset = dataset, +# progress = true) + +# tspan = (0.0, 4.0) +# t1 = collect(tspan[1]:0.01:tspan[2]) + +# out1 = re.([samples[i][1:(end - 1)] for i in 1300:1500]) +# luxar1 = collect(out1[i](t1') for i in eachindex(out1)) +# fluxmean = [mean(vcat(luxar1...)[:, i]) for i in eachindex(t1)] + +# using Plots +# plotly() +# plot!(t1, fluxmean) +# plot!(dataset[1][:, 2], dataset[1][:, 1]) + +# transsamples = [vector_to_parameters(sample, initl) for sample[1:(end - 1)] in samples] +# luxar2 = [chainl(t1', transsamples[i], st)[1] for i in 1300:1500] +# luxmean = [mean(vcat(luxar2...)[:, i]) for i in eachindex(t1)] + +# u = [linear_analytic(0, nothing, t) for t in t1] + +# @test mean(abs.(u .- fluxmean)) < 5e-2 +# @test mean(abs.(u .- luxmean)) < 5e-2 + +# @test mean(p .- [samples[i][end] for i in 1300:1500]) < 0.4 * p +# @test mean(p .- [samples[i][end] for i in 1300:1500]) < 0.4 * p + +# plot(u) +# plot!(fluxmean) +# plot!(luxmean) + +# using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL +# import ModelingToolkit: Interval + +# @parameters x y +# @variables p(..) q(..) r(..) s(..) 
+# Dx = Differential(x) +# Dy = Differential(y) + +# # 2D PDE +# eq = p(x) + q(y) + Dx(r(x, y)) + Dy(s(y, x)) ~ 0 + +# # Initial and boundary conditions +# bcs = [p(1) ~ 0.0f0, q(-1) ~ 0.0f0, +# r(x, -1) ~ 0.0f0, r(1, y) ~ 0.0f0, +# s(y, 1) ~ 0.0f0, s(-1, x) ~ 0.0f0] + +# # Space and time domains +# domains = [x ∈ Interval(0.0, 1.0), +# y ∈ Interval(0.0, 1.0)] + +# numhid = 3 +# chains = [[Lux.Chain(Lux.Dense(1, numhid, Lux.σ), Lux.Dense(numhid, numhid, Lux.σ), +# Lux.Dense(numhid, 1)) for i in 1:2] +# [Lux.Chain(Lux.Dense(2, numhid, Lux.σ), Lux.Dense(numhid, numhid, Lux.σ), +# Lux.Dense(numhid, 1)) for i in 1:2]] +# discretization = NeuralPDE.PhysicsInformedNN(chains, GridTraining([0.01, 0.01])) + +# @named pde_system = PDESystem(eq, bcs, domains, [x, y], [p(x), q(y), r(x, y), s(y, x)]) +# prob = SciMLBase.discretize(pde_system, discretization) + +# mcmc_chain, samples, stats = ahmc_bayesian_pinn_pde(pde_system, +# discretization; +# draw_samples = 1500, +# bcstd = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5], +# phystd = [0.1], +# priorsNNw = (0.0, 10.0), +# progress = true) + +# firstelement(domains[1]) +# infimum(domains[1]) +# infimum(domains[1].domain) +# domains = [x ∈ Interval(0.0, 1.0)] +# size(domains) + +# # callback = function (p, l) +# # println("Current loss is: $l") +# # return false +# # end + +# # res = Optimization.solve(prob, BFGS(); callback = callback, maxiters = 100) + +# # Paper experiments +# # function sir_ode!(u, p, t) +# # (S, I, R) = u +# # (β, γ) = p +# # N = S + I + R + +# # dS = -β * I / N * S +# # dI = β * I / N * S - γ * I +# # dR = γ * I +# # return [dS, dI, dR] +# # end; + +# # δt = 1.0 +# # tmax = 40.0 +# # tspan = (0.0, tmax) +# # u0 = [990.0, 10.0, 0.0]; # S,I,R +# # p = [0.5, 0.25]; # β,γ (removed c as a parameter as it was just getting multipled with β, so ideal value for c and β taken in new ideal β value) +# # prob_ode = ODEProblem(sir_ode!, u0, tspan, p) +# # sol = solve(prob_ode, Tsit5(), saveat = δt / 5) +# # sig = 0.20 +# # data = Array(sol) +# # dataset = [ +# # data[1, :] .+ (minimum(data[1, :]) * sig .* rand(length(sol.t))), +# # data[2, :] .+ (mean(data[2, :]) * sig .* rand(length(sol.t))), +# # data[3, :] .+ (mean(data[3, :]) * sig .* rand(length(sol.t))), +# # sol.t, +# # ] +# # priors = [Normal(1.0, 1.0), Normal(0.5, 1.0)] + +# # plot(sol.t, dataset[1], label = "noisy s") +# # plot!(sol.t, dataset[2], label = "noisy i") +# # plot!(sol.t, dataset[3], label = "noisy r") +# # plot!(sol, labels = ["s" "i" "r"]) + +# # chain = Flux.Chain(Flux.Dense(1, 8, tanh), Flux.Dense(8, 8, tanh), +# # Flux.Dense(8, 3)) + +# # Adaptorkwargs = (Adaptor = AdvancedHMC.StanHMCAdaptor, +# # Metric = AdvancedHMC.DiagEuclideanMetric, targetacceptancerate = 0.8) + +# # alg = BNNODE(chain; +# # dataset = dataset, +# # draw_samples = 500, +# # l2std = [5.0, 5.0, 10.0], +# # phystd = [1.0, 1.0, 1.0], +# # priorsNNw = (0.01, 3.0), +# # Adaptorkwargs = Adaptorkwargs, +# # param = priors, progress = true) + +# # # our version +# # @time sol_pestim3 = solve(prob_ode, alg; estim_collocate = true, saveat = δt) +# # @show sol_pestim3.estimated_ode_params + +# # # old version +# # @time sol_pestim4 = solve(prob_ode, alg; saveat = δt) +# # @show sol_pestim4.estimated_ode_params + +# # # plotting solutions +# # plot(sol_pestim3.ensemblesol[1], label = "estimated x1") +# # plot!(sol_pestim3.ensemblesol[2], label = "estimated y1") +# # plot!(sol_pestim3.ensemblesol[3], label = "estimated z1") + +# # plot(sol_pestim4.ensemblesol[1], label = "estimated x2_1") +# # 
plot!(sol_pestim4.ensemblesol[2], label = "estimated y2_1") +# # plot!(sol_pestim4.ensemblesol[3], label = "estimated z2_1") + +# # discretization = NeuralPDE.PhysicsInformedNN(chainf, +# # GridTraining([ +# # 0.01, +# # ]), +# # adaptive_loss = MiniMaxAdaptiveLoss(2; +# # pde_max_optimiser = Flux.ADAM(1e-4), +# # bc_max_optimiser = Flux.ADAM(0.5), +# # pde_loss_weights = 1, +# # bc_loss_weights = 1, +# # additional_loss_weights = 1) + +# # # GradientScaleAdaptiveLoss{Float64, ForwardDiff.Dual{Float64}}(2; +# # # weight_change_inertia = 0.9, +# # # pde_loss_weights = ForwardDiff.Dual{Float64}(1.0, +# # # ntuple(_ -> 0.0, 19)), +# # # bc_loss_weights = ForwardDiff.Dual{Float64}(1.0, +# # # ntuple(_ -> 0.0, 19)), +# # # additional_loss_weights = ForwardDiff.Dual{Float64}(1.0, +# # # ntuple(_ -> 0.0, 19))) +# # # GradientScaleAdaptiveLoss{Float64, ForwardDiff.Dual{Float64, Float64}}(2; +# # # weight_change_inertia = 0.9, +# # # pde_loss_weights = ForwardDiff.Dual{Float64}(1.0, ntuple(_ -> 0.0, 19)), +# # # bc_loss_weights = ForwardDiff.Dual{Float64}(1.0, ntuple(_ -> 0.0, 19)), +# # # additional_loss_weights = ForwardDiff.Dual{Float64}(1.0, ntuple(_ -> 0.0, 19)))) +# # ) + +# # # ForwardDiff.Dual{Float64}(1.0, ntuple(_ -> 0.0, 19)) +# # # typeof(ForwardDiff.Dual{Float64}(1.0, ntuple(_ -> 0.0, 19))) <: ForwardDiff.Dual + +# # a = GradientScaleAdaptiveLoss{Float64, ForwardDiff.Dual{Float64, Float64}}(2; +# # weight_change_inertia = 0.9, +# # pde_loss_weights = ForwardDiff.Dual{Float64}(1.0, +# # zeros(19))), +# # bc_loss_weights = ForwardDiff.Dual{Float64}(1.0, +# # zeros(19)), +# # additional_loss_weights = ForwardDiff.Dual{Float64}(1.0, +# # zeros(19)) + +# # as = ForwardDiff.Dual{Float64}(1.0, ForwardDiff.Partials(ntuple(_ -> 0.0, 19))) +# # typeof(ForwardDiff.Dual{Float64}(1.0, ntuple(_ -> 0.0, 19))) + +# # a = GradientScaleAdaptiveLoss{Float64, Float64}(2; weight_change_inertia = 0.9, +# # pde_loss_weights = 1, +# # bc_loss_weights = 1, +# # additional_loss_weights = 1) +# # ForwardDiff.Dual{Float64, Float64, 19} <: ForwardDiff.Dual{Float64, Float64} +# # typeof(ntuple(_ -> 0.0, 19)) <: Tuple +# # ForwardDiff.Dual{Float64}(ForwardDiff.value(1.0), ntuple(_ -> 0.0, 19)) +# # typeof(ForwardDiff.Dual{Float64}(1.0, ntuple(_ -> 0.0, 19))) <: Real
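+
+# Usage sketch (not part of the original tests): how the new `dataset` keyword
+# of `ahmc_bayesian_pinn_pde` is exercised, pieced together from the
+# commented-out parameter-estimation example above. One matrix per dependent
+# variable, with the dependent variable in column 1 and the independent
+# variables in the remaining columns; note that `physdt` is now a vector.
+#
+# using NeuralPDE, Lux, ModelingToolkit, Distributions, Random
+# import ModelingToolkit: Interval
+#
+# @parameters t p
+# @variables u(..)
+# Dt = Differential(t)
+# eqs = Dt(u(t)) - cos(p * t) ~ 0
+# bcs = [u(0.0) ~ 0.0]
+# domains = [t ∈ Interval(0.0, 4.0)]
+# @named pde_system = PDESystem(eqs, bcs, domains, [t], [u(t)], [p],
+#     defaults = Dict(p => 3))
+#
+# # noisy data from the analytic solution u(t) = sin(2πt)/(2π) at p = 2π
+# ta = range(0.0, 4.0, length = 50)
+# u_data = [sin(2 * π * ti) / (2 * π) for ti in ta]
+# x̂ = u_data .+ 0.2 .* u_data .* randn(length(u_data))
+# dataset = [hcat(x̂, collect(ta))]   # [depvar indvar] matrix for u(t)
+#
+# chainl = Lux.Chain(Lux.Dense(1, 8, tanh), Lux.Dense(8, 1))
+# discretization = NeuralPDE.PhysicsInformedNN([chainl], GridTraining([0.01]),
+#     param_estim = true)
+# mcmc_chain, samples, stats = ahmc_bayesian_pinn_pde(pde_system, discretization;
+#     draw_samples = 1500, physdt = [1 / 20.0], bcstd = [0.5], phystd = [0.01],
+#     l2std = [0.02], param = [Normal(9, 2)], priorsNNw = (0.0, 1.0),
+#     dataset = dataset)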