diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index b96c8e1aaf..f39b9baa3e 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -2,7 +2,7 @@ steps: - label: "GPU" plugins: - JuliaCI/julia#v1: - version: "1" + version: "1.9" - JuliaCI/julia-test#v1: coverage: false # 1000x slowdown agents: @@ -20,8 +20,7 @@ steps: matrix: setup: version: - - "1.6" - - "1" + - "1.9" group: - "NNODE" - "NeuralAdapter" @@ -41,3 +40,22 @@ steps: timeout_in_minutes: 680 # Don't run Buildkite if the commit message includes the text [skip tests] if: build.message !~ /\[skip tests\]/ + + - label: "Documentation" + plugins: + - JuliaCI/julia#v1: + version: "1.9" + command: | + julia --project=docs -e ' + println("--- :julia: Instantiating project") + using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate() + println("+++ :julia: Building documentation") + include("docs/make.jl")' + agents: + queue: "juliagpu" + cuda: "*" + if: build.message !~ /\[skip docs\]/ && !build.pull_request.draft + timeout_in_minutes: 1000 + +env: + SECRET_DOCUMENTER_KEY: "yBJHrf5zUPu3VFotb0W4TRdauMTGgNUei3ax0xrVTmcrrP3EX8zSGaj9MNZji9H6JqyNEhCqZMGcHhxR5XK0f97YjhAp5rDNpwV6FbIXY8FXgFyIOLAenYUklta1W6fNM7KTz3Dq3UnKBAprKhQBwUw3SldTuvtm+IhiVT2rFmADqxMSQcv+5LivYEgAFrrd6LX+PHhonj38VN46z5Bi3TOIGAnByVlZssX7cOwaRg/8TloLPsWAUlQiPr89Vdlow9k6SQV8W9mf00/Rq4LFd1Eq1EYTCSmReVawMxVpXh1mj7MSabf9ppVWYOmjP5Rzatk8YFWlJf80HVvzY7tXRQ==;U2FsdGVkX1/UmKrEQkyqoZY6GckLGmpV5SsPa2byZhZbomiJKIrLntTIHK9GclgfCJ1MDCKhuo3hMVxq+NKVm0QmnMZk0Hlzszx8Up5pbcuSb2BA0Wm7IJzZ5/2uXfdBOXIejwk+GUabJtSpiDlYJUW/8bOWYZUwl9rmrOM44tPesEcs5yMZPShIowZnJqrvQUcWXZ/+aZjftX+Pe7nP0KddzYRYzhIJVzYmU394V9MWqZDqaNU19TJqnL8dNQxFLfyDibXuCqSD9QjV2I/iCfwr1xI81h11vmaatpZCDUUGYyxLRh1w5BuT1hHvLqktoAGliBD2lSOEB5KbwJu0nQCbEXPsM6CpMb4JygFloViQJ1L0kb91JEZIrnP6ruM1rrgrRu8Ufb+hob+BsCiEhSxwLy6ynOll2ZggMU/BVjQjkVQ3TmxBLvJ4T3QwzCSPpyN6lyHgSWNodN2Fn+PaWd0Sc0gSs8BY9PmOOc9m5ErVCrsscTr7aqcIbLZBnNA3y+mtLN5Vpuk3bmv9Bb9SAllXMLFFd604wtlxJi2Ecrb+6b4rc+QUmr5gnYqOYLCCOruFJfvMY63eFuuHWHKT+qroiGeuUhprUJDZUzhKUKnFFhhUZKtI11RAhVikZMPyMAsW4+gshQkEjXTRZqBpadpMh+c0uO8V2tRZAPIn7GXSdsCaGWbzL6yVZx79mM0W4sDKAp0Xs2wc04FgPbDu2sHGA+VxokrGctRovGVhSELx65aAj7x/ByhYdIByPCxHa7TNBUHb7n/4XLw8KIzVKr6jX2ep5m3KlYdjI7uxq8Hlpeu0hCRG3tdCqwOZsEOm3yhC3B/jsrKLzOsDP/x3ByAp8RvSVPP9WsWP55CxZUvc+q5LiVXBc7PhUC4nRB5/FMykjm6PboB92Y0DkP8Wql09FDr3vs8B3TkVLzOvzcz888oZTQpTaoixaAlVBs51al4N7UXhp5BInUCUIkknIyAEzXgK/5SpzixVoEkeNPkrMqg4hDaSHlKu0VDuqcP0Uv/9IKf/qs0+XK+2v9QBgqV16upbHK2EptgII3QJpXf2sq/IQTPXq3aly3NnpPUcIZJZtfG/Wk7qOFg/NhBVMvXWkrTRwQhJ5VXFTP0kXVpbgBml4Eq/zw+tAn5mmtieVaeFonZgqCIa+gi+tWiMy2V3aTubEYUGWTb3WOtxMt9i3bu9KhvOIr+fwCgpYUG9Vb/6v7H84zt5HT59sNlo9J8Acih8EfVZseC5JVF6ugxfnHc8BBOtvuUUFtOjIWwOgcfCiPXvsZdMQh0Ow3u9pYyJUZ3s+enHkBwtsu3+kXBkeL77eggslREVUz2g9eK8G5sKwSCsltgv8HQbHYARkXqq14Unb3NNakvab7BrQ2znWy7zU4P04Thtqn2fOqiAOUxuGRF5iNnkSnIZavtataT6rezB1efn4V2HYANcR4JoGj1JBXWa/dOJAYVnRLB7pbpS94yjbEMjRMB5aWSnjf6FzaDnXwfUAjAHvHNZsLxVhtIejyPQhg2kbSftwWWqw9zVvc3dz2a18V+MyNakcRiRS0CEqSl/L8vIhTppGultqzJZCKJKMAIzUE5+Q1EeDYL1kEImtSZ3hVXq4YaSB4q/bNKbDLG4pWs7IO6DFR88j5KIPfMPy15jgP6v+w0QZh8JNPQLveXvmYU4T9vxalT8i1P/4Nc2tI1A8HUv0hVoNu6V0ugTxukMLJLLFrZ80Ic7xCxNzqlzzcMhsYOHhcR4fZCdDtmoNfZm066hAkRcQJ0yNbiv7GUxrybGzer+N+s7QtS7/AGxuVW1MNQlsqdwTL2KTOvkZWHYB5cHpfqeS6zSPczeEiOIgT1fRXv3kYRBaJ4Dk7aWwXuCBbN2vAhRCEjcJC6QXE4BUukycNacytP1+HhCBeouxdPli9jrPIz2YH0iy7kc47XPiJr7zR1KAza3M3boau6xeb/why5zV7gHzB08qAxhm+pBLm4ERdKSwe/KAdGX5M0hiqMLHceUwJgzWEgukJfntxeZnP1rFJnTEAbkBy/CKtEmdr4IJYIFZU59IE9WOeYgrzl677JoGblkJ2B1sj1U8DbsRHIN+muVdAmYu+PBft0Zxih4JNe/rgmC/hNpDClME
PIEk4euRLf3xl1OHCOcWfEKuhwV/wIwJ0dtQzjN97g6a9IbSlupLAwPDZM925hC7qYicuzrF0Nj64GeOSMb4WIiEGpgH8TWOYkgxea+RoNLu0MEErcz26jqnV1QS8YGEFtT8k0lnhivg+SXIcgdVMoyavFVjqP4FeNm0aL0vE5+odOzEIzKKVNvHqofO4HbrRmlbAorS9OfzRlHbzJznIWO+8yyQc6oKyeT92THh4+wFYXQzkw0bvHsJZ08OymCAnIP+dZCzOSJ/MzcI3BjmcMZcHasPS6CfgSRtm7o8GJvyZljfW/z4zdy6+HzGLxZcgiwDc4qODYBEMdSRdDrxzpENZ4/IK3JTTcafsrRgbqi1nYyadQQx5A5xFhyYZq04iaDxx8GmDV6v4l4Hp/iPHoG0mp3yjxrt5hUjEyLW/+5lZXCnBxIfEiV/vdZBXGJxZs3oiATTOjMQxQYbbcyEs02WnFqRsMxDwlTsUnhbnEPUb9vkQhJjkHAsOt2V7csH7ojwlJPCp9baWV6iqSbvtQu5sSwWItOJflxiw2NCVvvMhGjOQOb8sFf6yeAKHOi+2vk0T7Kkr5JziPZ1L2PDmiZHwMhjVwd2uIZP6pBuQtoyxxn6040nUX5QwHjVO7RamVmqOLoKJFQHYWlRs1FHSxK6BQWH/1MeSDvCBWfiFnm6wWUMWr9NKlSOMFPWEnVdQ+eu83yoSRVT0U7WNoSw/tyK1KB64DL6Z7VffWB1AvnMZ1uvuSFtkEjHOTrcmDkGzSDQs61qO8kmDiTqlpMDVbv7DmhiXpBAC2agwLw/xh1m3xoRNetTNxowMVRjokZCrevoPLCcxrRDl0tZz9g/fF2q9rMRIAEhwWTYC+WgX4KQ4Xj4BpFbx1d3G2oHEAIJItyIFHHoKzwKJl+InNJEdXZUCEpdE3IiI2agnbP9W/1sSRWKE1Ub0KukhK7GYBIwdnwx0GgqqLYTRNrA8PnKLkSQp4ri9XJRSxI52jqbMP/S3x2ogIbvyYeQXbCwS7jloEMSgDSAQcTPZpPEzR5tHZG/XMMYHZX/h+aARdsaQJq7UNoIAJ8zrwkWnjNKSqrpF3Wfn/uOCYHXsyHepa/4f9cf0mtklGa4nSZPV8nVa0jvXzt2lzmg7uur0/ysa8Wl2LAZpkcTLlZ1tbFrbdBbcibPGi4r0QYJ6BM60yjfarwq/WnFHY2BLIpJKOxxA/7ko6kZ05t+fe/IqZnkxHqH9PsdpCN63H+py0S3tMOijrULzRMNjalF1ywPNm2Ugl/EiBbx7wv2G30wMdk1pdKbjgiGdq2wF1nPb4fOGBSoFtk8USmDSno1/FnYRKCH4gcyV3x/vHflFUHlSbh5Aw43YT1wxASJt7lvPq/uSTVw8wVe0bavIW4Gzyk7Fds5fjEi0eyZRtCfAbPlsCQ6aybuZQD870vdT8bxc1sRTdjDRbtFy8PGQqRwR91MhqITLOT7FpptcwltMV9jsGAEXBS6EX754sT3hYLB9OK6INME0rAbHUmq9tvCknIAiH3LIwuJJHHBLEFVeYveTk/00iBHjvn4Yb9MYEPaiTgMcQwRz8khf1aWj/Vz16c2aOjdiZOBEDpxH5h5tJLAMmc8bHfVAhWqdC6hDaulAgHEuo5gcKbkIbWEX/jvuOJq0fO9EYz6eDwwPy6hGGB5pzasJIKUDvXRCh40f5Iy8DOcLJRh+KCxjD9zDqhQQZ8WC+9l1/brckKPw59n0F3Sh3c+nHfyYBBlmsONTmjUZTgRwqg92z+2YQWyUf7g5jmNtBEjLyXtNvn9HkZGXd9YVp7Ps10GklGQiKYZmWUl73KGtsfsRF+/SQ4kRumd4YnlC7b04w6tFRk3HMog+38OVZDwMj40unK4XJWYABJX0t2XySGlXrL8ZNW8xR/iCVsW6+4glxFvTeH5ujPUjQKFb/0bvbZeExeYnRECdDz6u3z/gQYZiUMA8NUNKJuQTzWV9nXyubOWEG9NuJZ4X7oaGE/DtWO3j57r8bcE9KdtV+DpGvKyS+lBrdxL5vlOJ3rX+PqWeIOkxKc85lKT/us8H054bVubQebl0Tc+rvyqZVM7MToHgDj9VwlEbfV6o02Em/J5JUh0GMCoJw6CX5rfHgAIPlhlh/wXRVj8FQcUiTSzDb8lpwXxGO9rNWNfgE9ZRduiXT5LnUYhf6BC5eomyvZ6DcqDABJyWIV7kejbT0TlspicebTzP/kMyTPrGM9TchZjMdv6bPp8jRf4lUGWBjD4i1Diris5aM=" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 052a8bdd4d..bac77129be 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -3,9 +3,13 @@ on: pull_request: branches: - master + paths-ignore: + - "docs/**" push: branches: - master + paths-ignore: + - "docs/**" jobs: test: runs-on: ubuntu-latest @@ -13,16 +17,16 @@ jobs: fail-fast: false matrix: group: + - ODEBPINN - NNPDE1 - NNPDE2 - AdaptiveLoss - Logging - Forward version: - - '1' - - '1.6' + - "1.9" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.version }} diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml deleted file mode 100644 index e22f1854bf..0000000000 --- a/.github/workflows/Documentation.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Documentation - -on: - push: - branches: - - master - tags: '*' - pull_request: - -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: julia-actions/setup-julia@latest - with: - version: '1' - - name: Install dependencies - run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - - name: Build and deploy - env: - GITHUB_TOKEN: ${{ 
secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token - DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key - run: julia --project=docs/ --code-coverage=user docs/make.jl - - uses: julia-actions/julia-processcoverage@v1 - with: - directories: src,lib/NeuralPDELogging/src - - uses: codecov/codecov-action@v3 - with: - files: lcov.info diff --git a/.github/workflows/Downstream.yml b/.github/workflows/Downstream.yml index 053879f2ac..b9bc18ad58 100644 --- a/.github/workflows/Downstream.yml +++ b/.github/workflows/Downstream.yml @@ -19,14 +19,14 @@ jobs: package: - {user: SciML, repo: PDESystemLibrary.jl, group: NeuralPDE} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.julia-version }} arch: x64 - uses: julia-actions/julia-buildpkg@latest - name: Clone Downstream - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: ${{ matrix.package.user }}/${{ matrix.package.repo }} path: downstream diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml index f80d0b18bb..dd551501c5 100644 --- a/.github/workflows/FormatCheck.yml +++ b/.github/workflows/FormatCheck.yml @@ -21,7 +21,7 @@ jobs: with: version: ${{ matrix.julia-version }} - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install JuliaFormatter and format # This will use the latest version by default but you can set the version like so: # diff --git a/.github/workflows/Invalidations.yml b/.github/workflows/Invalidations.yml index 4d0004e831..b876dedf71 100644 --- a/.github/workflows/Invalidations.yml +++ b/.github/workflows/Invalidations.yml @@ -16,25 +16,25 @@ jobs: if: github.base_ref == github.event.repository.default_branch runs-on: ubuntu-latest steps: - - uses: julia-actions/setup-julia@v1 - with: - version: '1' - - uses: actions/checkout@v3 - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-invalidations@v1 - id: invs_pr + - uses: julia-actions/setup-julia@v1 + with: + version: "1.9" + - uses: actions/checkout@v4 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-invalidations@v1 + id: invs_pr - - uses: actions/checkout@v3 - with: - ref: ${{ github.event.repository.default_branch }} - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-invalidations@v1 - id: invs_default - - - name: Report invalidation counts - run: | - echo "Invalidations on default branch: ${{ steps.invs_default.outputs.total }} (${{ steps.invs_default.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY - echo "This branch: ${{ steps.invs_pr.outputs.total }} (${{ steps.invs_pr.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY - - name: Check if the PR does increase number of invalidations - if: steps.invs_pr.outputs.total > steps.invs_default.outputs.total - run: exit 1 + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.repository.default_branch }} + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-invalidations@v1 + id: invs_default + + - name: Report invalidation counts + run: | + echo "Invalidations on default branch: ${{ steps.invs_default.outputs.total }} (${{ steps.invs_default.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY + echo "This branch: ${{ steps.invs_pr.outputs.total }} (${{ steps.invs_pr.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY + - name: Check if the PR does increase number of invalidations + if: steps.invs_pr.outputs.total > 
steps.invs_default.outputs.total + run: exit 1 diff --git a/.gitignore b/.gitignore index 1a5ddd55f2..18672428f2 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,4 @@ Manifest.toml docs/build/* scratch scratch/* -.DS_Store +.DS_store diff --git a/Project.toml b/Project.toml index 168ae46c37..3cd1e08ba3 100644 --- a/Project.toml +++ b/Project.toml @@ -1,10 +1,11 @@ name = "NeuralPDE" uuid = "315f7962-48a3-4962-8226-d0f33b1235f0" authors = ["Chris Rackauckas "] -version = "5.7.0" +version = "5.9.0" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d" ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" @@ -15,11 +16,15 @@ DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" DomainSets = "5b8099bc-c8ec-5219-889f-1d9e522a28bf" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" Integrals = "de52edbc-65ea-441a-8357-d3a637375a31" IntegralsCubature = "c31f79ba-6e32-46d4-a52f-182a8ac42a54" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" Lux = "b2108857-7c20-44ae-9111-449ecde12c47" +MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" +MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca" Optim = "429524aa-4258-5aef-a3af-852621145aeb" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" @@ -38,30 +43,36 @@ UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] -Adapt = "3" +Adapt = "3, 4" +AdvancedHMC = "0.5" ArrayInterface = "6, 7" CUDA = "4" ChainRulesCore = "1" -ComponentArrays = "0.13.2" +ComponentArrays = "0.13.2, 0.14, 0.15" DiffEqBase = "6" DiffEqNoiseProcess = "5.1" Distributions = "0.23, 0.24, 0.25" DocStringExtensions = "0.8, 0.9" DomainSets = "0.6" -Flux = "0.13" +Flux = "0.13, 0.14" ForwardDiff = "0.10" +Functors = "0.4" Integrals = "3.1" -IntegralsCubature = "0.2" -Lux = "0.4" +IntegralsCubature = "=0.2.2" +LogDensityProblems = "2" +Lux = "0.4, 0.5" +MCMCChains = "6" ModelingToolkit = "8" -Optim = "1.0" -Optimisers = "0.2" +MonteCarloMeasurements = "1" +Optim = ">= 1.7.8" +Optimisers = "0.2, 0.3" Optimization = "3" -QuasiMonteCarlo = "0.2.1" +QuasiMonteCarlo = "0.3.2" RecursiveArrayTools = "2.31" Reexport = "1.0" RuntimeGeneratedFunctions = "0.5" -SciMLBase = "1.91" +SciMLBase = "1.91, 2" +Statistics = "1" StochasticDiffEq = "6.13" SymbolicUtils = "1" Symbolics = "5" @@ -71,7 +82,6 @@ julia = "1.6" [extras] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" -IntegralsCuba = "1e5cbd8a-c439-4026-92c2-98742b2c817b" OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" @@ -80,4 +90,4 @@ SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "CUDA", "SafeTestsets", "OptimizationOptimisers", "OptimizationOptimJL", "Pkg", "OrdinaryDiffEq", "IntegralsCuba"] +test = ["Test", "CUDA", "SafeTestsets", "OptimizationOptimisers", "OptimizationOptimJL", "Pkg", "OrdinaryDiffEq"] diff --git a/README.md b/README.md index fdf955c4b4..8ae6f0f6e8 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ the 
documentation, which contains the unreleased features. - Integrated logging suite for handling connections to TensorBoard - Handling of (partial) integro-differential equations and various stochastic equations - Specialized forms for solving `ODEProblem`s with neural networks - - Compatability with [Flux.jl](https://docs.sciml.ai/Flux.jl/stable/) and [Lux.jl](https://docs.sciml.ai/Lux/stable/) + - Compatability with [Flux.jl](https://fluxml.ai/) and [Lux.jl](https://lux.csail.mit.edu/) for all of the GPU-powered machine learning layers available from those libraries. - Compatability with [NeuralOperators.jl](https://docs.sciml.ai/NeuralOperators/stable/) for mixing DeepONets and other neural operators (Fourier Neural Operators, Graph Neural Operators, diff --git a/docs/Project.toml b/docs/Project.toml index 4d05b09d49..b3c1ed2088 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,6 @@ [deps] DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DomainSets = "5b8099bc-c8ec-5219-889f-1d9e522a28bf" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" @@ -20,12 +21,12 @@ SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" [compat] DiffEqBase = "6.106" -Documenter = "0.27" +Documenter = "1" DomainSets = "0.6" -Flux = "0.13" +Flux = "0.13, 0.14" Integrals = "3.3" -IntegralsCubature = "0.2" -Lux = "0.4" +IntegralsCubature = "=0.2.2" +Lux = "0.4, 0.5" ModelingToolkit = "8.33" NeuralPDE = "5.3" Optimization = "3.9" diff --git a/docs/make.jl b/docs/make.jl index c50aadcee3..67ee53b3d4 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -10,19 +10,10 @@ include("pages.jl") makedocs(sitename = "NeuralPDE.jl", authors = "#", - clean = true, - doctest = false, modules = [NeuralPDE], - strict = [ - :doctest, - :linkcheck, - :parse_error, - :example_block, - # Other available options are - # :autodocs_block, :cross_references, :docs_block, :eval_block, :example_block, :footnote, :meta_block, :missing_docs, :setup_block - ], - format = Documenter.HTML(analytics = "UA-90474609-3", - assets = ["assets/favicon.ico"], + clean = true, doctest = false, linkcheck = true, + warnonly = [:missing_docs, :example_block], + format = Documenter.HTML(assets = ["assets/favicon.ico"], canonical = "https://docs.sciml.ai/NeuralPDE/stable/"), pages = pages) diff --git a/docs/pages.jl b/docs/pages.jl index 8277ae9ba9..60df4edd60 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -1,5 +1,6 @@ pages = ["index.md", - "ODE PINN Tutorials" => Any["Introduction to NeuralPDE for ODEs" => "tutorials/ode.md" + "ODE PINN Tutorials" => Any["Introduction to NeuralPDE for ODEs" => "tutorials/ode.md", + "Bayesian PINNs for Coupled ODEs" => "tutorials/Lotka_Volterra_BPINNs.md" #"examples/nnrode_example.md", # currently incorrect ], "PDE PINN Tutorials" => Any["Introduction to NeuralPDE for PDEs" => "tutorials/pdesystem.md", diff --git a/docs/src/examples/ks.md b/docs/src/examples/ks.md index e339dac116..bb2b1064f5 100644 --- a/docs/src/examples/ks.md +++ b/docs/src/examples/ks.md @@ -6,13 +6,13 @@ Let's consider the Kuramoto–Sivashinsky equation, which contains a 4th-order d ∂_t u(x, t) + u(x, t) ∂_x u(x, t) + \alpha ∂^2_x u(x, t) + \beta ∂^3_x u(x, t) + \gamma ∂^4_x u(x, t) = 0 \, , ``` -where `\alpha = \gamma = 1` and `\beta = 4`. The exact solution is: +where $\alpha = \gamma = 1$ and $\beta = 4$. 
The exact solution is: ```math u_e(x, t) = 11 + 15 \tanh \theta - 15 \tanh^2 \theta - 15 \tanh^3 \theta \, , ``` -where `\theta = t - x/2` and with initial and boundary conditions: +where $\theta = t - x/2$ and with initial and boundary conditions: ```math \begin{align*} diff --git a/docs/src/examples/linear_parabolic.md b/docs/src/examples/linear_parabolic.md index 4200ae27bd..0ae1432763 100644 --- a/docs/src/examples/linear_parabolic.md +++ b/docs/src/examples/linear_parabolic.md @@ -81,10 +81,14 @@ sym_prob = symbolic_discretize(pdesystem, discretization) pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions +global iteration = 0 callback = function (p, l) - println("loss: ", l) - println("pde_losses: ", map(l_ -> l_(p), pde_inner_loss_functions)) - println("bcs_losses: ", map(l_ -> l_(p), bcs_inner_loss_functions)) + if iteration % 10 == 0 + println("loss: ", l) + println("pde_losses: ", map(l_ -> l_(p), pde_inner_loss_functions)) + println("bcs_losses: ", map(l_ -> l_(p), bcs_inner_loss_functions)) + end + global iteration += 1 return false end diff --git a/docs/src/examples/nonlinear_elliptic.md b/docs/src/examples/nonlinear_elliptic.md index d6c365466e..db8d6e228f 100644 --- a/docs/src/examples/nonlinear_elliptic.md +++ b/docs/src/examples/nonlinear_elliptic.md @@ -95,11 +95,15 @@ pde_inner_loss_functions = sym_prob.loss_functions.pde_loss_functions bcs_inner_loss_functions = sym_prob.loss_functions.bc_loss_functions[1:6] aprox_derivative_loss_functions = sym_prob.loss_functions.bc_loss_functions[7:end] +global iteration = 0 callback = function (p, l) - println("loss: ", l) - println("pde_losses: ", map(l_ -> l_(p), pde_inner_loss_functions)) - println("bcs_losses: ", map(l_ -> l_(p), bcs_inner_loss_functions)) - println("der_losses: ", map(l_ -> l_(p), aprox_derivative_loss_functions)) + if iteration % 10 == 0 + println("loss: ", l) + println("pde_losses: ", map(l_ -> l_(p), pde_inner_loss_functions)) + println("bcs_losses: ", map(l_ -> l_(p), bcs_inner_loss_functions)) + println("der_losses: ", map(l_ -> l_(p), aprox_derivative_loss_functions)) + end + global iteration += 1 return false end diff --git a/docs/src/index.md b/docs/src/index.md index 2d18be492a..08960823e8 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -3,7 +3,7 @@ [NeuralPDE.jl](https://github.com/SciML/NeuralPDE.jl) is a solver package which consists of neural network solvers for partial differential equations using physics-informed neural networks (PINNs) and the ability to generate neural -networks which both approximate physical laws and real data simultaniously. +networks which both approximate physical laws and real data simultaneously. ## Features @@ -15,7 +15,7 @@ networks which both approximate physical laws and real data simultaniously. - Integrated logging suite for handling connections to TensorBoard. - Handling of (partial) integro-differential equations and various stochastic equations. - Specialized forms for solving `ODEProblem`s with neural networks. - - Compatibility with [Flux.jl](https://docs.sciml.ai/Flux.jl/stable/) and [Lux.jl](https://docs.sciml.ai/Lux/stable/). + - Compatibility with [Flux.jl](https://fluxml.ai/) and [Lux.jl](https://lux.csail.mit.edu/). for all the GPU-powered machine learning layers available from those libraries. 
- Compatibility with [NeuralOperators.jl](https://docs.sciml.ai/NeuralOperators/stable/) for mixing DeepONets and other neural operators (Fourier Neural Operators, Graph Neural Operators, @@ -132,32 +132,19 @@ Pkg.status(; mode = PKGMODE_MANIFEST) # hide ``` -```@raw html -You can also download the -manifest file and the -project file. +link_manifest = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version * + "/assets/Manifest.toml" +link_project = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version * + "/assets/Project.toml" +Markdown.parse("""You can also download the +[manifest]($link_manifest) +file and the +[project]($link_project) +file. +""") ``` diff --git a/docs/src/manual/ode.md b/docs/src/manual/ode.md index 0cd20d4061..3539745364 100644 --- a/docs/src/manual/ode.md +++ b/docs/src/manual/ode.md @@ -3,3 +3,9 @@ ```@docs NNODE ``` + +# Bayesian inference with PINNs + +```@docs +BNNODE +``` \ No newline at end of file diff --git a/docs/src/tutorials/Lotka_Volterra_BPINNs.md b/docs/src/tutorials/Lotka_Volterra_BPINNs.md new file mode 100644 index 0000000000..5937f8d0dc --- /dev/null +++ b/docs/src/tutorials/Lotka_Volterra_BPINNs.md @@ -0,0 +1,114 @@ +# Bayesian Physics informed Neural Network ODEs Solvers + +Bayesian inference for PINNs provides an approach to ODE solution finding and parameter estimation with quantified uncertainty. + +## The Lotka-Volterra Model + +The Lotka–Volterra equations, also known as the predator–prey equations, are a pair of first-order nonlinear differential equations. These differential equations are frequently used to describe the dynamics of biological systems in which two species interact, one as a predator and the other as prey. The populations change through time according to the pair of equations: + +```math +\begin{aligned} +\frac{\mathrm{d}x}{\mathrm{d}t} &= (\alpha - \beta y(t))x(t), \\ +\frac{\mathrm{d}y}{\mathrm{d}t} &= (\delta x(t) - \gamma)y(t) +\end{aligned} +``` + +where $x(t)$ and $y(t)$ denote the populations of prey and predator at time $t$, respectively, and $\alpha, \beta, \gamma, \delta$ are positive parameters. + +We implement the Lotka-Volterra model and simulate it with ideal parameters $\alpha = 1.5$, $\beta = 1$, $\gamma = 3$, and $\delta = 1$ and initial conditions $x(0) = y(0) = 1$. + +We then solve the equations and estimate the parameters of the model with priors for $\alpha$, $\beta$, $\gamma$ and $\delta$ as `Normal(1,2)`, `Normal(2,2)`, `Normal(2,2)` and `Normal(0,2)` using a neural network. + +```@example bpinn +using NeuralPDE, Lux, Plots, OrdinaryDiffEq, Distributions, Random + +function lotka_volterra(u, p, t) + # Model parameters. + α, β, γ, δ = p + # Current state. + x, y = u + + # Evaluate differential equations. + dx = (α - β * y) * x # prey + dy = (δ * x - γ) * y # predator + + return [dx, dy] +end + +# initial-value problem. +u0 = [1.0, 1.0] +p = [1.5, 1.0, 3.0, 1.0] +tspan = (0.0, 4.0) +prob = ODEProblem(lotka_volterra, u0, tspan, p) +``` + +With the [`saveat`](https://docs.sciml.ai/DiffEqDocs/stable/basics/common_solver_opts/) argument, we can specify that the solution is stored only at `saveat` time units. + +```@example bpinn +# Solve using OrdinaryDiffEq.jl solver +dt = 0.01 +solution = solve(prob, Tsit5(); saveat = dt) +``` + +We generate noisy observations to use for the parameter estimation task in this tutorial. To make the example more realistic we add random normally distributed noise to the simulation. 
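+The noise is multiplicative, roughly 30% of the signal at each saved time point. If you want the noisy dataset (and hence the sampled posterior) to be reproducible, one option (an optional sketch, not part of the original tutorial) is to fix the random seed before generating it:
+
+```julia
+using Random
+Random.seed!(1234)  # any fixed seed makes the `randn` calls below deterministic
+```
+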
+ +```@example bpinn +# Dataset creation for parameter estimation (30% noise) +time = solution.t +u = hcat(solution.u...) +x = u[1, :] + (u[1, :]) .* (0.3 .* randn(length(u[1, :]))) +y = u[2, :] + (u[2, :]) .* (0.3 .* randn(length(u[2, :]))) +dataset = [x, y, time] + +# Plotting the data which will be used +plot(time, x, label = "noisy x") +plot!(time, y, label = "noisy y") +plot!(solution, labels = ["x" "y"]) +``` + +Lets define a PINN. + +```@example bpinn +# Neural Networks must have 2 outputs as u -> [dx,dy] in function lotka_volterra() +chain = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), + Lux.Dense(6, 2)) +``` + +The dataset we generated can be passed for doing parameter estimation using provided priors in `param` keyword argument for [`BNNODE`](@ref). + +```@example bpinn +alg = BNNODE(chain; + dataset = dataset, + draw_samples = 1000, + l2std = [0.1, 0.1], + phystd = [0.1, 0.1], + priorsNNw = (0.0, 3.0), + param = [ + Normal(1, 2), + Normal(2, 2), + Normal(2, 2), + Normal(0, 2)], progress = false) + +sol_pestim = solve(prob, alg; saveat = dt) + +nothing #hide +``` + +The solution for the ODE is retured as a nested vector `sol_flux_pestim.ensemblesol`. Here, [$x$ , $y$] would be returned. + +```@example bpinn +# plotting solution for x,y for chain +plot(time, sol_pestim.ensemblesol[1], label = "estimated x") +plot!(time, sol_pestim.ensemblesol[2], label = "estimated y") + +# comparing it with the original solution +plot!(solution, labels = ["true x" "true y"]) +``` + +We can see the estimated ODE parameters by - + +```@example bpinn +sol_pestim.estimated_ode_params +``` + +We can see it is close to the true values of the parameters. diff --git a/docs/src/tutorials/constraints.md b/docs/src/tutorials/constraints.md index 2c919f0dd0..8ccdda3c2f 100644 --- a/docs/src/tutorials/constraints.md +++ b/docs/src/tutorials/constraints.md @@ -35,8 +35,6 @@ Dxx = Differential(x)^2 _σ = 0.5 x_0 = -2.2 x_end = 2.2 -# Discretization -dx = 0.01 eq = Dx((α * x - β * x^3) * p(x)) ~ (_σ^2 / 2) * Dxx(p(x)) @@ -57,7 +55,7 @@ lb = [x_0] ub = [x_end] function norm_loss_function(phi, θ, p) function inner_f(x, θ) - dx * phi(x, θ) .- 1 + 0.01 * phi(x, θ) .- 1 end prob = IntegralProblem(inner_f, lb, ub, θ) norm2 = solve(prob, HCubatureJL(), reltol = 1e-8, abstol = 1e-8, maxiters = 10) @@ -65,7 +63,7 @@ function norm_loss_function(phi, θ, p) end discretization = PhysicsInformedNN(chain, - GridTraining(dx), + QuadratureTraining(), additional_loss = norm_loss_function) @named pdesystem = PDESystem(eq, bcs, domains, [x], [p(x)]) @@ -98,7 +96,7 @@ using Plots C = 142.88418699042 #fitting param analytic_sol_func(x) = C * exp((1 / (2 * _σ^2)) * (2 * α * x^2 - β * x^4)) -xs = [infimum(d.domain):dx:supremum(d.domain) for d in domains][1] +xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] u_real = [analytic_sol_func(x) for x in xs] u_predict = [first(phi(x, res.u)) for x in xs] diff --git a/docs/src/tutorials/derivative_neural_network.md b/docs/src/tutorials/derivative_neural_network.md index 5e145c094c..3432d58688 100644 --- a/docs/src/tutorials/derivative_neural_network.md +++ b/docs/src/tutorials/derivative_neural_network.md @@ -93,9 +93,9 @@ input_ = length(domains) n = 15 chain = [Lux.Chain(Dense(input_, n, Lux.σ), Dense(n, n, Lux.σ), Dense(n, 1)) for _ in 1:7] -grid_strategy = NeuralPDE.GridTraining(0.07) +training_strategy = NeuralPDE.QuadratureTraining() discretization = NeuralPDE.PhysicsInformedNN(chain, - grid_strategy) + training_strategy) vars = [u1(t, x), u2(t, x), u3(t, x), 
Dxu1(t, x), Dtu1(t, x), Dxu2(t, x), Dtu2(t, x)] @named pdesystem = PDESystem(eqs_, bcs__, domains, [t, x], vars) diff --git a/docs/src/tutorials/gpu.md b/docs/src/tutorials/gpu.md index 9e8213e9fe..5a95043450 100644 --- a/docs/src/tutorials/gpu.md +++ b/docs/src/tutorials/gpu.md @@ -84,7 +84,7 @@ chain = Chain(Dense(3, inner, Lux.σ), Dense(inner, inner, Lux.σ), Dense(inner, 1)) -strategy = GridTraining(0.05) +strategy = QuadratureTraining() ps = Lux.setup(Random.default_rng(), chain)[1] ps = ps |> ComponentArray |> gpu .|> Float64 discretization = PhysicsInformedNN(chain, diff --git a/docs/src/tutorials/integro_diff.md b/docs/src/tutorials/integro_diff.md index e50bb11b3f..3eb2323ba0 100644 --- a/docs/src/tutorials/integro_diff.md +++ b/docs/src/tutorials/integro_diff.md @@ -57,7 +57,7 @@ bcs = [i(0.0) ~ 0.0] domains = [t ∈ Interval(0.0, 2.0)] chain = Chain(Dense(1, 15, Flux.σ), Dense(15, 1)) |> f64 -strategy_ = GridTraining(0.05) +strategy_ = QuadratureTraining() discretization = PhysicsInformedNN(chain, strategy_) @named pde_system = PDESystem(eq, bcs, domains, [t], [i(t)]) diff --git a/docs/src/tutorials/low_level.md b/docs/src/tutorials/low_level.md index 367cc6ca39..d60e3163dc 100644 --- a/docs/src/tutorials/low_level.md +++ b/docs/src/tutorials/low_level.md @@ -35,12 +35,9 @@ bcs = [u(0, x) ~ -sin(pi * x), domains = [t ∈ Interval(0.0, 1.0), x ∈ Interval(-1.0, 1.0)] -# Discretization -dx = 0.05 - # Neural network chain = Lux.Chain(Dense(2, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1)) -strategy = NeuralPDE.GridTraining(dx) +strategy = NeuralPDE.QuadratureTraining() indvars = [t, x] depvars = [u(t, x)] @@ -79,7 +76,7 @@ And some analysis: ```@example low_level using Plots -ts, xs = [infimum(d.domain):dx:supremum(d.domain) for d in domains] +ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] u_predict_contourf = reshape([first(phi([t, x], res.u)) for t in ts for x in xs], length(xs), length(ts)) plot(ts, xs, u_predict_contourf, linetype = :contourf, title = "predict") diff --git a/docs/src/tutorials/neural_adapter.md b/docs/src/tutorials/neural_adapter.md index e8b65d8e7e..db4ac9574b 100644 --- a/docs/src/tutorials/neural_adapter.md +++ b/docs/src/tutorials/neural_adapter.md @@ -67,7 +67,7 @@ function loss(cord, θ) chain2(cord, θ) .- phi(cord, res.u) end -strategy = NeuralPDE.GridTraining(0.02) +strategy = NeuralPDE.QuadratureTraining() prob_ = NeuralPDE.neural_adapter(loss, init_params2, pde_system, strategy) callback = function (p, l) @@ -179,7 +179,7 @@ for i in 1:count_decomp bcs_ = create_bcs(domains_[1].domain, phi_bound) @named pde_system_ = PDESystem(eq, bcs_, domains_, [x, y], [u(x, y)]) push!(pde_system_map, pde_system_) - strategy = NeuralPDE.GridTraining([0.1 / count_decomp, 0.1]) + strategy = NeuralPDE.QuadratureTraining() discretization = NeuralPDE.PhysicsInformedNN(chains[i], strategy; init_params = init_params[i]) @@ -243,10 +243,10 @@ callback = function (p, l) end prob_ = NeuralPDE.neural_adapter(losses, init_params2, pde_system_map, - NeuralPDE.GridTraining([0.1 / count_decomp, 0.1])) + NeuralPDE.QuadratureTraining()) res_ = Optimization.solve(prob_, BFGS(); callback = callback, maxiters = 2000) prob_ = NeuralPDE.neural_adapter(losses, res_.minimizer, pde_system_map, - NeuralPDE.GridTraining([0.05 / count_decomp, 0.05])) + NeuralPDE.QuadratureTraining()) res_ = Optimization.solve(prob_, BFGS(); callback = callback, maxiters = 1000) phi_ = NeuralPDE.get_phi(chain2) diff --git a/docs/src/tutorials/ode.md b/docs/src/tutorials/ode.md index 
4eaa7ba656..5b1afdfd2a 100644 --- a/docs/src/tutorials/ode.md +++ b/docs/src/tutorials/ode.md @@ -3,72 +3,73 @@ !!! note It is highly recommended you first read the [solving ordinary differential - equations with DifferentialEquations.jl tutorial](https://docs.sciml.ai/DiffEqDocs/stable/tutorials/ode_example/) + equations with DifferentialEquations.jl tutorial](https://docs.sciml.ai/DiffEqDocs/stable/getting_started/) before reading this tutorial. -This tutorial is an introduction to using physics-informed neural networks (PINNs) -for solving ordinary differential equations (ODEs). In contrast to the later -parts of this documentation which use the symbolic interface, here we will focus on -the simplified `NNODE` which uses the `ODEProblem` specification for the ODE. +This tutorial is an introduction to using physics-informed neural networks (PINNs) for solving ordinary differential equations (ODEs). In contrast to the later parts of this documentation which use the symbolic interface, here we will focus on the simplified [`NNODE`](@ref) which uses the `ODEProblem` specification for the ODE. + Mathematically, the `ODEProblem` defines a problem: ```math u' = f(u,p,t) ``` -for ``t \in (t_0,t_f)`` with an initial condition ``u(t_0) = u_0``. With physics-informed -neural networks, we choose a neural network architecture `NN` to represent the solution `u` -and seek parameters `p` such that `NN' = f(NN,p,t)` for all points in the domain. -When this is satisfied sufficiently closely, then `NN` is thus a solution to the differential -equation. +for ``t \in (t_0,t_f)`` with an initial condition ``u(t_0) = u_0``. With physics-informed neural networks, we choose a neural network architecture `NN` to represent the solution `u` and seek parameters `p` such that `NN' = f(NN,p,t)` for all points in the domain. When this is satisfied sufficiently closely, then `NN` is thus a solution to the differential equation. ## Solving an ODE with NNODE -Let's solve the simple ODE: +Let's solve a simple ODE: ```math u' = \cos(2\pi t) ``` -for ``t \in (0,1)`` and ``u_0 = 0`` with `NNODE`. First, we define the `ODEProblem` as we would -with any other DifferentialEquations.jl solver. This looks like: +for ``t \in (0,1)`` and ``u_0 = 0`` with [`NNODE`](@ref). First, we define an `ODEProblem` as we would for defining an ODE using DifferentialEquations.jl interface. This looks like: ```@example nnode1 -using NeuralPDE, Flux, OptimizationOptimisers +using NeuralPDE -linear(u, p, t) = cos(2pi * t) -tspan = (0.0f0, 1.0f0) -u0 = 0.0f0 +linear(u, p, t) = cos(t * 2 * pi) +tspan = (0.0, 1.0) +u0 = 0.0 prob = ODEProblem(linear, u0, tspan) ``` -Now, to define the `NNODE` solver, we must choose a neural network architecture. To do this, we -will use the [Flux.jl library](https://fluxml.ai/) to define a multilayer perceptron (MLP) -with one hidden layer of 5 nodes and a sigmoid activation function. This looks like: +Now, to define the [`NNODE`](@ref) solver, we must choose a neural network architecture. To do this, we will use the [Lux.jl](https://lux.csail.mit.edu/) to define a multilayer perceptron (MLP) with one hidden layer of 5 nodes and a sigmoid activation function. This looks like: ```@example nnode1 -chain = Flux.Chain(Dense(1, 5, σ), Dense(5, 1)) +using Lux, Random + +rng = Random.default_rng() +Random.seed!(rng, 0) +chain = Chain(Dense(1, 5, σ), Dense(5, 1)) +ps, st = Lux.setup(rng, chain) |> Lux.f64 ``` -Now we must choose an optimizer to define the `NNODE` solver. 
A common choice is `ADAM`, with -a tunable rate , which we will set to `0.1`. In general, this rate parameter should be -decreased if the solver's loss tends to be unsteady (sometimes rise “too much”), but should be -as large as possible for efficiency. Thus, the definition of the `NNODE` solver is as follows: +Now we must choose an optimizer to define the [`NNODE`](@ref) solver. A common choice is `Adam`, with a tunable rate, which we will set to `0.1`. In general, this rate parameter should be decreased if the solver's loss tends to be unsteady (sometimes rise “too much”), but should be as large as possible for efficiency. We use `Adam` from [OptimizationOptimisers](https://docs.sciml.ai/Optimization/stable/optimization_packages/optimisers/). Thus, the definition of the [`NNODE`](@ref) solver is as follows: ```@example nnode1 -opt = OptimizationOptimisers.Adam(0.1) -alg = NeuralPDE.NNODE(chain, opt) +using OptimizationOptimisers + +opt = Adam(0.1) +alg = NNODE(chain, opt, init_params = ps) ``` -Once these pieces are together, we call `solve` just like with any other `ODEProblem` solver. -Let's turn on `verbose` so we can see the loss over time during the training process: +Once these pieces are together, we call `solve` just like with any other `ODEProblem`. Let's turn on `verbose` so we can see the loss over time during the training process: ```@example nnode1 -sol = solve(prob, NeuralPDE.NNODE(chain, opt), verbose = true, abstol = 1.0f-6, - maxiters = 200) +sol = solve(prob, alg, verbose = true, maxiters = 2000, saveat = 0.01) +``` + +Now lets compare the predictions from the learned network with the ground truth which we can obtain by numerically solving the ODE. + +```@example nnode1 +using OrdinaryDiffEq, Plots + +ground_truth = solve(prob, Tsit5(), saveat = 0.01) + +plot(ground_truth, label = "ground truth") +plot!(sol.t, sol.u, label = "pred") ``` -And that's it: the neural network solution was computed by training the neural network and -returned in the standard DifferentialEquations.jl `ODESolution` format. For more information -on handling the solution, consult -[the DifferentialEquations.jl solution handling section](https://docs.sciml.ai/DiffEqDocs/stable/basics/solution/). +And that's it: the neural network solution was computed by training the neural network and returned in the standard DifferentialEquations.jl `ODESolution` format. For more information on handling the solution, consult [here](https://docs.sciml.ai/DiffEqDocs/stable/basics/solution/). diff --git a/docs/src/tutorials/param_estim.md b/docs/src/tutorials/param_estim.md index 297f379419..2f2ebc782e 100644 --- a/docs/src/tutorials/param_estim.md +++ b/docs/src/tutorials/param_estim.md @@ -10,7 +10,7 @@ Consider a Lorenz System, \end{align*} ``` -with Physics-Informed Neural Networks. Now we would consider the case where we want to optimize the parameters `\sigma`, `\beta`, and `\rho`. +with Physics-Informed Neural Networks. Now we would consider the case where we want to optimize the parameters $\sigma$, $\beta$, and $\rho$. We start by defining the problem, @@ -58,7 +58,7 @@ u0 = [1.0; 0.0; 0.0] tspan = (0.0, 1.0) prob = ODEProblem(lorenz!, u0, tspan) sol = solve(prob, Tsit5(), dt = 0.1) -ts = [infimum(d.domain):dt:supremum(d.domain) for d in domains][1] +ts = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] function getData(sol) data = [] us = hcat(sol(ts).u...) @@ -113,7 +113,7 @@ Then finally defining and optimizing using the `PhysicsInformedNN` interface. 
```@example param_estim discretization = NeuralPDE.PhysicsInformedNN([chain1, chain2, chain3], - NeuralPDE.GridTraining(dt), param_estim = true, + NeuralPDE.QuadratureTraining(), param_estim = true, additional_loss = additional_loss) @named pde_system = PDESystem(eqs, bcs, domains, [t], [x(t), y(t), z(t)], [σ_, ρ, β], defaults = Dict([p .=> 1.0 for p in [σ_, ρ, β]])) @@ -130,7 +130,7 @@ And then finally some analysis by plotting. ```@example param_estim minimizers = [res.u.depvar[depvars[i]] for i in 1:3] -ts = [infimum(d.domain):(dt / 10):supremum(d.domain) for d in domains][1] +ts = [infimum(d.domain):(0.001):supremum(d.domain) for d in domains][1] u_predict = [[discretization.phi[i]([t], minimizers[i])[1] for t in ts] for i in 1:3] plot(sol) plot!(ts, u_predict, label = ["x(t)" "y(t)" "z(t)"]) diff --git a/docs/src/tutorials/pdesystem.md b/docs/src/tutorials/pdesystem.md index b140dc793a..4ace039bd4 100644 --- a/docs/src/tutorials/pdesystem.md +++ b/docs/src/tutorials/pdesystem.md @@ -23,7 +23,7 @@ on the space domain: x \in [0, 1] \, , \ y \in [0, 1] \, , ``` -with grid discretization `dx = 0.05` using physics-informed neural networks. +Using physics-informed neural networks. ## Copy-Pasteable Code @@ -52,7 +52,7 @@ chain = Lux.Chain(Dense(dim, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1)) # Discretization dx = 0.05 -discretization = PhysicsInformedNN(chain, GridTraining(dx)) +discretization = PhysicsInformedNN(chain, QuadratureTraining()) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) prob = discretize(pde_system, discretization) @@ -122,9 +122,8 @@ Here, we build PhysicsInformedNN algorithm where `dx` is the step of discretizat `strategy` stores information for choosing a training strategy. ```@example poisson -# Discretization dx = 0.05 -discretization = PhysicsInformedNN(chain, GridTraining(dx)) +discretization = PhysicsInformedNN(chain, QuadratureTraining()) ``` As described in the API docs, we now need to define the `PDESystem` and create PINNs diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl new file mode 100644 index 0000000000..5d937e4cf1 --- /dev/null +++ b/src/BPINN_ode.jl @@ -0,0 +1,273 @@ +# HIGH level API for BPINN ODE solver + +""" +```julia +BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000, + priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05], + phystd = [0.05], dataset = [nothing], physdt = 1 / 20.0, + MCMCargs = (n_leapfrog=30), nchains = 1, init_params = nothing, + Adaptorkwargs = (Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, Metric = DiagEuclideanMetric), + Integratorkwargs = (Integrator = Leapfrog,), autodiff = false, + progress = false, verbose = false) +``` + +Algorithm for solving ordinary differential equations using a Bayesian neural network. This is a specialization +of the physics-informed neural network which is used as a solver for a standard `ODEProblem`. + +!!! warn + + Note that BNNODE only supports ODEs which are written in the out-of-place form, i.e. + `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the BNNODE + will exit with an error. + +## Positional Arguments + +* `chain`: A neural network architecture, defined as either a `Flux.Chain` or a `Lux.AbstractExplicitLayer`. +* `Kernel`: Choice of MCMC Sampling Algorithm. Defaults to `AdvancedHMC.HMC` + +## Keyword Arguments +(refer ahmc_bayesian_pinn_ode() keyword arguments.) 
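+As a quick orientation, a minimal construction using a few of these keywords looks as follows (the values are simply the defaults shown in the signature above; this is an illustrative sketch rather than additional API):
+
+```julia
+alg = BNNODE(chain;                   # chain is a Flux.Chain or Lux.AbstractExplicitLayer
+             draw_samples = 2000,     # number of MCMC samples to draw
+             priorsNNw = (0.0, 2.0),  # prior specification for the neural network weights
+             l2std = [0.05],          # std used in the data loglikelihood
+             phystd = [0.05],         # std used in the physics loglikelihood
+             dataset = [nothing])     # pass a dataset only when estimating ODE parameters
+```
+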
+ +## Example + +```julia +linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t) +tspan = (0.0, 10.0) +u0 = 0.0 +p = [5.0, -5.0] +prob = ODEProblem(linear, u0, tspan, p) +linear_analytic = (u0, p, t) -> exp(-t / 5) * (u0 + sin(t)) + +sol = solve(prob, Tsit5(); saveat = 0.05) +u = sol.u[1:100] +time = sol.t[1:100] +x̂ = u .+ (u .* 0.2) .* randn(size(u)) +dataset = [x̂, time] + +chainlux = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1)) + +alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2000, + l2std = [0.05], phystd = [0.05], + priorsNNw = (0.0, 3.0), progress = true) + +sol_lux = solve(prob, alg) + +# with parameter estimation +alg = NeuralPDE.BNNODE(chainlux,dataset = dataset, + draw_samples = 2000,l2std = [0.05], + phystd = [0.05],priorsNNw = (0.0, 10.0), + param = [Normal(6.5, 0.5), Normal(-3, 0.5)], + progress = true) + +sol_lux_pestim = solve(prob, alg) +``` + +## Solution Notes + +Note that the solution is evaluated at fixed time points according to the strategy chosen. +ensemble solution is evaluated and given at steps of `saveat`. +Dataset should only be provided when ODE parameter Estimation is being done. +The neural network is a fully continuous solution so `BPINNsolution` +is an accurate interpolation (up to the neural network training result). In addition, the +`BPINNstats` is returned as `sol.fullsolution` for further analysis. + +## References + +Liu Yanga, Xuhui Menga, George Em Karniadakis. "B-PINNs: Bayesian Physics-Informed Neural Networks for +Forward and Inverse PDE Problems with Noisy Data" + +Kevin Linka, Amelie Schäfer, Xuhui Meng, Zongren Zou, George Em Karniadakis, Ellen Kuhl. +"Bayesian Physics Informed Neural Networks for real-world nonlinear dynamical systems" + +""" +struct BNNODE{C, K, IT <: NamedTuple, + A <: NamedTuple, H <: NamedTuple, + ST <: Union{Nothing, AbstractTrainingStrategy}, + I <: Union{Nothing, <:NamedTuple, Vector{<:AbstractFloat}}, + P <: Union{Nothing, Vector{<:Distribution}}, + D <: + Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}} <: + NeuralPDEAlgorithm + chain::C + Kernel::K + strategy::ST + draw_samples::Int64 + priorsNNw::Tuple{Float64, Float64} + param::P + l2std::Vector{Float64} + phystd::Vector{Float64} + dataset::D + physdt::Float64 + MCMCkwargs::H + nchains::Int64 + init_params::I + Adaptorkwargs::A + Integratorkwargs::IT + autodiff::Bool + progress::Bool + verbose::Bool +end +function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000, + priorsNNw = (0.0, 2.0), param = nothing, l2std = [0.05], phystd = [0.05], + dataset = [nothing], physdt = 1 / 20.0, MCMCkwargs = (n_leapfrog = 30,), nchains = 1, + init_params = nothing, + Adaptorkwargs = (Adaptor = StanHMCAdaptor, + Metric = DiagEuclideanMetric, + targetacceptancerate = 0.8), + Integratorkwargs = (Integrator = Leapfrog,), + autodiff = false, progress = false, verbose = false) + BNNODE(chain, Kernel, strategy, + draw_samples, priorsNNw, param, l2std, + phystd, dataset, physdt, MCMCkwargs, + nchains, init_params, + Adaptorkwargs, Integratorkwargs, + autodiff, progress, verbose) +end + +""" +Contains ahmc_bayesian_pinn_ode() function output: +1> a MCMCChains.jl chain object for sampled parameters +2> The set of all sampled parameters +3> statistics like: + > n_steps + > acceptance_rate + > log_density + > hamiltonian_energy + > hamiltonian_energy_error + > numerical_error + > step_size + > nom_step_size +""" +struct BPINNstats{MC, S, ST} + mcmc_chain::MC + samples::S + statistics::ST +end + +""" +BPINN Solution contains the 
original solution from AdvancedHMC.jl sampling(BPINNstats contains fields related to that) +> ensemblesol is the Probabilistic Estimate(MonteCarloMeasurements.jl Particles type) of Ensemble solution from All Neural Network's(made using all sampled parameters) output's. +> estimated_nn_params - Probabilistic Estimate of NN params from sampled weights,biases +> estimated_ode_params - Probabilistic Estimate of ODE params from sampled unknown ode paramters +""" +struct BPINNsolution{O <: BPINNstats, E, + NP <: Vector{<:MonteCarloMeasurements.Particles{<:Float64}}, + OP <: Union{Vector{Nothing}, + Vector{<:MonteCarloMeasurements.Particles{<:Float64}}}} + original::O + ensemblesol::E + estimated_nn_params::NP + estimated_ode_params::OP + + function BPINNsolution(original, ensemblesol, estimated_nn_params, estimated_ode_params) + new{typeof(original), typeof(ensemblesol), typeof(estimated_nn_params), + typeof(estimated_ode_params)}(original, ensemblesol, estimated_nn_params, + estimated_ode_params) + end +end + +function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem, + alg::BNNODE, + args...; + dt = nothing, + timeseries_errors = true, + save_everystep = true, + adaptive = false, + abstol = 1.0f-6, + reltol = 1.0f-3, + verbose = false, + saveat = 1 / 50.0, + maxiters = nothing, + numensemble = floor(Int, alg.draw_samples / 3)) + @unpack chain, l2std, phystd, param, priorsNNw, Kernel, strategy, + draw_samples, dataset, init_params, + nchains, physdt, Adaptorkwargs, Integratorkwargs, + MCMCkwargs, autodiff, progress, verbose = alg + + # ahmc_bayesian_pinn_ode needs param=[] for easier vcat operation for full vector of parameters + param = param === nothing ? [] : param + strategy = strategy === nothing ? GridTraining : strategy + + if draw_samples < 0 + throw(error("Number of samples to be drawn has to be >=0.")) + end + + mcmcchain, samples, statistics = ahmc_bayesian_pinn_ode(prob, chain, + strategy = strategy, dataset = dataset, + draw_samples = draw_samples, + init_params = init_params, + physdt = physdt, l2std = l2std, + phystd = phystd, + priorsNNw = priorsNNw, + param = param, + nchains = nchains, + autodiff = autodiff, + Kernel = Kernel, + Adaptorkwargs = Adaptorkwargs, + Integratorkwargs = Integratorkwargs, + MCMCkwargs = MCMCkwargs, + progress = progress, + verbose = verbose) + + fullsolution = BPINNstats(mcmcchain, samples, statistics) + ninv = length(param) + t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2]) + + if chain isa Lux.AbstractExplicitLayer + θinit, st = Lux.setup(Random.default_rng(), chain) + θ = [vector_to_parameters(samples[i][1:(end - ninv)], θinit) + for i in (draw_samples - numensemble):draw_samples] + luxar = [chain(t', θ[i], st)[1] for i in 1:numensemble] + # only need for size + θinit = collect(ComponentArrays.ComponentArray(θinit)) + elseif chain isa Flux.Chain + θinit, re1 = Flux.destructure(chain) + out = re1.([samples[i][1:(end - ninv)] + for i in (draw_samples - numensemble):draw_samples]) + luxar = collect(out[i](t') for i in eachindex(out)) + else + throw(error("Only Lux.AbstractExplicitLayer and Flux.Chain neural networks are supported")) + end + + # contructing ensemble predictions + ensemblecurves = Vector{}[] + # check if NN output is more than 1 + numoutput = size(luxar[1])[1] + if numoutput > 1 + # Initialize a vector to store the separated outputs for each output dimension + output_matrices = [Vector{Vector{Float32}}() for _ in 1:numoutput] + + # Loop through each element in `luxar` + for element in luxar + for i in 1:numoutput + 
push!(output_matrices[i], element[i, :]) # Append the i-th output (i-th row) to the i-th output_matrices + end + end + + for r in 1:numoutput + ensem_r = hcat(output_matrices[r]...)' + ensemblecurve_r = prob.u0[r] .+ + [Particles(ensem_r[:, i]) for i in 1:length(t)] .* + (t .- prob.tspan[1]) + push!(ensemblecurves, ensemblecurve_r) + end + + else + ensemblecurve = prob.u0 .+ + [Particles(reduce(vcat, luxar)[:, i]) for i in 1:length(t)] .* + (t .- prob.tspan[1]) + push!(ensemblecurves, ensemblecurve) + end + + nnparams = length(θinit) + estimnnparams = [Particles(reduce(hcat, samples)[i, :]) for i in 1:nnparams] + + if ninv == 0 + estimated_params = [nothing] + else + estimated_params = [Particles(reduce(hcat, samples[(end - ninv + 1):end])[i, :]) + for i in (nnparams + 1):(nnparams + ninv)] + end + + BPINNsolution(fullsolution, ensemblecurves, estimnnparams, estimated_params) +end \ No newline at end of file diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl index 9bbd316a3b..5e842f1b49 100644 --- a/src/NeuralPDE.jl +++ b/src/NeuralPDE.jl @@ -24,6 +24,8 @@ using DomainSets using Symbolics using Symbolics: wrap, unwrap, arguments, operation, symtype using SymbolicUtils +using AdvancedHMC, LogDensityProblems, LinearAlgebra, Functors, MCMCChains +using MonteCarloMeasurements using SymbolicUtils.Code using SymbolicUtils: Prewalk, Postwalk, Chain import ModelingToolkit: value, nameof, toexpr, build_expr, expand_derivatives @@ -55,6 +57,8 @@ include("transform_inf_integral.jl") include("loss_function_generation.jl") include("discretize.jl") include("neural_adapter.jl") +include("advancedHMC_MCMC.jl") +include("BPINN_ode.jl") export NNODE, TerminalPDEProblem, NNPDEHan, NNPDENS, NNRODE, KolmogorovPDEProblem, NNKolmogorov, NNStopping, ParamKolmogorovPDEProblem, @@ -68,6 +72,6 @@ export NNODE, TerminalPDEProblem, NNPDEHan, NNPDENS, NNRODE, build_symbolic_equation, build_symbolic_loss_function, symbolic_discretize, AbstractAdaptiveLoss, NonAdaptiveLoss, GradientScaleAdaptiveLoss, MiniMaxAdaptiveLoss, - LogOptions + LogOptions, ahmc_bayesian_pinn_ode, BNNODE end # module diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl new file mode 100644 index 0000000000..df174e2538 --- /dev/null +++ b/src/advancedHMC_MCMC.jl @@ -0,0 +1,606 @@ +mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I, + P <: Vector{<:Distribution}, + D <: + Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}, +} + dim::Int + prob::DiffEqBase.ODEProblem + chain::C + st::S + strategy::ST + dataset::D + priors::P + phystd::Vector{Float64} + l2std::Vector{Float64} + autodiff::Bool + physdt::Float64 + extraparams::Int + init_params::I + + function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy, + dataset, + priors, phystd, l2std, autodiff, physdt, extraparams, + init_params::AbstractVector) + new{ + typeof(chain), + Nothing, + typeof(strategy), + typeof(init_params), + typeof(priors), + typeof(dataset), + }(dim, + prob, + chain, + nothing, strategy, + dataset, + priors, + phystd, + l2std, + autodiff, + physdt, + extraparams, + init_params) + end + function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy, + dataset, + priors, phystd, l2std, autodiff, physdt, extraparams, + init_params::NamedTuple) + new{ + typeof(chain), + typeof(st), + typeof(strategy), + typeof(init_params), + typeof(priors), + typeof(dataset), + }(dim, + prob, + chain, st, strategy, + dataset, priors, + phystd, l2std, + autodiff, + physdt, + extraparams, + init_params) + end +end + 
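+# The two inner constructors above correspond to the two supported network types: a Flux
+# chain passed through `Flux.destructure` (an `Optimisers.Restructure` with a flat parameter
+# vector and no state) and a `Lux.AbstractExplicitLayer` (a `NamedTuple` of parameters plus
+# a Lux state `st`).
+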
+""" +cool function to convert parameter's vector to ComponentArray of parameters (for Lux Chain: vector of samples -> Lux ComponentArrays) +""" +function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple) + @assert length(ps_new) == Lux.parameterlength(ps) + i = 1 + function get_ps(x) + z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x)) + i += length(x) + return z + end + return Functors.fmap(get_ps, ps) +end + +function LogDensityProblems.logdensity(Tar::LogTargetDensity, θ) + return physloglikelihood(Tar, θ) + priorweights(Tar, θ) + L2LossData(Tar, θ) +end + +LogDensityProblems.dimension(Tar::LogTargetDensity) = Tar.dim + +function LogDensityProblems.capabilities(::LogTargetDensity) + LogDensityProblems.LogDensityOrder{1}() +end + +""" +L2 loss loglikelihood(needed for ODE parameter estimation) +""" +function L2LossData(Tar::LogTargetDensity, θ) + # check if dataset is provided + if Tar.dataset isa Vector{Nothing} || Tar.extraparams == 0 + return 0 + else + # matrix(each row corresponds to vector u's rows) + nn = Tar(Tar.dataset[end], θ[1:(length(θ) - Tar.extraparams)]) + + L2logprob = 0 + for i in 1:length(Tar.prob.u0) + # for u[i] ith vector must be added to dataset,nn[1,:] is the dx in lotka_volterra + L2logprob += logpdf(MvNormal(nn[i, :], + LinearAlgebra.Diagonal(abs2.(Tar.l2std[i] .* + ones(length(Tar.dataset[i]))))), + Tar.dataset[i]) + end + return L2logprob + end +end + +""" +physics loglikelihood over problem timespan + dataset timepoints +""" +function physloglikelihood(Tar::LogTargetDensity, θ) + f = Tar.prob.f + p = Tar.prob.p + tspan = Tar.prob.tspan + autodiff = Tar.autodiff + strategy = Tar.strategy + + # parameter estimation chosen or not + if Tar.extraparams > 0 + ode_params = Tar.extraparams == 1 ? + θ[((length(θ) - Tar.extraparams) + 1):length(θ)][1] : + θ[((length(θ) - Tar.extraparams) + 1):length(θ)] + else + ode_params = p == SciMLBase.NullParameters() ? 
[] : p + end + + return getlogpdf(strategy, Tar, f, autodiff, tspan, ode_params, θ) +end + +function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::Bool, + tspan, + ode_params, θ) + if Tar.dataset isa Vector{Nothing} + t = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2]) + else + t = vcat(collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2]), + Tar.dataset[end]) + end + + sum(innerdiff(Tar, f, autodiff, t, θ, + ode_params)) +end + +function getlogpdf(strategy::StochasticTraining, + Tar::LogTargetDensity, + f, + autodiff::Bool, + tspan, + ode_params, + θ) + if Tar.dataset isa Vector{Nothing} + t = [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)] + else + t = vcat([(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)], + Tar.dataset[end]) + end + + sum(innerdiff(Tar, f, autodiff, t, θ, + ode_params)) +end + +function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f, + autodiff::Bool, + tspan, + ode_params, θ) + function integrand(t::Number, θ) + innerdiff(Tar, f, autodiff, [t], θ, ode_params) + end + intprob = IntegralProblem(integrand, tspan[1], tspan[2], θ; nout = length(Tar.prob.u0)) + sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol) + sum(sol.u) +end + +function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f, + autodiff::Bool, + tspan, + ode_params, θ) + minT = tspan[1] + maxT = tspan[2] + + weights = strategy.weights ./ sum(strategy.weights) + + N = length(weights) + points = strategy.points + + difference = (maxT - minT) / N + + data = Float64[] + for (index, item) in enumerate(weights) + temp_data = rand(1, trunc(Int, points * item)) .* difference .+ minT .+ + ((index - 1) * difference) + data = append!(data, temp_data) + end + + if Tar.dataset isa Vector{Nothing} + t = data + else + t = vcat(data, + Tar.dataset[end]) + end + + sum(innerdiff(Tar, f, autodiff, t, θ, + ode_params)) +end + +""" +MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ +""" +function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ, + ode_params) + + # Tar used for phi and LogTargetDensity object attributes access + out = Tar(t, θ[1:(length(θ) - Tar.extraparams)]) + + # # reject samples case(write clear reason why) + if any(isinf, out[:, 1]) || any(isinf, ode_params) + return -Inf + end + + # this is a vector{vector{dx,dy}}(handle case single u(float passed)) + if length(out[:, 1]) == 1 + physsol = [f(out[:, i][1], + ode_params, + t[i]) + for i in 1:length(out[1, :])] + else + physsol = [f(out[:, i], + ode_params, + t[i]) + for i in 1:length(out[1, :])] + end + physsol = reduce(hcat, physsol) + + nnsol = NNodederi(Tar, t, θ[1:(length(θ) - Tar.extraparams)], autodiff) + + vals = nnsol .- physsol + + # N dimensional vector if N outputs for NN(each row has logpdf of i[i] where u is vector of dependant variables) + return [logpdf(MvNormal(vals[i, :], + LinearAlgebra.Diagonal(abs2.(Tar.phystd[i] .* + ones(length(vals[i, :]))))), + zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)] +end + +""" +prior logpdf for NN parameters + ODE constants +""" +function priorweights(Tar::LogTargetDensity, θ) + allparams = Tar.priors + # nn weights + nnwparams = allparams[1] + + if Tar.extraparams > 0 + # Vector of ode parameters priors + invpriors = allparams[2:end] + + invlogpdf = sum(logpdf(invpriors[length(θ) - i + 1], θ[i]) + for i in (length(θ) - Tar.extraparams + 1):length(θ); init = 0.0) 
+ + return (invlogpdf + + + logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)])) + else + return logpdf(nnwparams, θ) + end +end + +function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params) + θ, st = Lux.setup(Random.default_rng(), chain) + return init_params, chain, st +end + +function generate_Tar(chain::Lux.AbstractExplicitLayer, init_params::Nothing) + θ, st = Lux.setup(Random.default_rng(), chain) + return θ, chain, st +end + +function generate_Tar(chain::Flux.Chain, init_params) + θ, re = Flux.destructure(chain) + return init_params, re, nothing +end + +function generate_Tar(chain::Flux.Chain, init_params::Nothing) + θ, re = Flux.destructure(chain) + # find_good_stepsize,phasepoint takes only float64 + return θ, re, nothing +end + +""" +nn OUTPUT AT t,θ ~ phi(t,θ) +""" +function (f::LogTargetDensity{C, S})(t::AbstractVector, + θ) where {C <: Optimisers.Restructure, S} + f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* f.chain(θ)(adapt(parameterless_type(θ), t')) +end + +function (f::LogTargetDensity{C, S})(t::AbstractVector, + θ) where {C <: Lux.AbstractExplicitLayer, S} + θ = vector_to_parameters(θ, f.init_params) + y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st) + ChainRulesCore.@ignore_derivatives f.st = st + f.prob.u0 .+ (t' .- f.prob.tspan[1]) .* y +end + +function (f::LogTargetDensity{C, S})(t::Number, + θ) where {C <: Optimisers.Restructure, S} + # must handle paired odes hence u0 broadcasted + f.prob.u0 .+ (t - f.prob.tspan[1]) * f.chain(θ)(adapt(parameterless_type(θ), [t])) +end + +function (f::LogTargetDensity{C, S})(t::Number, + θ) where {C <: Lux.AbstractExplicitLayer, S} + θ = vector_to_parameters(θ, f.init_params) + y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st) + ChainRulesCore.@ignore_derivatives f.st = st + f.prob.u0 .+ (t .- f.prob.tspan[1]) .* y +end + +""" +similar to ode_dfdx() in NNODE/ode_solve.jl +""" +function NNodederi(phi::LogTargetDensity, t::AbstractVector, θ, autodiff::Bool) + if autodiff + hcat(ForwardDiff.derivative.(ti -> phi(ti, θ), t)...) 
+ else + (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t))) + end +end + +function kernelchoice(Kernel, MCMCkwargs) + if Kernel == HMCDA + δ, λ = MCMCkwargs[:δ], MCMCkwargs[:λ] + Kernel(δ, λ) + elseif Kernel == NUTS + δ, max_depth, Δ_max = MCMCkwargs[:δ], MCMCkwargs[:max_depth], MCMCkwargs[:Δ_max] + Kernel(δ, max_depth = max_depth, Δ_max = Δ_max) + else + # HMC + n_leapfrog = MCMCkwargs[:n_leapfrog] + Kernel(n_leapfrog) + end +end + +function integratorchoice(Integratorkwargs, initial_ϵ) + Integrator = Integratorkwargs[:Integrator] + if Integrator == JitteredLeapfrog + jitter_rate = Integratorkwargs[:jitter_rate] + Integrator(initial_ϵ, jitter_rate) + elseif Integrator == TemperedLeapfrog + tempering_rate = Integratorkwargs[:tempering_rate] + Integrator(initial_ϵ, tempering_rate) + else + Integrator(initial_ϵ) + end +end + +function adaptorchoice(Adaptor, mma, ssa) + if Adaptor != AdvancedHMC.NoAdaptation() + Adaptor(mma, ssa) + else + AdvancedHMC.NoAdaptation() + end +end + +""" +```julia +ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining, + dataset = [nothing],init_params = nothing, + draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05], + phystd = [0.05], priorsNNw = (0.0, 2.0), + param = [], nchains = 1, autodiff = false, Kernel = HMC, + Adaptorkwargs = (Adaptor = StanHMCAdaptor, + Metric = DiagEuclideanMetric, targetacceptancerate = 0.8), + Integratorkwargs = (Integrator = Leapfrog,), + MCMCkwargs = (n_leapfrog = 30,), + progress = false, verbose = false) +``` +!!! warn + + Note that ahmc_bayesian_pinn_ode() only supports ODEs which are written in the out-of-place form, i.e. + `du = f(u,p,t)`, and not `f(du,u,p,t)`. If the problem is not declared out-of-place, then ahmc_bayesian_pinn_ode() + will exit with an error. + +## Example +linear = (u, p, t) -> -u / p[1] + exp(t / p[2]) * cos(t) +tspan = (0.0, 10.0) +u0 = 0.0 +p = [5.0, -5.0] +prob = ODEProblem(linear, u0, tspan, p) + +# CREATE DATASET (needed for accurate parameter estimation) +sol = solve(prob, Tsit5(); saveat = 0.05) +u = sol.u[1:100] +time = sol.t[1:100] + +# create the noisy dataset for the BPINN +x̂ = collect(Float64, Array(u) + 0.05 * randn(size(u))) +dataset = [x̂, time] + +chainflux1 = Flux.Chain(Flux.Dense(1, 5, tanh), Flux.Dense(5, 5, tanh), Flux.Dense(5, 1)) + +# simply solving the ODE here, so the dataset is not strictly needed (uses the ODE params specified in prob) +fh_mcmc_chainflux1, fhsamplesflux1, fhstatsflux1 = ahmc_bayesian_pinn_ode(prob,chainflux1, + dataset = dataset, + draw_samples = 1500, + l2std = [0.05], + phystd = [0.05], + priorsNNw = (0.0,3.0)) + +# solving the ODE + estimating parameters, hence a dataset is needed to optimize the parameters, along with Prior Distributions for the ODE params +fh_mcmc_chainflux2, fhsamplesflux2, fhstatsflux2 = ahmc_bayesian_pinn_ode(prob,chainflux1, + dataset = dataset, + draw_samples = 1500, + l2std = [0.05], + phystd = [0.05], + priorsNNw = (0.0,3.0), + param = [Normal(6.5,0.5),Normal(-3,0.5)]) + +## NOTES +A dataset is required for accurate parameter estimation while also solving the equation. +In case you are only solving the equation for its solution, do not provide a dataset. + +## Positional Arguments +* `prob`: DEProblem (out-of-place, and the function signature should be f(u,p,t)) +* `chain`: Lux/Flux Neural Network which would be made the Bayesian PINN + +## Keyword Arguments +* `strategy`: The training strategy used to choose the points for the evaluations. By default GridTraining is used with the given physdt discretization.
+* `dataset`: Vector containing Vectors of corresponding u,t values +* `init_params`: initial parameter values for BPINN (ideally, for multiple chains, different initializations are preferred) +* `nchains`: number of chains you want to sample (random initialisation of params by default) +* `draw_samples`: number of samples to be drawn in the MCMC algorithms (warmup samples are ~2/3 of draw samples) +* `l2std`: standard deviation of BPINN prediction against L2 losses/Dataset +* `phystd`: standard deviation of BPINN prediction against the chosen underlying ODE system +* `priorsNNw`: Vector of [mean, std] for BPINN parameters. Weights and Biases of BPINN are Normal Distributions by default +* `param`: Vector of chosen ODE parameter Distributions in case of Inverse problems. +* `autodiff`: Boolean value for the choice of derivative backend (default is numerical) +* `physdt`: Timestep for approximating the ODE in its time domain. (1/20.0 by default) + +# AdvancedHMC.jl is still developing convenience structs so might need changes on new releases. +* `Kernel`: Choice of MCMC Sampling Algorithm (AdvancedHMC.jl implementations HMC/NUTS/HMCDA) +* `Integratorkwargs`: A NamedTuple containing the chosen integrator and its keyword Arguments, as follows : + * `Integrator`: https://turinglang.org/AdvancedHMC.jl/stable/ + * `jitter_rate`: https://turinglang.org/AdvancedHMC.jl/stable/ + * `tempering_rate`: https://turinglang.org/AdvancedHMC.jl/stable/ +* `Adaptorkwargs`: A NamedTuple containing the chosen Adaptor, its Metric and targetacceptancerate, as follows : + * `Adaptor`: https://turinglang.org/AdvancedHMC.jl/stable/ + * `Metric`: https://turinglang.org/AdvancedHMC.jl/stable/ + * `targetacceptancerate`: Target acceptance rate (as a decimal) for the proposals (0.8 by default) +* `MCMCkwargs`: A NamedTuple containing all the chosen MCMC kernel's (HMC/NUTS/HMCDA) Arguments, as follows : + * `n_leapfrog`: number of leapfrog steps for HMC + * `δ`: target acceptance probability for NUTS and HMCDA + * `λ`: target trajectory length for HMCDA + * `max_depth`: Maximum doubling tree depth (NUTS) + * `Δ_max`: Maximum divergence during doubling tree (NUTS) +* `progress`: controls whether to show the progress meter or not. +* `verbose`: controls the verbosity. (Sample call args in AHMC) + +""" + +""" +dataset would be (x̂,t) +priors: pdf for W,b + pdf for ODE params +""" +function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain; + strategy = GridTraining, dataset = [nothing], + init_params = nothing, draw_samples = 1000, + physdt = 1 / 20.0, l2std = [0.05], + phystd = [0.05], priorsNNw = (0.0, 2.0), + param = [], nchains = 1, autodiff = false, + Kernel = HMC, + Adaptorkwargs = (Adaptor = StanHMCAdaptor, + Metric = DiagEuclideanMetric, targetacceptancerate = 0.8), + Integratorkwargs = (Integrator = Leapfrog,), + MCMCkwargs = (n_leapfrog = 30,), + progress = false, verbose = false) + + # NN parameter prior mean and variance (priorsNNw must be a tuple) + if isinplace(prob) + throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t).")) + end + + strategy = strategy == GridTraining ? strategy(physdt) : strategy + + if dataset != [nothing] && + (length(dataset) < 2 || !(dataset isa Vector{<:Vector{<:AbstractFloat}})) + throw(error("Invalid dataset.
dataset would be timeseries (x̂,t) where type: Vector{Vector{AbstractFloat}")) + end + + if dataset != [nothing] && param == [] + println("Dataset is only needed for Parameter Estimation + Forward Problem, not in only Forward Problem case.") + elseif dataset == [nothing] && param != [] + throw(error("Dataset Required for Parameter Estimation.")) + end + + if chain isa Lux.AbstractExplicitLayer || chain isa Flux.Chain + # Flux-vector, Lux-Named Tuple + initial_nnθ, recon, st = generate_Tar(chain, init_params) + else + error("Only Lux.AbstractExplicitLayer and Flux.Chain neural networks are supported") + end + + if nchains > Threads.nthreads() + throw(error("number of chains is greater than available threads")) + elseif nchains < 1 + throw(error("number of chains must be greater than 1")) + end + + # eltype(physdt) cause needs Float64 for find_good_stepsize + if chain isa Lux.AbstractExplicitLayer + # Lux chain(using component array later as vector_to_parameter need namedtuple) + initial_θ = collect(eltype(physdt), + vcat(ComponentArrays.ComponentArray(initial_nnθ))) + else + initial_θ = collect(eltype(physdt), initial_nnθ) + end + + # adding ode parameter estimation + nparameters = length(initial_θ) + ninv = length(param) + priors = [ + MvNormal(priorsNNw[1] * ones(nparameters), + LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters)))), + ] + + # append Ode params to all paramvector + if ninv > 0 + # shift ode params(initialise ode params by prior means) + initial_θ = vcat(initial_θ, [Distributions.params(param[i])[1] for i in 1:ninv]) + priors = vcat(priors, param) + nparameters += ninv + end + + t0 = prob.tspan[1] + # dimensions would be total no of params,initial_nnθ for Lux namedTuples + ℓπ = LogTargetDensity(nparameters, prob, recon, st, strategy, dataset, priors, + phystd, l2std, autodiff, physdt, ninv, initial_nnθ) + + try + ℓπ(t0, initial_θ[1:(nparameters - ninv)]) + catch err + if isa(err, DimensionMismatch) + throw(DimensionMismatch("Dimensions of the initial u0 and chain should match")) + else + throw(err) + end + end + + Adaptor, Metric, targetacceptancerate = Adaptorkwargs[:Adaptor], + Adaptorkwargs[:Metric], Adaptorkwargs[:targetacceptancerate] + + # Define Hamiltonian system (nparameters ~ dimensionality of the sampling space) + metric = Metric(nparameters) + hamiltonian = Hamiltonian(metric, ℓπ, ForwardDiff) + + # parallel sampling option + if nchains != 1 + # Cache to store the chains + chains = Vector{Any}(undef, nchains) + statsc = Vector{Any}(undef, nchains) + samplesc = Vector{Any}(undef, nchains) + + Threads.@threads for i in 1:nchains + # each chain has different initial NNparameter values(better posterior exploration) + initial_θ = vcat(randn(nparameters - ninv), + initial_θ[(nparameters - ninv + 1):end]) + initial_ϵ = find_good_stepsize(hamiltonian, initial_θ) + integrator = integratorchoice(Integratorkwargs, initial_ϵ) + adaptor = adaptorchoice(Adaptor, MassMatrixAdaptor(metric), + StepSizeAdaptor(targetacceptancerate, integrator)) + + MCMC_alg = kernelchoice(Kernel, MCMCkwargs) + Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator) + samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, adaptor; + progress = progress, verbose = verbose) + + samplesc[i] = samples + statsc[i] = stats + mcmc_chain = Chains(hcat(samples...)') + chains[i] = mcmc_chain + end + + return chains, samplesc, statsc + else + initial_ϵ = find_good_stepsize(hamiltonian, initial_θ) + integrator = integratorchoice(Integratorkwargs, initial_ϵ) + adaptor = 
adaptorchoice(Adaptor, MassMatrixAdaptor(metric), + StepSizeAdaptor(targetacceptancerate, integrator)) + + MCMC_alg = kernelchoice(Kernel, MCMCkwargs) + Kernel = AdvancedHMC.make_kernel(MCMC_alg, integrator) + samples, stats = sample(hamiltonian, Kernel, initial_θ, draw_samples, + adaptor; progress = progress, verbose = verbose) + + # return a chain(basic chain),samples and stats + matrix_samples = hcat(samples...) + mcmc_chain = MCMCChains.Chains(matrix_samples') + return mcmc_chain, samples, stats + end +end \ No newline at end of file diff --git a/src/ode_solve.jl b/src/ode_solve.jl index 84e6daf29a..4ad843283f 100644 --- a/src/ode_solve.jl +++ b/src/ode_solve.jl @@ -30,6 +30,7 @@ of the physics-informed neural network which is used as a solver for a standard ## Example ```julia +u0 = [1.0, 1.0] ts=[t for t in 1:100] (u_, t_) = (analytical_func(ts), ts) function additional_loss(phi, θ) @@ -120,23 +121,21 @@ mutable struct ODEPhi{C, T, U, S} end end -function generate_phi_θ(chain::Lux.AbstractExplicitLayer, t, u0, init_params::Nothing) - θ, st = Lux.setup(Random.default_rng(), chain) - ODEPhi(chain, t, u0, st), ComponentArrays.ComponentArray(θ) -end - function generate_phi_θ(chain::Lux.AbstractExplicitLayer, t, u0, init_params) θ, st = Lux.setup(Random.default_rng(), chain) - ODEPhi(chain, t, u0, st), ComponentArrays.ComponentArray(init_params) -end - -function generate_phi_θ(chain::Flux.Chain, t, u0, init_params::Nothing) - θ, re = Flux.destructure(chain) - ODEPhi(re, t, u0), θ + if init_params === nothing + init_params = ComponentArrays.ComponentArray(θ) + else + init_params = ComponentArrays.ComponentArray(init_params) + end + ODEPhi(chain, t, u0, st), init_params end function generate_phi_θ(chain::Flux.Chain, t, u0, init_params) θ, re = Flux.destructure(chain) + if init_params === nothing + init_params = θ + end ODEPhi(re, t, u0), init_params end @@ -185,7 +184,7 @@ end function (f::ODEPhi{C, T, U})(t::AbstractVector, θ) where {C <: Optimisers.Restructure, T, U} - f.u0 .+ (t .- f.t0) .* f.chain(θ)(adapt(parameterless_type(θ), t')) + f.u0 .+ (t .- f.t0)' .* f.chain(θ)(adapt(parameterless_type(θ), t')) end """ @@ -258,6 +257,7 @@ Representation of the loss function, parametric on the training strategy `strate function generate_loss(strategy::QuadratureTraining, phi, f, autodiff::Bool, tspan, p, batch) integrand(t::Number, θ) = abs2(inner_loss(phi, f, autodiff, t, θ, p)) + integrand(ts, θ) = [abs2(inner_loss(phi, f, autodiff, t, θ, p)) for t in ts] @assert batch == 0 # not implemented @@ -290,6 +290,7 @@ function generate_loss(strategy::StochasticTraining, phi, f, autodiff::Bool, tsp function loss(θ, _) ts = adapt(parameterless_type(θ), [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)]) + if batch sum(abs2, inner_loss(phi, f, autodiff, ts, θ, p)) else @@ -307,13 +308,13 @@ function generate_loss(strategy::WeightedIntervalTraining, phi, f, autodiff::Boo weights = strategy.weights ./ sum(strategy.weights) N = length(weights) - samples = strategy.samples + points = strategy.points difference = (maxT - minT) / N data = Float64[] for (index, item) in enumerate(weights) - temp_data = rand(1, trunc(Int, samples * item)) .* difference .+ minT .+ + temp_data = rand(1, trunc(Int, points * item)) .* difference .+ minT .+ ((index - 1) * difference) data = append!(data, temp_data) end @@ -330,10 +331,24 @@ function generate_loss(strategy::WeightedIntervalTraining, phi, f, autodiff::Boo return loss end +function evaluate_tstops_loss(phi, f, autodiff::Bool, tstops, p, batch) + + 
# sum(abs2,inner_loss(t,θ) for t in ts) but Zygote generators are broken + function loss(θ, _) + if batch + sum(abs2, inner_loss(phi, f, autodiff, tstops, θ, p)) + else + sum(abs2, [inner_loss(phi, f, autodiff, t, θ, p) for t in tstops]) + end + end + return loss +end + function generate_loss(strategy::QuasiRandomTraining, phi, f, autodiff::Bool, tspan) error("QuasiRandomTraining is not supported by NNODE since it's for high dimensional spaces only. Use StochasticTraining instead.") end + struct NNODEInterpolation{T <: ODEPhi, T2} phi::T θ::T2 @@ -364,7 +379,8 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem, reltol = 1.0f-3, verbose = false, saveat = nothing, - maxiters = nothing) + maxiters = nothing, + tstops = nothing) u0 = prob.u0 tspan = prob.tspan f = prob.f @@ -429,9 +445,27 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem, function total_loss(θ, _) L2_loss = inner_f(θ, phi) if !(additional_loss isa Nothing) - return additional_loss(phi, θ) + L2_loss + L2_loss = L2_loss + additional_loss(phi, θ) end - L2_loss + if !(tstops isa Nothing) + num_tstops_points = length(tstops) + tstops_loss_func = evaluate_tstops_loss(phi, f, autodiff, tstops, p, batch) + tstops_loss = tstops_loss_func(θ, phi) + if strategy isa GridTraining + num_original_points = length(tspan[1]:(strategy.dx):tspan[2]) + elseif strategy isa Union{WeightedIntervalTraining, StochasticTraining} + num_original_points = strategy.points + else + return L2_loss + tstops_loss + end + + total_original_loss = L2_loss * num_original_points + total_tstops_loss = tstops_loss * num_original_points + total_points = num_original_points + num_tstops_points + L2_loss = (total_original_loss + total_tstops_loss) / total_points + + end + return L2_loss end # Choice of Optimization Algo for Training Strategies @@ -440,7 +474,6 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem, else Optimization.AutoZygote() end - # Creates OptimizationFunction Object from total_loss optf = OptimizationFunction(total_loss, opt_algo) diff --git a/src/pinn_types.jl b/src/pinn_types.jl index a4a03ddc5f..90ef0fa65f 100644 --- a/src/pinn_types.jl +++ b/src/pinn_types.jl @@ -67,7 +67,7 @@ methodology. should only be used to more directly impose functional information in the training problem, for example imposing the boundary condition by the test function formulation. * `adaptive_loss`: the choice for the adaptive loss function. See the - [adaptive loss page](@id adaptive_loss) for more details. Defaults to no adaptivity. + [adaptive loss page](@ref adaptive_loss) for more details. Defaults to no adaptivity. * `additional_loss`: a function `additional_loss(phi, θ, p_)` where `phi` are the neural network trial solutions, `θ` are the weights of the neural network(s), and `p_` are the hyperparameters of the `OptimizationProblem`. If `param_estim = true`, then `θ` additionally @@ -106,8 +106,8 @@ struct PhysicsInformedNN{T, P, PH, DER, PE, AL, ADA, LOG, K} <: SciMLBase.Abstra logger = nothing, log_options = LogOptions(), iteration = nothing, - kwargs...) where {iip} - multioutput = typeof(chain) <: AbstractArray + kwargs...) 
+ multioutput = chain isa AbstractArray if phi === nothing if multioutput diff --git a/src/training_strategies.jl b/src/training_strategies.jl index e2e696a61e..1eafeec15a 100644 --- a/src/training_strategies.jl +++ b/src/training_strategies.jl @@ -312,7 +312,7 @@ such that the total number of sampled points is equivalent to the given samples ## Positional Arguments * `weights`: A vector of weights that should sum to 1, representing the proportion of samples at each interval. -* `samples`: the total number of samples that we want, across the entire time span +* `points`: the total number of samples that we want, across the entire time span ## Limitations @@ -320,15 +320,15 @@ This training strategy can only be used with ODEs (`NNODE`). """ struct WeightedIntervalTraining{T} <: AbstractGridfreeStrategy weights::Vector{T} - samples::Int + points::Int end -function WeightedIntervalTraining(weights, samples) - WeightedIntervalTraining(weights, samples) +function WeightedIntervalTraining(weights, points) + WeightedIntervalTraining(weights, points) end function get_loss_function(loss_function, train_set, eltypeθ, strategy::WeightedIntervalTraining; τ = nothing) loss = (θ) -> mean(abs2, loss_function(train_set, θ)) -end +end \ No newline at end of file diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl new file mode 100644 index 0000000000..4873d5f457 --- /dev/null +++ b/test/BPINN_Tests.jl @@ -0,0 +1,348 @@ +# # Testing Code +using Test, MCMCChains +using ForwardDiff, Distributions, OrdinaryDiffEq +using Flux, OptimizationOptimisers, AdvancedHMC, Lux +using Statistics, Random, Functors, ComponentArrays +using NeuralPDE, MonteCarloMeasurements + +# note that current testing bounds can be easily further tightened but have been inflated for support for Julia build v1 +# on latest Julia version it performs much better for below tests +Random.seed!(100) + +# for sampled params->lux ComponentArray +function vector_to_parameters(ps_new::AbstractVector, ps::NamedTuple) + @assert length(ps_new) == Lux.parameterlength(ps) + i = 1 + function get_ps(x) + z = reshape(view(ps_new, i:(i + length(x) - 1)), size(x)) + i += length(x) + return z + end + return Functors.fmap(get_ps, ps) +end + +## PROBLEM-1 (WITHOUT PARAMETER ESTIMATION) +linear_analytic = (u0, p, t) -> u0 + sin(2 * π * t) / (2 * π) +linear = (u, p, t) -> cos(2 * π * t) +tspan = (0.0, 2.0) +u0 = 0.0 +prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan) +p = prob.p + +# Numerical and Analytical Solutions: testing ahmc_bayesian_pinn_ode() +ta = range(tspan[1], tspan[2], length = 300) +u = [linear_analytic(u0, nothing, ti) for ti in ta] +x̂ = collect(Float64, Array(u) + 0.02 * randn(size(u))) +time = vec(collect(Float64, ta)) +physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)] + +# testing points for solve() call must match saveat(1/50.0) arg +ta0 = range(tspan[1], tspan[2], length = 101) +u1 = [linear_analytic(u0, nothing, ti) for ti in ta0] +x̂1 = collect(Float64, Array(u1) + 0.02 * randn(size(u1))) +time1 = vec(collect(Float64, ta0)) +physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] + +chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64 +chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1)) +init1, re1 = destructure(chainflux) +θinit, st = Lux.setup(Random.default_rng(), chainlux) + +fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux, + draw_samples = 2500) + +fh_mcmc_chain2, fhsamples2, fhstats2 = 
ahmc_bayesian_pinn_ode(prob, chainlux, + draw_samples = 2500) + +# can change training strategies by adding this to call (Quadratuer and GridTraining show good results but stochastics sampling techniques perform bad) +# strategy = QuadratureTraining(; quadrature_alg = QuadGKJL(), +# reltol = 1e-6, +# abstol = 1e-3, maxiters = 1000, +# batch = 0) + +alg = NeuralPDE.BNNODE(chainflux, draw_samples = 2500) +sol1flux = solve(prob, alg) + +alg = NeuralPDE.BNNODE(chainlux, draw_samples = 2500) +sol1lux = solve(prob, alg) + +# testing points +t = time +# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions] +out = re1.(fhsamples1[(end - 500):end]) +yu = collect(out[i](t') for i in eachindex(out)) +fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)] +meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean + +θ = [vector_to_parameters(fhsamples1[i], θinit) for i in 2000:2500] +luxar = [chainlux(t', θ[i], st)[1] for i in 1:500] +luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] +meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean + +# --------------------- ahmc_bayesian_pinn_ode() call +@test mean(abs.(x̂ .- meanscurve1)) < 0.05 +@test mean(abs.(physsol1 .- meanscurve1)) < 0.005 +@test mean(abs.(x̂ .- meanscurve2)) < 0.05 +@test mean(abs.(physsol1 .- meanscurve2)) < 0.005 + +#--------------------- solve() call +@test mean(abs.(x̂1 .- sol1flux.ensemblesol[1])) < 0.05 +@test mean(abs.(physsol0_1 .- sol1flux.ensemblesol[1])) < 0.05 +@test mean(abs.(x̂1 .- sol1lux.ensemblesol[1])) < 0.05 +@test mean(abs.(physsol0_1 .- sol1lux.ensemblesol[1])) < 0.05 + +## PROBLEM-1 (WITH PARAMETER ESTIMATION) +linear_analytic = (u0, p, t) -> u0 + sin(p * t) / (p) +linear = (u, p, t) -> cos(p * t) +tspan = (0.0, 2.0) +u0 = 0.0 +p = 2 * pi +prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), u0, tspan, p) + +# Numerical and Analytical Solutions +sol1 = solve(prob, Tsit5(); saveat = 0.01) +u = sol1.u +time = sol1.t + +# BPINN AND TRAINING DATASET CREATION(dataset must be defined only inside problem timespan!) 
+ta = range(tspan[1], tspan[2], length = 100) +u = [linear_analytic(u0, p, ti) for ti in ta] +x̂ = collect(Float64, Array(u) + 0.2 * randn(size(u))) +time = vec(collect(Float64, ta)) +dataset = [x̂, time] +physsol1 = [linear_analytic(prob.u0, p, time[i]) for i in eachindex(time)] + +# testing points for solve call(saveat=1/50.0 ∴ at t = collect(eltype(saveat), prob.tspan[1]:saveat:prob.tspan[2] internally estimates) +ta0 = range(tspan[1], tspan[2], length = 101) +u1 = [linear_analytic(u0, p, ti) for ti in ta0] +x̂1 = collect(Float64, Array(u1) + 0.2 * randn(size(u1))) +time1 = vec(collect(Float64, ta0)) +physsol1_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] + +chainflux1 = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64 +chainlux1 = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1)) +init1, re1 = destructure(chainflux1) +θinit, st = Lux.setup(Random.default_rng(), chainlux1) + +fh_mcmc_chain1, fhsamples1, fhstats1 = ahmc_bayesian_pinn_ode(prob, chainflux1, + dataset = dataset, + draw_samples = 2500, + physdt = 1 / 50.0, + priorsNNw = (0.0, + 3.0), + param = [ + LogNormal(9, + 0.5), + ]) + +fh_mcmc_chain2, fhsamples2, fhstats2 = ahmc_bayesian_pinn_ode(prob, chainlux1, + dataset = dataset, + draw_samples = 2500, + physdt = 1 / 50.0, + priorsNNw = (0.0, 3.0), + param = [LogNormal(9, 0.5)]) + +alg = NeuralPDE.BNNODE(chainflux1, dataset = dataset, + draw_samples = 2500, physdt = 1 / 50.0, + priorsNNw = (0.0, 3.0), + param = [LogNormal(9, 0.5)]) + +sol2flux = solve(prob, alg) + +alg = NeuralPDE.BNNODE(chainlux1, dataset = dataset, + draw_samples = 2500, + physdt = 1 / 50.0, + priorsNNw = (0.0, + 3.0), + param = [ + LogNormal(9, + 0.5), + ]) + +sol2lux = solve(prob, alg) + +# testing points +t = time +# Mean of last 500 sampled parameter's curves(flux and lux chains)[Ensemble predictions] +out = re1.([fhsamples1[i][1:22] for i in 2000:2500]) +yu = collect(out[i](t') for i in eachindex(out)) +fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)] +meanscurve1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean + +θ = [vector_to_parameters(fhsamples2[i][1:(end - 1)], θinit) for i in 2000:2500] +luxar = [chainlux1(t', θ[i], st)[1] for i in 1:500] +luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] +meanscurve2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean + +# --------------------- ahmc_bayesian_pinn_ode() call +@test mean(abs.(physsol1 .- meanscurve1)) < 0.15 +@test mean(abs.(physsol1 .- meanscurve2)) < 0.15 + +# ESTIMATED ODE PARAMETERS (NN1 AND NN2) +@test abs(p - mean([fhsamples2[i][23] for i in 2000:2500])) < abs(0.35 * p) +@test abs(p - mean([fhsamples1[i][23] for i in 2000:2500])) < abs(0.35 * p) + +#-------------------------- solve() call +@test mean(abs.(physsol1_1 .- sol2flux.ensemblesol[1])) < 8e-2 +@test mean(abs.(physsol1_1 .- sol2lux.ensemblesol[1])) < 8e-2 + +# ESTIMATED ODE PARAMETERS (NN1 AND NN2) +@test abs(p - sol2flux.estimated_ode_params[1]) < abs(0.15 * p) +@test abs(p - sol2lux.estimated_ode_params[1]) < abs(0.15 * p) + +## PROBLEM-2 +linear = (u, p, t) -> u / p + exp(t / p) * cos(t) +tspan = (0.0, 10.0) +u0 = 0.0 +p = -5.0 +prob = ODEProblem(linear, u0, tspan, p) +linear_analytic = (u0, p, t) -> exp(t / p) * (u0 + sin(t)) + +# SOLUTION AND CREATE DATASET +sol = solve(prob, Tsit5(); saveat = 0.1) +u = sol.u +time = sol.t +x̂ = u .+ (u .* 0.2) .* randn(size(u)) +dataset = [x̂, time] +t = sol.t +physsol1 = [linear_analytic(prob.u0, p, t[i]) for i in eachindex(t)] + +ta0 = range(tspan[1], tspan[2], length = 501) +u1 = 
[linear_analytic(u0, p, ti) for ti in ta0] +time1 = vec(collect(Float64, ta0)) +physsol2 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] + +chainflux12 = Flux.Chain(Flux.Dense(1, 6, tanh), Flux.Dense(6, 6, tanh), + Flux.Dense(6, 1)) |> Flux.f64 +chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1)) +init1, re1 = destructure(chainflux12) +θinit, st = Lux.setup(Random.default_rng(), chainlux12) + +fh_mcmc_chainflux12, fhsamplesflux12, fhstatsflux12 = ahmc_bayesian_pinn_ode(prob, + chainflux12, + draw_samples = 1500, + l2std = [0.03], + phystd = [ + 0.03], + priorsNNw = (0.0, + 10.0)) + +fh_mcmc_chainflux22, fhsamplesflux22, fhstatsflux22 = ahmc_bayesian_pinn_ode(prob, + chainflux12, + dataset = dataset, + draw_samples = 1500, + l2std = [0.03], + phystd = [ + 0.03, + ], + priorsNNw = (0.0, + 10.0), + param = [ + Normal(-7, + 4), + ]) + +fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12, + draw_samples = 1500, + l2std = [0.03], + phystd = [0.03], + priorsNNw = (0.0, + 10.0)) + +fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, chainlux12, + dataset = dataset, + draw_samples = 1500, + l2std = [0.03], + phystd = [0.03], + priorsNNw = (0.0, + 10.0), + param = [ + Normal(-7, + 4), + ]) + +alg = NeuralPDE.BNNODE(chainflux12, + dataset = dataset, + draw_samples = 1500, + l2std = [0.03], + phystd = [ + 0.03, + ], + priorsNNw = (0.0, + 10.0), + param = [ + Normal(-7, + 4), + ]) + +sol3flux_pestim = solve(prob, alg) + +alg = NeuralPDE.BNNODE(chainlux12, + dataset = dataset, + draw_samples = 1500, + l2std = [0.03], + phystd = [0.03], + priorsNNw = (0.0, + 10.0), + param = [ + Normal(-7, + 4), + ]) + +sol3lux_pestim = solve(prob, alg) + +# testing timepoints +t = sol.t +#------------------------------ ahmc_bayesian_pinn_ode() call +# Mean of last 500 sampled parameter's curves(flux chains)[Ensemble predictions] +out = re1.([fhsamplesflux12[i][1:61] for i in 1000:1500]) +yu = [out[i](t') for i in eachindex(out)] +fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)] +meanscurve1_1 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean + +out = re1.([fhsamplesflux22[i][1:61] for i in 1000:1500]) +yu = [out[i](t') for i in eachindex(out)] +fluxmean = [mean(vcat(yu...)[:, i]) for i in eachindex(t)] +meanscurve1_2 = prob.u0 .+ (t .- prob.tspan[1]) .* fluxmean + +@test mean(abs.(sol.u .- meanscurve1_1)) < 1e-2 +@test mean(abs.(physsol1 .- meanscurve1_1)) < 1e-2 +@test mean(abs.(sol.u .- meanscurve1_2)) < 5e-2 +@test mean(abs.(physsol1 .- meanscurve1_2)) < 5e-2 + +# estimated parameters(flux chain) +param1 = mean(i[62] for i in fhsamplesflux22[1000:1500]) +@test abs(param1 - p) < abs(0.3 * p) + +# Mean of last 500 sampled parameter's curves(lux chains)[Ensemble predictions] +θ = [vector_to_parameters(fhsampleslux12[i], θinit) for i in 1000:1500] +luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500] +luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] +meanscurve2_1 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean + +θ = [vector_to_parameters(fhsampleslux22[i][1:(end - 1)], θinit) for i in 1000:1500] +luxar = [chainlux12(t', θ[i], st)[1] for i in 1:500] +luxmean = [mean(vcat(luxar...)[:, i]) for i in eachindex(t)] +meanscurve2_2 = prob.u0 .+ (t .- prob.tspan[1]) .* luxmean + +@test mean(abs.(sol.u .- meanscurve2_1)) < 1e-1 +@test mean(abs.(physsol1 .- meanscurve2_1)) < 1e-1 +@test mean(abs.(sol.u .- meanscurve2_2)) < 5e-2 +@test mean(abs.(physsol1 .- meanscurve2_2)) < 5e-2 + +# estimated 
parameters(lux chain) +param1 = mean(i[62] for i in fhsampleslux22[1000:1500]) +@test abs(param1 - p) < abs(0.3 * p) + +#-------------------------- solve() call +# (flux chain) +@test mean(abs.(physsol2 .- sol3flux_pestim.ensemblesol[1])) < 0.15 +# estimated parameters(flux chain) +param1 = sol3flux_pestim.estimated_ode_params[1] +@test abs(param1 - p) < abs(0.45 * p) + +# (lux chain) +@test mean(abs.(physsol2 .- sol3lux_pestim.ensemblesol[1])) < 0.15 +# estimated parameters(lux chain) +param1 = sol3lux_pestim.estimated_ode_params[1] +@test abs(param1 - p) < abs(0.45 * p) \ No newline at end of file diff --git a/test/NNODE_tests.jl b/test/NNODE_tests.jl index 1839f57c7d..7e3dec5222 100644 --- a/test/NNODE_tests.jl +++ b/test/NNODE_tests.jl @@ -207,6 +207,22 @@ sol = solve(prob, NeuralPDE.NNODE(luxchain, opt; batch = true), verbose = true, abstol = 1.0f-8, dt = 1 / 5.0f0) @test sol.errors[:l2] < 0.5 +#Example 3 ODEs system +linear = (u, p, t) -> [cos(2pi * t), sin(2pi * t)] +tspan = (0.0f0, 1.0f0) +u0 = [0.0f0, -1.0f0 / 2pi] +linear_analytic = (u0, p, t) -> [sin(2pi * t) / 2pi, -cos(2pi * t) / 2pi] +odefunction = ODEFunction(linear, analytic = linear_analytic) +prob = ODEProblem(odefunction, u0, tspan) +chain = Flux.Chain(Dense(1, 10, σ), Dense(10, 2)) +opt = OptimizationOptimisers.Adam(0.1) +alg = NeuralPDE.NNODE(chain, opt; autodiff = false) + +sol = solve(prob, + alg, verbose = true, dt = 1 / 40.0f0, + maxiters = 2000, abstol = 1.0f-7) +@test sol.errors[:l2] < 0.5 + # WeightedIntervalTraining(Lux Chain) function f(u, p, t) [p[1] * u[1] - p[2] * u[1] * u[2], -p[3] * u[2] + p[4] * u[1] * u[2]] @@ -223,9 +239,9 @@ chain = Lux.Chain(Lux.Dense(1, N, func), Lux.Dense(N, N, func), Lux.Dense(N, N, opt = Optimisers.Adam(0.01) weights = [0.7, 0.2, 0.1] -samples = 200 +points = 200 alg = NeuralPDE.NNODE(chain, opt, autodiff = false, - strategy = NeuralPDE.WeightedIntervalTraining(weights, samples)) + strategy = NeuralPDE.WeightedIntervalTraining(weights, points)) sol = solve(prob_oop, alg, verbose = true, maxiters = 100000, saveat = 0.01) @test abs(mean(sol) - mean(true_sol)) < 0.2 diff --git a/test/NNODE_tstops_test.jl b/test/NNODE_tstops_test.jl new file mode 100644 index 0000000000..82761f60bd --- /dev/null +++ b/test/NNODE_tstops_test.jl @@ -0,0 +1,67 @@ +using OrdinaryDiffEq, Lux, OptimizationOptimisers, Test, Statistics, Optimisers, NeuralPDE + +function fu(u, p, t) + [p[1] * u[1] - p[2] * u[1] * u[2], -p[3] * u[2] + p[4] * u[1] * u[2]] +end + +p = [1.5, 1.0, 3.0, 1.0] +u0 = [1.0, 1.0] +tspan = (0.0, 3.0) +points1 = [rand() for i=1:280] +points2 = [rand() + 1 for i=1:80] +points3 = [rand() + 2 for i=1:40] +addedPoints = vcat(points1, points2, points3) + +saveat = 0.01 +maxiters = 30000 + +prob_oop = ODEProblem{false}(fu, u0, tspan, p) +true_sol = solve(prob_oop, Tsit5(), saveat = saveat) +func = Lux.σ +N = 12 +chain = Lux.Chain(Lux.Dense(1, N, func), Lux.Dense(N, N, func), Lux.Dense(N, N, func), + Lux.Dense(N, N, func), Lux.Dense(N, length(u0))) + +opt = Optimisers.Adam(0.01) +threshold = 0.2 + +#bad choices for weights, samples and dx so that the algorithm will fail without the added points +weights = [0.3, 0.3, 0.4] +points = 3 +dx = 1.0 + +#Grid Training without added points (difference between solutions should be high) +alg = NeuralPDE.NNODE(chain, opt, autodiff = false, strategy = NeuralPDE.GridTraining(dx)) +sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat) + +@test abs(mean(sol) - mean(true_sol)) > threshold + +#Grid Training with added points (difference 
between solutions should be low) +alg = NeuralPDE.NNODE(chain, opt, autodiff = false, strategy = NeuralPDE.GridTraining(dx)) +sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat, tstops = addedPoints) + +@test abs(mean(sol) - mean(true_sol)) < threshold + +#WeightedIntervalTraining without added points (difference between solutions should be high) +alg = NeuralPDE.NNODE(chain, opt, autodiff = false, strategy = NeuralPDE.WeightedIntervalTraining(weights, points)) +sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat) + +@test abs(mean(sol) - mean(true_sol)) > threshold + +#WeightedIntervalTraining with added points (difference between solutions should be low) +alg = NeuralPDE.NNODE(chain, opt, autodiff = false, strategy = NeuralPDE.WeightedIntervalTraining(weights, points)) +sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat, tstops = addedPoints) + +@test abs(mean(sol) - mean(true_sol)) < threshold + +#StochasticTraining without added points (difference between solutions should be high) +alg = NeuralPDE.NNODE(chain, opt, autodiff = false, strategy = NeuralPDE.StochasticTraining(points)) +sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat) + +@test abs(mean(sol) - mean(true_sol)) > threshold + +#StochasticTraining with added points (difference between solutions should be low) +alg = NeuralPDE.NNODE(chain, opt, autodiff = false, strategy = NeuralPDE.StochasticTraining(points)) +sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat, tstops = addedPoints) + +@test abs(mean(sol) - mean(true_sol)) < threshold diff --git a/test/runtests.jl b/test/runtests.jl index 75bf98a91f..afe7186a28 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,12 +15,19 @@ function dev_subpkg(subpkg) end @time begin + #fixes 682 + if GROUP == "All" || GROUP == "ODEBPINN" + @time @safetestset "Bpinn ODE solver" begin include("BPINN_Tests.jl") end + end + if GROUP == "All" || GROUP == "NNPDE1" @time @safetestset "NNPDE" begin include("NNPDE_tests.jl") end end if GROUP == "All" || GROUP == "NNODE" @time @safetestset "NNODE" begin include("NNODE_tests.jl") end + @time @safetestset "NNODE_tstops" begin include("NNODE_tstops_test.jl") end end + if GROUP == "All" || GROUP == "NNPDE2" @time @safetestset "Additional Loss" begin include("additional_loss_tests.jl") end @time @safetestset "Direction Function Approximation" begin include("direct_function_tests.jl") end @@ -54,4 +61,4 @@ end @safetestset "NNPDE_gpu" begin include("NNPDE_tests_gpu.jl") end @safetestset "NNPDE_gpu_Lux" begin include("NNPDE_tests_gpu_Lux.jl") end end -end +end \ No newline at end of file