diff --git a/Project.toml b/Project.toml index 6de6ede..e9e8488 100644 --- a/Project.toml +++ b/Project.toml @@ -1,11 +1,12 @@ name = "PseudoTransientStokes" uuid = "47583623-8bd0-4afd-8f99-f1ff54a695ec" authors = ["Ludovic Raess , Ivan Utkin and contributors"] -version = "0.1.0" +version = "1.0.0" [deps] BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" ImplicitGlobalGrid = "4d7a3746-15be-11ea-1130-334b0c4f5fa0" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MAT = "23992714-dd62-5051-b70f-ba57cb901cac" diff --git a/README.md b/README.md index c7a7459..b22fb09 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,19 @@ Parallel (multi-) XPU iterative 2D and 3D incompressible Stokes flow solvers with viscous and Maxwell visco-elastic shear rheology. This software is part of the [**PTsolvers project**](https://ptsolvers.github.io/). -The aim of this project is to provide iterative solvers **assessing the scalability, performance, and robustness of the accelerated pseudo-transient method** with application to Stokes flow and mechancial processes. The solution strategy characterises as semi-iterative, implementing the second-order convergence acceleration as introduced by, e.g., \[[Frankel, 1950](https://doi.org/10.2307/2002770)\]. +The aim of this project is to provide iterative solvers **assessing the scalability, performance, and robustness of the accelerated pseudo-transient method** with application to Stokes flow and mechancial processes. The solution strategy characterises as semi-iterative, implementing the second-order convergence acceleration as introduced by, e.g., [Frankel (1950)](https://doi.org/10.2307/2002770). -This repository, together with [**PseudoTransientDiffusion.jl**](https://github.com/PTsolvers/PseudoTransientDiffusion.jl/), relates to the original research article draft submitted to the _**Journal XXX**_: +This repository, together with [**PseudoTransientDiffusion.jl**](https://github.com/PTsolvers/PseudoTransientDiffusion.jl/), relates to the original research article draft submitted to the _**Geoscientific Model Development**_ journal: ```tex -@article{raess2022, - title = {{ }}, - journal = {Journal XXX} +@Article{raess2022, + AUTHOR = {R\"ass, L. and Utkin, I. and Duretz, T. and Omlin, S. and Podladchikov, Y. Y.}, + TITLE = {Assessing the robustness and scalability of the accelerated pseudo-transient method towards exascale computing}, + JOURNAL = {Geoscientific Model Development Discussions}, + VOLUME = {2022}, + YEAR = {2022}, + PAGES = {1--46}, + URL = {https://gmd.copernicus.org/preprints/gmd-2021-411/}, + DOI = {10.5194/gmd-2021-411} } ``` diff --git a/output/out_Stokes2D_ve_aspect_ratio_param.txt b/output/out_Stokes2D_ve_aspect_ratio_param.txt new file mode 100644 index 0000000..db9322a --- /dev/null +++ b/output/out_Stokes2D_ve_aspect_ratio_param.txt @@ -0,0 +1,200 @@ +127 127 1 1.0 7493 5 8.949133719444429e-7 +127 127 1 0.9 6985 5 4.181419533545885e-7 +127 127 1 0.8 6985 5 7.875613751823342e-7 +127 127 1 0.7 6858 5 3.7557325527563354e-7 +127 127 1 0.6 7747 5 5.810437011692422e-7 +127 127 1 0.5 8890 5 6.690701429439299e-7 +127 127 1 0.4 10668 5 9.660892025202093e-7 +127 127 1 0.3 14224 5 4.83414048051695e-7 +127 127 1 0.2 20574 5 7.356075701915861e-7 +127 127 1 0.1 39243 5 5.931611081453011e-7 +255 127 2 1.0 40290 5 9.689716795449745e-7 +255 127 2 0.9 36210 5 8.824730578376933e-7 +255 127 2 0.8 32130 5 7.834884977722411e-7 +255 127 2 0.7 28305 5 8.878712118355355e-7 +255 127 2 0.6 24225 5 7.525784032748784e-7 +255 127 2 0.5 20145 5 8.837592258677685e-7 +255 127 2 0.4 16830 5 6.653892802484143e-7 +255 127 2 0.3 17595 5 9.97126568649744e-7 +255 127 2 0.2 27795 5 3.109979817956845e-7 +255 127 2 0.1 52020 5 9.678335959842161e-7 +511 127 4 1.0 131838 5 8.991267697482378e-7 +511 127 4 0.9 118552 5 9.057773613974623e-7 +511 127 4 0.8 105266 5 9.124082426576858e-7 +511 127 4 0.7 92491 5 9.207469467580272e-7 +511 127 4 0.6 79205 5 9.286647929562713e-7 +511 127 4 0.5 64897 5 9.384957748370229e-7 +511 127 4 0.4 52122 5 9.304819560945235e-7 +511 127 4 0.3 39347 5 5.894892345199578e-7 +511 127 4 0.2 28105 5 9.45420652110166e-7 +511 127 4 0.1 52633 5 2.9110225854316013e-7 +1023 127 8 1.0 111507 5 9.361251890050822e-7 +1023 127 8 0.9 100254 5 8.314605988965376e-7 +1023 127 8 0.8 93093 5 9.021172236269436e-7 +1023 127 8 0.7 82863 5 5.817231807359488e-7 +1023 127 8 0.6 71610 5 5.516853997779365e-7 +1023 127 8 0.5 57288 5 9.88164142720484e-7 +1023 127 8 0.4 51150 5 5.728016994878197e-7 +1023 127 8 0.3 49104 5 2.6270541005704357e-7 +1023 127 8 0.2 68541 5 5.86363972706685e-8 +1023 127 8 0.1 139128 5 7.685919230468523e-7 +255 255 1 1.0 14790 5 6.791522405730391e-7 +255 255 1 0.9 14025 5 4.395960984089908e-7 +255 255 1 0.8 14025 5 7.31753251678742e-7 +255 255 1 0.7 14025 5 4.163477559257886e-7 +255 255 1 0.6 15300 5 4.936447856025642e-7 +255 255 1 0.5 17595 5 6.133327342363607e-7 +255 255 1 0.4 22440 5 7.635308715871784e-7 +255 255 1 0.3 28305 5 4.662756467175016e-7 +255 255 1 0.2 41310 5 7.637852272206864e-7 +255 255 1 0.1 78030 5 8.243779230052118e-7 +511 255 2 1.0 47523 5 9.048588086115834e-7 +511 255 2 0.9 43435 5 8.592460324659496e-7 +511 255 2 0.8 38325 5 8.161696843568943e-7 +511 255 2 0.7 33726 5 7.53652344834529e-7 +511 255 2 0.6 28105 5 9.566665405154029e-7 +511 255 2 0.5 24528 5 5.868071476703658e-7 +511 255 2 0.4 26061 5 5.769205741627131e-7 +511 255 2 0.3 31171 5 7.719007292828865e-7 +511 255 2 0.2 45990 5 6.958651219444252e-7 +511 255 2 0.1 91469 5 5.937653206533137e-7 +1023 255 4 1.0 392833 5 9.864575117795227e-7 +1023 255 4 0.9 364188 5 9.850069904625935e-7 +1023 255 4 0.8 323268 5 9.898649520476326e-7 +1023 255 4 0.7 282348 5 9.98903309338416e-7 +1023 255 4 0.6 242451 5 9.327412158694644e-7 +1023 255 4 0.5 202554 5 9.275514061140826e-7 +1023 255 4 0.4 160611 5 9.254853185540378e-7 +1023 255 4 0.3 119691 5 9.094416913519604e-7 +1023 255 4 0.2 84909 5 7.902401527930736e-7 +1023 255 4 0.1 101277 5 4.2420961946284025e-7 +2047 255 8 1.0 331614 5 9.407841289680347e-7 +2047 255 8 0.9 298862 5 9.618348073861225e-7 +2047 255 8 0.8 264063 5 9.864200940173828e-7 +2047 255 8 0.7 233358 5 8.953426088146778e-7 +2047 255 8 0.6 204700 5 9.049949422592606e-7 +2047 255 8 0.5 184230 5 8.078278893769553e-7 +2047 255 8 0.4 161713 5 9.908782894745435e-7 +2047 255 8 0.3 137149 5 7.769415691154724e-7 +2047 255 8 0.2 98256 5 7.167327040519742e-7 +2047 255 8 0.1 167854 5 5.933066442117446e-7 +511 511 1 1.0 29127 5 5.563170854633726e-7 +511 511 1 0.9 28105 5 4.3736733059403713e-7 +511 511 1 0.8 27083 5 6.375102365405169e-7 +511 511 1 0.7 28105 5 4.223696693508142e-7 +511 511 1 0.6 30149 5 9.825989301822902e-7 +511 511 1 0.5 35259 5 5.315279912700201e-7 +511 511 1 0.4 43946 5 6.461611128592814e-7 +511 511 1 0.3 56721 5 4.654477839585659e-7 +511 511 1 0.2 82782 5 7.615527491400387e-7 +511 511 1 0.1 155344 5 5.833147538617798e-7 +1023 511 2 1.0 97185 5 8.512857924429395e-7 +1023 511 2 0.9 86955 5 9.954942646693146e-7 +1023 511 2 0.8 77748 5 9.338531033951888e-7 +1023 511 2 0.7 68541 5 8.67410598227796e-7 +1023 511 2 0.6 59334 5 7.73107909003622e-7 +1023 511 2 0.5 50127 5 6.663725924951892e-7 +1023 511 2 0.4 46035 5 5.993662061032392e-7 +1023 511 2 0.3 61380 5 6.839889752979761e-7 +1023 511 2 0.2 90024 5 6.354620164546036e-7 +1023 511 2 0.1 172887 5 4.073608559466756e-7 +2047 511 4 1.0 497421 5 9.794480542612262e-7 +2047 511 4 0.9 448293 5 9.814099808456513e-7 +2047 511 4 0.8 399165 5 9.917409441099138e-7 +2047 511 4 0.7 347990 5 9.716233930138064e-7 +2047 511 4 0.6 298862 5 9.85076705082809e-7 +2047 511 4 0.5 249734 5 9.410596173482327e-7 +2047 511 4 0.4 202653 5 9.430731654549624e-7 +2047 511 4 0.3 155572 5 8.505438204835805e-7 +2047 511 4 0.2 128961 5 6.450922252281872e-7 +2047 511 4 0.1 198559 5 2.971606873429217e-7 +4095 511 8 1.0 380835 5 9.813828483815118e-7 +4095 511 8 0.9 348075 5 9.334827938554041e-7 +4095 511 8 0.8 311220 5 9.485673333929995e-7 +4095 511 8 0.7 270270 5 9.83337622008546e-7 +4095 511 8 0.6 233415 5 9.278774998514253e-7 +4095 511 8 0.5 192465 5 9.796201601525925e-7 +4095 511 8 0.4 163800 5 8.765207853084336e-7 +4095 511 8 0.3 159705 5 7.408289813407761e-7 +4095 511 8 0.2 159705 5 5.325120649022499e-7 +4095 511 8 0.1 221130 5 2.773797450981498e-7 +1023 1023 1 1.0 58311 5 5.080232950235334e-7 +1023 1023 1 0.9 56265 5 4.2754048078567846e-7 +1023 1023 1 0.8 54219 5 5.957298452604008e-7 +1023 1023 1 0.7 56265 5 4.227832792123089e-7 +1023 1023 1 0.6 60357 5 9.158270106666292e-7 +1023 1023 1 0.5 70587 5 4.927867145848536e-7 +1023 1023 1 0.4 87978 5 5.938143045146796e-7 +1023 1023 1 0.3 111507 5 4.6080738203512136e-7 +1023 1023 1 0.2 165726 5 7.55085209557807e-7 +1023 1023 1 0.1 310992 5 5.771850734330881e-7 +2047 1023 2 1.0 157619 5 9.024666034771542e-7 +2047 1023 2 0.9 143290 5 9.142781594155181e-7 +2047 1023 2 0.8 126914 5 9.400428884771679e-7 +2047 1023 2 0.7 110538 5 9.661385150377673e-7 +2047 1023 2 0.6 94162 5 7.378644181315238e-7 +2047 1023 2 0.5 85974 5 3.525552763417923e-7 +2047 1023 2 0.4 92115 5 5.384007080824774e-7 +2047 1023 2 0.3 122820 5 6.350885506310499e-7 +2047 1023 2 0.2 178089 5 5.861690536689725e-7 +2047 1023 2 0.1 345943 5 3.076026961406958e-7 +4095 1023 4 1.0 1597052 5 9.807849780882133e-7 +4095 1023 4 0.9 1486486 5 9.85380521233787e-7 +4095 1023 4 0.8 1367731 5 9.642555965960354e-7 +4095 1023 4 0.7 1199835 5 9.87775660059297e-7 +4095 1023 4 0.6 1027845 5 9.753456303235292e-7 +4095 1023 4 0.5 859950 5 9.547029687145927e-7 +4095 1023 4 0.4 687960 5 9.944238863192944e-7 +4095 1023 4 0.3 515970 5 9.687230258231868e-7 +4095 1023 4 0.2 352170 5 9.211997900556518e-7 +4095 1023 4 0.1 380835 5 3.004563754238144e-7 +8191 1023 8 1.0 1130358 5 9.886797666971499e-7 +8191 1023 8 0.9 1023875 5 9.755058338619207e-7 +8191 1023 8 0.8 909201 5 9.765735836758394e-7 +8191 1023 8 0.7 810909 5 9.421201736910573e-7 +8191 1023 8 0.6 696235 5 9.33381055558157e-7 +8191 1023 8 0.5 597943 5 9.227497550958628e-7 +8191 1023 8 0.4 565179 5 8.787963445641398e-7 +8191 1023 8 0.3 524224 5 9.992063047854377e-7 +8191 1023 8 0.2 442314 5 9.255563467140268e-7 +8191 1023 8 0.1 573370 5 7.325327415445809e-7 +2047 2047 1 1.0 116679 5 4.87567555536393e-7 +2047 2047 1 0.9 112585 5 4.1904926880050874e-7 +2047 2047 1 0.8 108491 5 5.76360633680984e-7 +2047 2047 1 0.7 112585 5 4.2209983134023145e-7 +2047 2047 1 0.6 120773 5 8.848022272453102e-7 +2047 2047 1 0.5 139196 5 4.734848344181913e-7 +2047 2047 1 0.4 176042 5 5.688845836865335e-7 +2047 2047 1 0.3 223123 5 4.5678974136979957e-7 +2047 2047 1 0.2 331614 5 7.499449977674834e-7 +2047 2047 1 0.1 622288 5 5.728334991195086e-7 +4095 2047 2 1.0 147420 5 9.807672605107035e-7 +4095 2047 2 0.9 135135 5 4.042723561418304e-7 +4095 2047 2 0.8 126945 5 4.497623880515488e-7 +4095 2047 2 0.7 122850 5 3.294602843134487e-7 +4095 2047 2 0.6 135135 5 2.403407029461205e-7 +4095 2047 2 0.5 155610 5 9.370393808041531e-7 +4095 2047 2 0.4 184275 5 5.219763587442667e-7 +4095 2047 2 0.3 237510 5 6.201819316490409e-7 +4095 2047 2 0.2 356265 5 5.758648981041689e-7 +4095 2047 2 0.1 692055 5 2.6508430058487347e-7 +8191 2047 4 1.0 1523526 5 9.750684611500487e-7 +8191 2047 4 0.9 1367897 5 9.768997214874987e-7 +8191 2047 4 0.8 1220459 5 9.665137032074935e-7 +8191 2047 4 0.7 1064830 5 9.933891609796304e-7 +8191 2047 4 0.6 917392 5 9.763406536071941e-7 +8191 2047 4 0.5 761763 5 9.807629362085003e-7 +8191 2047 4 0.4 614325 5 9.697127941270696e-7 +8191 2047 4 0.3 483269 5 7.714389261741339e-7 +8191 2047 4 0.2 458696 5 5.489689719037788e-7 +8191 2047 4 0.1 761763 5 2.9880142093997593e-7 +16383 2047 8 1.0 1261491 5 9.913309698241956e-7 +16383 2047 8 0.9 1146810 5 9.646188689210857e-7 +16383 2047 8 0.8 1015746 5 9.990420928017293e-7 +16383 2047 8 0.7 901065 5 9.602494666476408e-7 +16383 2047 8 0.6 786384 5 9.164044218107381e-7 +16383 2047 8 0.5 655320 5 9.630995134486998e-7 +16383 2047 8 0.4 557022 5 7.997760328955989e-7 +16383 2047 8 0.3 540639 5 6.929784137665239e-7 +16383 2047 8 0.2 606171 5 4.969339854074335e-7 +16383 2047 8 0.1 884682 5 1.2192983240272868e-7 diff --git a/scripts/Stokes2D_param.jl b/scripts/Stokes2D_param.jl index e0a741e..54bbe1c 100644 --- a/scripts/Stokes2D_param.jl +++ b/scripts/Stokes2D_param.jl @@ -17,6 +17,7 @@ using ParallelStencil using ParallelStencil.FiniteDifferences2D @static if USE_GPU @init_parallel_stencil(CUDA, Float64, 2) + CUDA.device!(gpu_id) # Set CUDA device else @init_parallel_stencil(Threads, Float64, 2) end @@ -136,8 +137,6 @@ end @views function Stokes2D() # Set random seed for reproducibility Random.seed!(1855 + nsub) - # Set CUDA device - CUDA.device!(gpu_id) # Physics lx, ly = 10.0, 10.0 # domain extends μs0 = 1.0 # matrix viscosity @@ -235,7 +234,7 @@ end end if do_save !ispath("../output") && mkpath("../output") - open("out_Stokes2D_$(simname)_param.txt","a") do io + open("../output/out_Stokes2D_$(simname)_param.txt","a") do io println(io, "$(vrpow) $(nsub) $(Re_mlt) $(iter) $(err_V) $(err_∇V)") end end diff --git a/scripts/Stokes2D_ve_param.jl b/scripts/Stokes2D_ve_param.jl new file mode 100644 index 0000000..0cbae07 --- /dev/null +++ b/scripts/Stokes2D_ve_param.jl @@ -0,0 +1,219 @@ +const USE_GPU = haskey(ENV, "USE_GPU" ) ? parse(Bool, ENV["USE_GPU"] ) : false +const do_viz = haskey(ENV, "DO_VIZ" ) ? parse(Bool, ENV["DO_VIZ"] ) : false +const do_save = haskey(ENV, "DO_SAVE" ) ? parse(Bool, ENV["DO_SAVE"] ) : false +const do_save_viz = haskey(ENV, "DO_SAVE_VIZ") ? parse(Bool, ENV["DO_SAVE_VIZ"]) : false +const gpu_id = haskey(ENV, "GPU_ID" ) ? parse(Int , ENV["GPU_ID"] ) : 0 +const ny = haskey(ENV, "RESOL" ) ? parse(Int , ENV["RESOL"] ) : 256 - 1 +const nsub = haskey(ENV, "NSUB" ) ? parse(Int , ENV["NSUB"] ) : 1 +const simname = haskey(ENV, "SIMNAME" ) ? ENV["SIMNAME"] : "" +const fact = haskey(ENV, "FACT" ) ? parse(Float64, ENV["FACT"] ) : 1.0 +### +using ParallelStencil +using ParallelStencil.FiniteDifferences2D +@static if USE_GPU + @init_parallel_stencil(CUDA, Float64, 2) + CUDA.device!(gpu_id) # Set CUDA device +else + @init_parallel_stencil(Threads, Float64, 2) +end +using Plots, Printf, Statistics, LinearAlgebra, MAT + +@parallel function smooth!(A2::Data.Array, A::Data.Array, fact::Data.Number) + @inn(A2) = @inn(A) + 1.0/4.1/fact*(@d2_xi(A) + @d2_yi(A)) + return +end + +@parallel function compute_maxloc!(Musτ2::Data.Array, Musτ::Data.Array) + @inn(Musτ2) = @maxloc(Musτ) + return +end + +macro Mu_eff() esc(:(1.0/(1.0/@all(Musτ) + 1.0/(G*dt)))) end +@parallel function compute_iter_params!(dτ_Rho::Data.Array, Gdτ::Data.Array, Musτ::Data.Array, Vpdτ::Data.Number, G::Data.Number, dt::Data.Number, Re::Data.Number, r::Data.Number, min_lxy::Data.Number) + @all(dτ_Rho) = Vpdτ*min_lxy/Re/@Mu_eff() + @all(Gdτ) = Vpdτ^2/@all(dτ_Rho)/(r+2) + return +end + +@parallel function assign_τ!(τxx::Data.Array, τyy::Data.Array, τxy::Data.Array, τxx_o::Data.Array, τyy_o::Data.Array, τxy_o::Data.Array) + @all(τxx_o) = @all(τxx) + @all(τyy_o) = @all(τyy) + @all(τxy_o) = @all(τxy) + return +end + +@parallel function compute_P!(∇V::Data.Array, Pt::Data.Array, Gdτ::Data.Array, Vx::Data.Array, Vy::Data.Array, r::Data.Number, dx::Data.Number, dy::Data.Number) + @all(∇V) = @d_xa(Vx)/dx + @d_ya(Vy)/dy + @all(Pt) = @all(Pt) - r*@all(Gdτ)*@all(∇V) + return +end + +macro Gr() esc(:( @all(Gdτ)/(G*dt) )) end +macro av_Gr() esc(:( @av(Gdτ)/(G*dt) )) end +@parallel function compute_τ!(τxx::Data.Array, τyy::Data.Array, τxy::Data.Array, τxx_o::Data.Array, τyy_o::Data.Array, τxy_o::Data.Array, Gdτ::Data.Array, Vx::Data.Array, Vy::Data.Array, Mus::Data.Array, G::Data.Number, dt::Data.Number, dx::Data.Number, dy::Data.Number) + @all(τxx) = (@all(τxx) + @all(τxx_o)* @Gr() + 2.0*@all(Gdτ)*(@d_xa(Vx)/dx))/(1.0 + @all(Gdτ)/@all(Mus) + @Gr()) + @all(τyy) = (@all(τyy) + @all(τyy_o)* @Gr() + 2.0*@all(Gdτ)*(@d_ya(Vy)/dy))/(1.0 + @all(Gdτ)/@all(Mus) + @Gr()) + @all(τxy) = (@all(τxy) + @all(τxy_o)*@av_Gr() + 2.0*@av(Gdτ)*(0.5*(@d_yi(Vx)/dy + @d_xi(Vy)/dx)))/(1.0 + @av(Gdτ)/@av(Mus) + @av_Gr()) + return +end + +@parallel function compute_dV!(dVx::Data.Array, dVy::Data.Array, Pt::Data.Array, τxx::Data.Array, τyy::Data.Array, τxy::Data.Array, dτ_Rho::Data.Array, dx::Data.Number, dy::Data.Number) + @all(dVx) = (@d_xi(τxx)/dx + @d_ya(τxy)/dy - @d_xi(Pt)/dx)*@av_xi(dτ_Rho) + @all(dVy) = (@d_yi(τyy)/dy + @d_xa(τxy)/dx - @d_yi(Pt)/dy)*@av_yi(dτ_Rho) + return +end + +@parallel function compute_V!(Vx::Data.Array, Vy::Data.Array, dVx::Data.Array, dVy::Data.Array) + @inn(Vx) = @inn(Vx) + @all(dVx) + @inn(Vy) = @inn(Vy) + @all(dVy) + return +end + +@parallel function compute_Res!(Rx::Data.Array, Ry::Data.Array, Pt::Data.Array, τxx::Data.Array, τyy::Data.Array, τxy::Data.Array, dx::Data.Number, dy::Data.Number) + @all(Rx) = @d_xi(τxx)/dx + @d_ya(τxy)/dy - @d_xi(Pt)/dx + @all(Ry) = @d_yi(τyy)/dy + @d_xa(τxy)/dx - @d_yi(Pt)/dy + return +end + +@parallel_indices (iy) function bc_x!(A::Data.Array) + A[1 , iy] = A[2 , iy] + A[end, iy] = A[end-1, iy] + return +end + +@parallel_indices (ix) function bc_y!(A::Data.Array) + A[ix, 1 ] = A[ix, 2 ] + A[ix, end] = A[ix, end-1] + return +end + +@views function Stokes2D() + # Physics + ly = 10.0 # domain extends + ξ = 1.0 # Maxwell relaxation time + μs0 = 1.0 # matrix viscosity + μsi = 1e-3 # inclusion viscosity + G = 1.0 # elastic shear modulus + εbg = 1.0 # background strain-rate + dt = μs0/(G*ξ) + # Derived physics + lx = nsub*ly + # Numerics + nx = nsub*(ny+1)-1 + nt = 5 # number of time steps + iterMax = 100nx # maximum number of pseudo-transient iterations + nout = 1nx # error checking frequency + Re = 5π # Reynolds number + r = 1.0 # Bulk to shear elastic modulus ratio + CFL = 0.87/sqrt(2) # CFL number # DEBUG was 0.9 + ε = 1e-6 # nonlinear absolute tolerence + # Derived numerics + dx, dy = lx/nx, ly/ny # cell sizes + # min_lxy = 0.4/nsub*min(lx,ly) + min_lxy = fact*min(lx,ly) + Vpdτ = min(dx,dy)*CFL + xc,yc,yv = LinRange(dx/2, lx - dx/2, nx), LinRange(dy/2, ly - dy/2, ny), LinRange(0, ly, ny+1) + # Array allocations + Pt = @zeros(nx ,ny ) + dτ_Rho = @zeros(nx ,ny ) + Gdτ = @zeros(nx ,ny ) + ∇V = @zeros(nx ,ny ) + τxx = @zeros(nx ,ny ) + τyy = @zeros(nx ,ny ) + τxy = @zeros(nx-1,ny-1) + τxx_o = @zeros(nx ,ny ) + τyy_o = @zeros(nx ,ny ) + τxy_o = @zeros(nx-1,ny-1) + dVx = @zeros(nx-1,ny-2) + dVy = @zeros(nx-2,ny-1) + Rx = @zeros(nx-1,ny-2) + Ry = @zeros(nx-2,ny-1) + Mus2 = @zeros(nx ,ny ) + Musτ = @zeros(nx ,ny ) + # Initial conditions + Rad2 = zeros(nx ,ny ) + Vx = zeros(nx+1,ny ) + Vy = zeros(nx ,ny+1) + Vx = Data.Array( -εbg.*[((ix-1)*dx -0.5*lx) for ix=1:size(Vx,1), iy=1:size(Vx,2)] ) + Vy = Data.Array( εbg.*[((iy-1)*dy -0.5*ly) for ix=1:size(Vy,1), iy=1:size(Vy,2)] ) + Mus = μs0*ones(nx,ny) + for isub=1:nsub + Rad2 .= [((ix-1)*dx +0.5*dx -0.5*ly -(isub-1)*ly)^2 + ((iy-1)*dy +0.5*dy -0.5*ly)^2 for ix=1:size(Rad2,1), iy=1:size(Rad2,2)] + Mus[Rad2.<1.0] .= μsi + end + Mus = Data.Array( Mus ) + Mus2 .= Mus + for ism=1:10 + @parallel smooth!(Mus2, Mus, 1.0) + Mus, Mus2 = Mus2, Mus + end + Musτ .= Mus + @parallel compute_maxloc!(Musτ, Mus) + @parallel (1:size(Musτ,2)) bc_x!(Musτ) + @parallel (1:size(Musτ,1)) bc_y!(Musτ) + # Time loop + @parallel compute_iter_params!(dτ_Rho, Gdτ, Musτ, Vpdτ, G, dt, Re, r, min_lxy) + t=0.0; ittot=0; evo_t=[]; evo_τyy=[]; err_evo1=[]; err_evo2=[]; err=2*ε + for it = 1:nt + err=2*ε; iter=0 + @parallel assign_τ!(τxx, τyy, τxy, τxx_o, τyy_o, τxy_o) + # Pseudo-transient iteration + while err > ε && iter <= iterMax + if (it==1 && iter==11) global wtime0 = Base.time() end + @parallel compute_P!(∇V, Pt, Gdτ, Vx, Vy, r, dx, dy) + @parallel compute_τ!(τxx, τyy, τxy, τxx_o, τyy_o, τxy_o, Gdτ, Vx, Vy, Mus, G, dt, dx, dy) + @parallel compute_dV!(dVx, dVy, Pt, τxx, τyy, τxy, dτ_Rho, dx, dy) + @parallel compute_V!(Vx, Vy, dVx, dVy) + @parallel (1:size(Vx,1)) bc_y!(Vx) + @parallel (1:size(Vy,2)) bc_x!(Vy) + iter += 1 + if iter % nout == 0 + @parallel compute_Res!(Rx, Ry, Pt, τxx, τyy, τxy, dx, dy) + Vmin, Vmax = minimum(Vx), maximum(Vx) + Pmin, Pmax = minimum(Pt), maximum(Pt) + norm_Rx = norm(Rx)/(Pmax-Pmin)*lx/sqrt(length(Rx)) + norm_Ry = norm(Ry)/(Pmax-Pmin)*ly/sqrt(length(Ry)) + norm_∇V = norm(∇V)/(Vmax-Vmin)*ly/sqrt(length(∇V)) + err = maximum([norm_Rx, norm_Ry, norm_∇V]) + if isnan(err) error("NaN") end + push!(err_evo1, maximum([norm_Rx, norm_Ry, norm_∇V])); push!(err_evo2,iter) + @printf("Step = %d, iter = %d, err = %1.3e [norm_Rx=%1.3e, norm_Ry=%1.3e, norm_∇V=%1.3e] \n", it, iter, err, norm_Rx, norm_Ry, norm_∇V) + end + end + ittot += iter; t += dt + # push!(evo_t, t); push!(evo_τyy, maximum(τyy)) + end + # Performance + wtime = Base.time() - wtime0 + A_eff = (6*2 + 1)/1e9*nx*ny*sizeof(Data.Number) # Effective main memory access per iteration [GB] (Lower bound of required memory access: Te has to be read and written: 2 whole-array memaccess; Ci has to be read: : 1 whole-array memaccess) + wtime_it = wtime/(ittot-10) # Execution time per iteration [s] + T_eff = A_eff/wtime_it # Effective memory throughput [GB/s] + @printf("Total iters = %d (%d steps), time = %1.3e sec (@ T_eff = %1.2f GB/s) \n", ittot, nt, wtime, round(T_eff, sigdigits=2)) + # Visualisation + if do_viz + p1 = heatmap(xc, yc, Array(τyy)', aspect_ratio=1, xlims=extrema(xc), ylims=extrema(yc), c=:viridis, title="τyy") + p2 = heatmap(xc, yv, Array(Vy)', aspect_ratio=1, xlims=extrema(xc), ylims=extrema(yv), c=:viridis, title="Vy") + p4 = heatmap(xc[2:end-1], yv[2:end-1], log10.(abs.(Array(Ry)')), aspect_ratio=1, xlims=extrema(xc[2:end-1]), ylims=extrema(yv[2:end-1]), c=:viridis, title="log10(Ry)") + p5 = scatter(err_evo2, err_evo1, legend=false, xlabel="# iterations", ylabel="log10(error)", linewidth=2, markershape=:circle, markersize=3, framestyle=:box, labels="max(error)", yaxis=:log10) + p3 = plot(evo_t, evo_τyy , legend=false, xlabel="time", ylabel="max(τyy)", linewidth=0, markershape=:circle, framestyle=:box, markersize=3) + #plot!(evo_t, 2.0.*εbg.*μs0.*(1.0.-exp.(.-evo_t.*G./μs0)), linewidth=2.0) # analytical solution + plot(p1, p2, p4, p5, p3) + outdir = joinpath("..", "out_visu", "NSUB_$(nsub)_RESOL_$(ny)") + !ispath("../out_visu") && mkdir("../out_visu") + png(outdir) + end + if do_save + !ispath("../output") && mkdir("../output") + open("../output/out_Stokes2D_ve_$(simname)_param.txt","a") do io + println(io, "$(nx) $(ny) $(nsub) $(fact) $(ittot) $(nt) $(err)") + end + end + if do_save_viz + outdir = joinpath("..", "out_visu", simname, "NSUB_$(nsub)_RESOL_$(ny)") + !ispath("../out_visu") && mkdir("../out_visu") + matwrite("../out_visu/Stokes_2D_ve.mat", Dict("Pt_2D"=> Array(Pt), "Mus_2D"=> Array(Mus), "Txy_2D"=> Array(τxy), "Vy_2D"=> Array(Vy), "dx_2D"=> dx, "dy_2D"=> dy); compress = true) + end + return +end + +Stokes2D() diff --git a/scripts/runme_Stokes2D_aspect_ratio.jl b/scripts/runme_Stokes2D_aspect_ratio.jl new file mode 100644 index 0000000..32b9116 --- /dev/null +++ b/scripts/runme_Stokes2D_aspect_ratio.jl @@ -0,0 +1,24 @@ +using DataStructures +params = OrderedDict( + # "NSUB" => (1,2,4,8), + # "RESOL" => (127, 255, 511, 1023, 2047) + "FACT" => (1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1), + "NSUB" => (1, 2, 4, 8), + "RESOL" => (127, 255, 511, 1023, 2047) +) +static_params = Dict( + "USE_GPU" => true, + "GPU_ID" => 7, + "DO_VIZ" => false, + "DO_SAVE" => true, + "DO_SAVE_VIZ" => false, + "SIMNAME" => "aspect_ratio" +) +par_names = Iterators.flatten([typeof(par)<:Tuple ? [par...] : [par] for par ∈ keys(params)]) +for par in Iterators.product(values(params)...) + par_values = Iterators.flatten(par) + println(collect(par_values)) + for (p,v) ∈ zip(par_names,par_values) ENV[p] = v end + for (p,v) ∈ static_params ENV[p] = v end + run(`julia --project -O3 --check-bounds=no Stokes2D_ve_param.jl`) +end diff --git a/scripts/stokes_3D/Stokes3D_ve_multixpu.jl b/scripts/stokes_3D/Stokes3D_ve_multixpu.jl index 2d6d4a8..4f6d7ae 100644 --- a/scripts/stokes_3D/Stokes3D_ve_multixpu.jl +++ b/scripts/stokes_3D/Stokes3D_ve_multixpu.jl @@ -136,7 +136,6 @@ end b_width = (8, 4, 4) # boundary width for comm/comp overlap # Derived numerics me, dims = init_global_grid(nx, ny, nz) # MPI initialisation - @static if USE_GPU select_device() end # select one GPU per MPI local rank (if >1 GPU per node) dx, dy, dz = lx/nx_g(), ly/ny_g(), lz/nz_g() # cell sizes max_lxyz = max(lx,ly,lz) Vpdτ = min(dx,dy,dz)*CFL @@ -265,11 +264,11 @@ end if (me==0) @printf("Total iters = %d (%d steps), time = %1.3e sec (@ T_eff = %1.2f GB/s) \n", ittot, nt, wtime, round(T_eff, sigdigits=2)) end # Visualisation if do_viz || do_save_viz - Pt_inn .= inn(Pt); gather!(Pt_inn, Pt_v) - Vz_inn .= av_zi(Vz); gather!(Vz_inn, Vz_v) - Rz_inn .= av_za(Rz); gather!(Rz_inn, Rz_v) - Mus_inn.= inn(Mus); gather!(Mus_inn, Mus_v) - τxz_inn.= av_xza(τxz); gather!(τxz_inn, τxz_v) + Pt_inn .= Array(inn(Pt)); gather!(Pt_inn, Pt_v) + Vz_inn .= Array(av_zi(Vz)); gather!(Vz_inn, Vz_v) + Rz_inn .= Array(av_za(Rz)); gather!(Rz_inn, Rz_v) + Mus_inn.= Array(inn(Mus)); gather!(Mus_inn, Mus_v) + τxz_inn.= Array(av_xza(τxz)); gather!(τxz_inn, τxz_v) if me==0 && do_viz p1 = heatmap(Xi_g, Zi_g, Pt_v[:,y_sl,:]', aspect_ratio=1, xlims=(Xi_g[1],Xi_g[end]), zlims=(Zi_g[1],Zi_g[end]), c=:viridis, title="Pressure") p2 = heatmap(Xi_g, Zi_g, Vz_v[:,y_sl,:]', aspect_ratio=1, xlims=(Xi_g[1],Xi_g[end]), zlims=(Zi_g[1],Zi_g[end]), c=:viridis, title="Vz") diff --git a/scripts/stokes_3D/Stokes3D_ve_multixpu_perf.jl b/scripts/stokes_3D/Stokes3D_ve_multixpu_perf.jl index d235e2f..67f9d76 100644 --- a/scripts/stokes_3D/Stokes3D_ve_multixpu_perf.jl +++ b/scripts/stokes_3D/Stokes3D_ve_multixpu_perf.jl @@ -140,7 +140,6 @@ end b_width = (16, 8, 4) # boundary width for comm/comp overlap # Derived numerics me, dims, nprocs = init_global_grid(nx, ny, nz) # MPI initialisation - @static if USE_GPU select_device() end # select one GPU per MPI local rank (if >1 GPU per node) dx, dy, dz = lx/nx_g(), ly/ny_g(), lz/nz_g() # cell sizes max_lxyz = max(lx,ly,lz) Vpdτ = min(dx,dy,dz)*CFL @@ -265,11 +264,11 @@ end if (me==0) @printf("Total iters = %d (%d steps), time = %1.3e sec (@ T_eff = %1.2f GB/s) \n", ittot, nt, wtime, round(T_eff, sigdigits=3)) end # Visualisation if do_viz || do_save_viz - Pt_inn .= inn(Pt); gather!(Pt_inn, Pt_v) - Vz_inn .= av_zi(Vz); gather!(Vz_inn, Vz_v) - Rz_inn .= av_za(Rz); gather!(Rz_inn, Rz_v) - Mus_inn.= inn(Mus); gather!(Mus_inn, Mus_v) - τxz_inn.= av_xza(τxz); gather!(τxz_inn, τxz_v) + Pt_inn .= Array(inn(Pt)); gather!(Pt_inn, Pt_v) + Vz_inn .= Array(av_zi(Vz)); gather!(Vz_inn, Vz_v) + Rz_inn .= Array(av_za(Rz)); gather!(Rz_inn, Rz_v) + Mus_inn.= Array(inn(Mus)); gather!(Mus_inn, Mus_v) + τxz_inn.= Array(av_xza(τxz)); gather!(τxz_inn, τxz_v) if me==0 && do_viz p1 = heatmap(Xi_g, Zi_g, Pt_v[:,y_sl,:]', aspect_ratio=1, xlims=(Xi_g[1],Xi_g[end]), zlims=(Zi_g[1],Zi_g[end]), c=:viridis, title="Pressure") p2 = heatmap(Xi_g, Zi_g, Vz_v[:,y_sl,:]', aspect_ratio=1, xlims=(Xi_g[1],Xi_g[end]), zlims=(Zi_g[1],Zi_g[end]), c=:viridis, title="Vz") diff --git a/scripts/stokes_3D/previous_damp/Stokes3D0_multixpu.jl b/scripts/stokes_3D/previous_damp/Stokes3D0_multixpu.jl index 42c9541..3db1a87 100644 --- a/scripts/stokes_3D/previous_damp/Stokes3D0_multixpu.jl +++ b/scripts/stokes_3D/previous_damp/Stokes3D0_multixpu.jl @@ -225,9 +225,9 @@ end if (me==0) @printf("Total steps = %d, err = %1.3e, time = %1.3e sec (@ T_eff = %1.2f GB/s) \n", iter, err, wtime, round(T_eff, sigdigits=2)) end # Visualisation if do_viz - Pt_inn .= inn(Pt); gather!(Pt_inn, Pt_v) - Vz_inn .= av_zi(Vz); gather!(Vz_inn, Vz_v) - Rz_inn .= av_za(Rz); gather!(Rz_inn, Rz_v) + Pt_inn .= Array(inn(Pt)); gather!(Pt_inn, Pt_v) + Vz_inn .= Array(av_zi(Vz)); gather!(Vz_inn, Vz_v) + Rz_inn .= Array(av_za(Rz)); gather!(Rz_inn, Rz_v) if (me==0) p1 = heatmap(Xi_g, Zi_g, Pt_v[:,y_sl,:]', aspect_ratio=1, xlims=(Xi_g[1],Xi_g[end]), zlims=(Zi_g[1],Zi_g[end]), c=:viridis, title="Pressure") p2 = heatmap(Xi_g, Zi_g, Vz_v[:,y_sl,:]', aspect_ratio=1, xlims=(Xi_g[1],Xi_g[end]), zlims=(Zi_g[1],Zi_g[end]), c=:viridis, title="Vz") diff --git a/visu/fig_stokes_systematics_aspect_ratio.png b/visu/fig_stokes_systematics_aspect_ratio.png new file mode 100644 index 0000000..5c1b4f9 Binary files /dev/null and b/visu/fig_stokes_systematics_aspect_ratio.png differ diff --git a/visu/viz_stokes_param_aspect.m b/visu/viz_stokes_param_aspect.m new file mode 100644 index 0000000..fcf2c68 --- /dev/null +++ b/visu/viz_stokes_param_aspect.m @@ -0,0 +1,38 @@ +clear;load('roma.mat') +%setup +fs = 10; % font size +% figure parameters +nt = 5; +sims = {'aspect_ratio'}; +fact = [1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]; +subs = [1 2 4 8]; % number of grid subdivisions +resol = [127, 255, 511, 1023, 2047]; +% vis +for ifig = 1:numel(sims) + figure(ifig);clf + set(gcf,'Units','centimeters','Position',[(5+13*(ifig-1)) 2 8 18],'PaperUnits','centimeters','PaperPosition',[0 0 8 18]); + data = readmatrix(sprintf('../output/out_Stokes2D_ve_%s_param.txt',sims{ifig})); +% data = data(:,2:end); + sz = [numel(fact),numel(subs),numel(resol)]; + nx = reshape(data(:,1),sz); + ny = reshape(data(:,2),sz); + nsub = reshape(data(:,3),sz); + nfact = reshape(data(:,4),sz); + iters = reshape(data(:,5),sz); + error = reshape(data(:,7),sz); + tiledlayout(numel(resol),1,'TileSpacing','compact','Padding','normal') + for iSub = 1:numel(resol) + nexttile + plot(min(iters(:,:,iSub))./nt./min(nx(:,:,iSub)),'-o','LineWidth',1.6,'MarkerSize',3) + xlim([0.9 4.1]); ylim([6 20]); + yticks([8 12 16]); yticklabels([8 12 16]) + if iSub == numel(resol);xticks(1:4);xticklabels(subs);xlabel('\bfaspect ratio (lx/ly)');else; xticklabels([]); end + text(0.02,0.88,['\bf(' char('a'+iSub-1) ')'],'Units','normalized','FontName','Courier') + text(0.63,0.88,['\bfny = ' num2str(resol(iSub))],'Units','normalized','FontName','Courier') + set(gca,'FontName','Courier','FontSize',fs,'YDir','normal','XAxisLocation','bottom','LineWidth',0.8) + if iSub == 3; ylabel('\bfiter_{tot}/nt/nx'); end + drawnow + end + % save figure + print('-dpng',sprintf('fig_stokes_systematics_%s.png',sims{ifig}),'-r300') +end \ No newline at end of file