From fba207fe50e5adc4e1f62f7be03275806a05919d Mon Sep 17 00:00:00 2001 From: Anton Smirnov Date: Fri, 6 Dec 2024 06:14:07 +0200 Subject: [PATCH] Add caching memory allocator (#708) --- Project.toml | 2 +- docs/make.jl | 4 +- docs/src/assets/gc-vram-breakdown.png | Bin 0 -> 48711 bytes docs/src/assets/with-caching-allocator.png | Bin 0 -> 6640 bytes docs/src/assets/without-caching-allocator.png | Bin 0 -> 7142 bytes docs/src/caching_allocator.md | 76 +++++++++ docs/src/execution_control.md | 27 ---- src/AMDGPU.jl | 2 +- src/array.jl | 23 ++- src/caching_allocator.jl | 151 ++++++++++++++++++ src/memory_record.jl | 48 ------ src/tls.jl | 36 ++--- 12 files changed, 265 insertions(+), 104 deletions(-) create mode 100644 docs/src/assets/gc-vram-breakdown.png create mode 100644 docs/src/assets/with-caching-allocator.png create mode 100644 docs/src/assets/without-caching-allocator.png create mode 100644 docs/src/caching_allocator.md delete mode 100644 docs/src/execution_control.md create mode 100644 src/caching_allocator.jl delete mode 100644 src/memory_record.jl diff --git a/Project.toml b/Project.toml index d32d76ffd..dbbe4c427 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "AMDGPU" uuid = "21141c5a-9bdb-4563-92ae-f87d6854732e" authors = ["Julian P Samaroo ", "Valentin Churavy ", "Anton Smirnov "] -version = "1.1.3" +version = "1.1.4" [deps] AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" diff --git a/docs/make.jl b/docs/make.jl index db6b78cab..0735739f1 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -27,10 +27,8 @@ function main() "Exceptions" => "exceptions.md", "Profiling" => "profiling.md", "Memory" => "memory.md", + "Caching Memory Allocator" => "caching_allocator.md", "Host-Call" => "hostcall.md", - "Intrinsics" => [ - "Execution Control" => "execution_control.md", - ], "Printing" => "printing.md", "Logging" => "logging.md", "API Reference" => "api.md" diff --git a/docs/src/assets/gc-vram-breakdown.png 
b/docs/src/assets/gc-vram-breakdown.png new file mode 100644 index 0000000000000000000000000000000000000000..a1ccd6698a64f3065f5166962a6dbbe993f15b32 GIT binary patch literal 48711 zcmd43cUY58w>GK>N)b^If>MGM=^aFB=v8``-c?GF(7PZaARxVi^iC+!y9x*d5ClT6 z5;{l;5L&*6_uKE@M}P0Ru6_PH{}8UpJkQK3v(~-tnMs(2ngTK5L&9sllFoE8L5GYz*0-}}&U8PnmbrN565kL$l#y@PEx#s|3&T*ooY z^1+n!z=p(^FO2Cihp_^5l~UJm?gn1NrMq?=U-FNC(Of@1&H2cN{^xIj);H#NK-d4n zgI5o5&;^vyl%GyRI9WwWsnLBql{77v@CQgz+}LXk=RZmG zv_W#koQsQVP?LcbVZ?A31X^8)CVU3|vu~gu5v5lt^_;TJyh`+>VcOL*A|lV<3{>Ie zXQCYiyDcy&D*K?|P!SS`_fuUBm;gj)l8_8G(o} zFF!v+81`6LIHiAO+<~&OvC(cYT~oLAs!>R&-qvJ+17EVk_4<*y+PeWwyk9QDLBc=5 zkRpOVo#2x{ZWI_I@#4p)n=3}8dgYgx)vTPHoQE3eBm?JfM zQ%CZIM=Ci{qtN8j-YH0uXSIkR{f!qxaw+!KBk#9=*lbIQ4gv{S9n2_Io%r(gtLn4y zak$xhrNvhg$ocQJ;oO%mUxtN+5rMcI=2|e#yKP1W1_qqsBFUxuH`0Or^^VgNjv%9F z&k%9U>LvNsAqgQQTyfOk*yUh{K|=k)Co+}bN!*!g$115)??G< z?#S6^6AuMn30Qo2%hpIrMn)z0>a%%gkg^~dDQS8}#_CX(S@FzCv@4JP%B`m%p`m6? zuB>CGbrTZ?%N*nW@jY=+N+a*zLSsvr`;{bR_MeSAo3L!myGusMA_qGGz(EZS4Oyot zz~F$7jy}H^uB*5W=VLOc66dW{u(Q8cvbZT`gkSTp*1uiBjSH96;i+Fs&HKy8~pTbrBBlR+PBQn6Zw` zN>lbPbb7ID4+ZI{USQ9&HX<(GZKRf;4-)LH3)Ix=6=%2D$8vj@c%QS4wmF_H!NWm) z%o9cGO46_M`5m?t63r_wKnlTK)A~MMlyH zrZVzUO%|~OzrxE1t#4yb<=`bJQ|&jVoTFcdY$DcDFJBf9GjmWi zxA7YP9RoRje3A+l_c@wr@v4yuz6mTu=+x(zKmgesFBbxWt;N@y>gq!$CnwnRRS4j; zX-^zqjh7iFd_CCTC&b66!>8xv-Meko-{0?ZdiXN?Z8|3-wCuWo%xn&Dm<0Qy0tlhfmT?(l^Wo zIvK(v-qWMAf{aG$r~B%r_M1I_-brW~HOm}(4Qdu=(+`l z22fmHp6shORnM3H$4(HlyFWO?E3l$JR?`gL=&(J#FhLFz>0x&T!TN=XGzyn)ha(lx zInT**IU@L@nj^9M#ibx7c-EfsEg}8mDEPpXdjzv?)>tw2%l&(9P40!8YayHMKBYCG zHE}*ed;O&0qpQj;OF_(N=q75aY4f|;5rmY_R+jjS{nVwjTg=H^tVOxRR3>MPnAusy z!M9diT{ds;!(CqS=gm#E>w<&FI*Dt0CHKl7Zw(gSA(!=?s_w1KC;65-d}#C$;gB{Q zDY$KGVvT$*=xvVG` zcsB!oH8nMjjj5x1_<57Q?_)I+)@la7DYay)B?s z-<98kO@LA>+r|1u9Btf}>zw%!aV1|Ds#mamrkB#P^KM>NKI>^2Iu+&d7m%3Dp`4pX&H=s6v++(` zY#v}tx;PG6Qb4=Nl*eOtuJwk)^h$pc`-GX4!Wz}?ck{+4>J38S~PN=Em3iQ8Fc%# zSAfhrS+?HD%<=3V>>V)*MM5&HU+X}_z!2lqBPBVZr@;cf=_jV&d5-r)VT>c;=4$6J 
zz9v7B@rR^wlCGwwPxt$DFV4h=T!{F?!nqS%q7hpotqMt1RQ!xO{K0;x^k4b{4g)@b zbN+O#FDhcWxXu7X+68&uKN8Zl>v6bi=3Ih;W4*oWsDU&9eE{inb0oiC4?Ui*l9ZRn z$jn@Lc<4$NEDVM223%r7>E3_%@Zt0R_HeGkXz4{L36;PL=cR81aqwD5{$!TkO`(AKsET_MF=4p?u^(uFl$3fUqPmbZNPL|wn zc08Es^FBZB-;KBsuP}sPr9fjcixPs3(YfAX(fjXDO{NNqOZr{+W^w3u zOI#ZwW_Cr$@pm$Eww`GFwtxKIX0Tt9vI)}ENo_opBvCfw;(S%g^aUV`5^2g@pm6!pX+Q z$D4e1x(RP~UH@`=Q653T>Eh-#FoX6rWY4i z@KxWJJkH1@2w)ZQV`t-_;Cje`<^TM76Mx;03$nz&GR^L~vq;a!WR+oU1Y0*(6F%1Q zf?$@y2E!p-_Wm>2ko|$X*MY>@m-2&xm=L_3*X+3V~uUYuLpQpuSN2d(%c~_3izJLF|Si4YOR+jQ%mMpy# zPtU1rp!8pmuK@o?C$$L-dwFqiSZCD}HLRd|8>OYKohAg8u1PkfYX)oWkrk$`&n?Z#2t)Sv_K+1OR+*NoX9>Z=K`dgT@P48J`EpgOw$;ixa#^A1 zQk-T27uuiV2N7(f6S2wH05FiFb7TIA5#xCceE;BkAFoE|-bhq%zCKiOeoNE)v9{Q( z0!8BE)2dR-HzLzb1q>uHXCckbWqY12D8Ek7@%B9!HyE(WIdy_xm$nBR+vq^kka<hO5KAD}=mcVqqcV>UaFgwR{)o-rjs zZ(=u@nP9S!6e!!QXy2+6NaF#061cwLu$8N%4xU)irl|W}DHcJ+4Hl5(^{mVZj&tdj zDk-jiUbL*@M&cp!%XGthXARlc&HZxf_(i_U&hv*xiodJhP6x!HUzTrSiw~{6yKdtL znG>LjK88E0sHsI+OplC=OqAI7y(c$> zVC!H&$&JS1F*#zJi(*d=v7=!sHQjc^aY0jZC+4@p{9L;!y)34gxuLCfKSsmHa)st{ zJ5YaG)r97E;wtl^@J8?#W*r`oDFo?aT^81w3};=?0RLNrUCHD zfNu%-8+YsBNZ|DwK=}YeAyU47{|dfyqy#aVxe$;r*YN&^1B>udP#A5gT`X<9n=z5$1|WDQ`xxx zP9Wf*8ZYfJJG;PW*BCy^*Vi{0%wIeUfb^!APler`=|u~pGfp(jWh#A(5wb01Mteg1 zA>^-3$ve||7%Y`TGtEK@De&dK=a?RcrOW*NWAFpLA-`PIRS-XO28Wp<}jHi_wAYWv!AIQ!}x)bG}1i!>lVS_ zzX&WHl@0)^VcRjT(FD_2goTAgRaMnMs$i+% z6FbZHJDRFO$;$;5v}67Tk}ekA*+8Zr8f&IygW1jS7OE~4sXN1B>;nXNli8nYPmQV6v4xxpG?n&Smyc%g_VnLR zSN{e2jh~}Y`6}M!&|4yUp|Fr}guSga$MU&Ir*X{h1ulzMH#?X2;oEH6E38HV?KuU5 z_<+mMLVT;Md!Fq@QlopOnFydZWO(%uk5o352RpePq1K6`W;7D)T%sQr9&0h!vQI+s?;dQa?*Uys)p zC%2ySo*qBuLnyM^i@$JuR-QvlmrjSI!7uj+W*I(4V{)Ev#E{f`4>N_@vbohsy?i~= zP9nX=EeCk{g6$Y(g1k-9N#d1i*A$Fbj>zXrIMCLU_PJ1i9l%3)`^ z5VtlG+zlvq{itYM^kSyk!S1o{U@MbfZPhF5qSGk;3iF}*{R)%&H*WSbpDDF(S=5Q5 z@M*C#mWJFU328}|zgl8R#-KJU;-o{gMGos_`!C9=MO0%ymbP6VFuceoObC6ZLx>8J zXP;^+Nfp$u1skIRVqZ6>pW0iq=g5qkN6~o>h4dHnwVLP-7!`@Mnm$Co|1Pe%pIYBH zsk<^P#Va9sn66+<80+Sg@A6u|cKy-W_BHa?kKx=@qlUvCpM1YPW3>yiL`l7hm-F~G 
zPpsY4RMc`S;R2I@Dlfy8nwc}&_b18Mh9%S_S#N&LUQl?FG9Mb3D45f($srB{{jC8Kk`7gp=-HT_gErMP04@M z_Cahk=WEg(YUP0Py!;pYh(Wjhef^Od^j)iT%L zNCHg#9VEGS&>wstKt$iV`{GC7H}EFN+%yaymHcpm`7UCS{;$1)g%>!sjgYT{4_|+_ zzjja?e@%s_XZ64fP+Q>8YT8Qgs1``z^iQgh?%JCT!sHd3PZ@$*CNiU;k^{JGw_tWd znJasqqaHAGnAr6VQ9I9;8oQzKEz2*4f8|d)^9UUG#Nreik-CN`${J2WojWKkEiF<~ z(gx#-+&NyuPcG(ID*yX|Klee#xq)qmh_0J|)eDaGk1E^?6I5Ltn~S> z>goUXE|Xi>nygXI*UHxTaU-_Z+Q!BvKR=&|i3zAG>0{oHr71FoM@9-#AbKmMI?8Zd z8qWtaL~R{OBmd);5Id>)O;}lRIzX-xhUngc*#P6~T~B$In40=({P8g5e^AxkO8V!&fhv}UVve}KUq zE)U)ex){U$=7gl~N|p{i@#rzQmOE)&a{e&J&6!}EK3^vgDC2mk z@+8A+!xOtyL-4e3BMHU4V@lhxhh90sgTD95ySz1T;XiMc()_i|N4GtdXy_fE>CP#a zaI(?ZgF5f@Z4l%rG_ZWIQU{VeT{!m{%wxA)?KIETa_mwM=6AN?lCq^g$Ow-^fx6pQ2&Zn_tswHJ0u81J)&uCQ&;ErDpVAzRtzi zCDp=RkOn*ak~zvP+mOi7c2e-hn2BFq6U}q zy$vvEexBx=pLJIfgJBp)DtxvOIQOVa7iGgF+{8Q>N<*h8Hn~8jV>+I13p6Sq}2~Iobskag?vH|C|20 z44=n|%Edjd9Vq+M)ruvneqWg*I~D#M8DGEPI_A}gF2h3oGtBADcP4$L%0zIdJBKDN zJVqsIzsJ-2$yB;#?1Xl$*IP)dJNnin)DuV~lLJ;KQmDpJcK2vdDOrv??k%U@LzqJd-g#--`317)VCz&9x?tAYhZ9wE8lqo! z6N48@BLcI(PWbZQX90XSV>_4@kqt-ZMRq2Bet<>&E>kqroWGHslD|Lc+@TvR(e85K zezo-*ST>CZ+sC@Jek*C`W)^+6*nRb<3n$!@zDFqreGw*dv$FNBn{&U*?UO=wnxX6Z z6_1DH>vMJ>0UYfoRc6~dJgfbNq+X%yD}`X=?!6V<-4^-Q?~vy1Xfp42%qwOFo1g}! 
z5cY&{BJiyD$HM7Srht)~Fjuyo+o%GyjEDE`S-j$5?nFuAv^0n6opPJh>G<`eYtYBs zRVT|4DmU2L1yfeDaj;!J(ADc#WlNZA@zQROLCExv5eT6f^X3Ry&olfeXx={EcW3;FS>nEcTd6wK(SwjoSLT zt>vNsz=iu?V%*+5x_69ADuqLim<0*{*w`GJ9o94ZGT@b|pvWj|>++57j@?SH@h$JI z&4Jk}y)3HsxgjJJ)0T#z*X%@_bf$ML9vf!;0wUmHDUL+d*F8VW{hbwc`NJ>y>SHAlT2e++!W~Sx?YRi_M|0$HXD+gNp}2;P|bN1 z7PjVeVyU*!B-1-Y&_ASWhYQreTuJ@bKmxA@c$e7N*jEAF>6B?-@$ZsEmmitk^V_pd zZioK9)OllIoZP_QfrP5+%;X<1le`5tn!dP66ejDk+)W%zOipV;Te;3=w(2<^QYneG zIon-N)Y5%>?2hN6_wD|Hc9AGbOMsj(TFV1r6IN{QgxRxnGim{>yHI4e;kTi;*HJE1 zG=-^;b~L2zSj4Tk9X)h+oQv~)MIoz3Ew1**C90x^`lxp2aMV=~ zg8g#Z@v+sTSS23Z-U)eu^vh2ES9bD2|0{RF`HlRSr4m6>y`N)Oo*Y#LG9r&6L zm1dFGj6is{q~B?z2b?*-2%HbM8ZyY51H579U-|5wbMhGI0Nm|>e{_6sKx9l>XYXj< zfx6#=0htBtxMR87216W|NaR-aw84G%y;GTe{VmS41|nUrmqv?!K4jM?W7cSVI2{Ku z>6zeewOriZe&fH%^#iFW?}~3HcU#@>;qEA#n)5HRYy%ENQ&YeVG^?(ZeNJ)OdYKKs z5f5_*iVkN+ff`O)R6$3uileZ!~>FIJ+c4E+br&D^%9@DeQZv^Nr&?<0$lOt%x?;cf1O;%V$uIZ% z(=FJLEk}DRT*9gZ=S`r`%hs%b339GYX&{9STbc2D6uh5>OT6~bziX#kh61+~YCcL| z=^-)zrPlr^(9d$aN}PyQ-vN$Br8Jy#Y4w#?{?`A-CqV!{`Mv?_(&A=YH`BliS{+t- z%C0uCx9h`EI>fd>R|@AJP|$qjpIt~aVKzF^WtFv}sZMlv1%|v{RjTiWNJ*(wcW!)J z@O+MSBf6PIcb_mWE)Lx<$kZ5`g!h(- zA_*Jcz%T*bt5Z>T5j1)fo#lttad3I=#G93rQVk8sGJE0hIBO`t#W`hu$uZ2Z<;yJv zeGH~NvVZj;OIEAXIJ>F*OEP)-!gp7+KQB4LQHd1=X--K_RYE5O_@$lJek?E|y2$50 z{xS_3D#%-s+l=8m1bcWO=|MH%8rO;sIxK+JDPr! 
zh1B`2)C|mK>t!X;f?+$TIA7ZK-%96$ccZY~CpZdTTQ!4MUk){c?jZ=KeD@4rzxJN# zmK=;-bEUmPsy__Z^sB#cuAbFjeOqhIa*UDQ5qOrCNUt@O6o;fFaC|&*@zH*+q}LMj%`u6&a(>4 zy~5^rWGjH$PA{QPxbNVpB%xl_F(LUL^*bV+6N zBG|n^WN?EYmkqU}Q97gosp5Knn#yWs78UUQl%bwz^f3ns?9AuC%;k+J>~hL!w?lAy z${=JyfmAv)`&uvSn1}CejpvX!I>b}nN)MtCuydzoXUb0EhN$pr{ks$UIYF|!)r;TD zeIpeh_Gw&Ao`{dB710`KnwwbUI-d+A= z{zG2ap+`-3cQ@&ZQSd&^z&ye#;m)GM87YgeH#1Yp+@M0m{sHlf3q3|kPu};i;)3^e z5?5BF@$xQJ<+c_DyyPXUaR%&vVu9Wmd&fqI8}#d!wTy!1o&QwgJD4m?R9PiGn9kqd zw#2w+`&zw&a&%IYqy-Z=XQdZNSaEI0--}LyT{usKcU@9PVV4rpa<~gYshvSXt7fg< zW4DR33H}o^VV-~ka3cR=1HO+@ZGvi2U3=1X2k!(|TU^J5J!$cz6!{M&{9}BcVBv8c zHAYf`5a}32oh}*}7Ml?URP^%K0n#Kk7BTUuuGp%_GKwZ*_|jlVbgebBvv$@{FO&A2 zO;6xlEl5Uu*FcXJnQ3#uIW4L;Uws)hBU&M@VCvUDNORRn)97LB6V-BBnw||Cd%1^(w@7G(J%1|>Jm<;xTXu7(_jryt+4=w$lsKNMhYqwZPw}^% zb|Wi}kni^9?C0C|_KfO0jM|T0wTpCLM8Zu>j!n;wR38L5;(n-yp0iZ6PJKsSPVvn7 z09(pi8$e&}!w+X^eE!RBAHcP!In`l)3DISR^&@)*^A^8lh0P|Q!z+Fd?I|HH^OpZR zA!xH!;4L}>;Hc_rJFkc9Ly4DNU4HrqTI z)M)H*m#0)xN~#k$KysMYQZjegzfKCMLj(8n`8A3y9H~8 z2OEGCEm3>9-K$Q3B!E7ADsAKM4Nea;AD_6DHSe)^J99t!_MPj+JAKI_=0n+GBSYf`NA)C zTnhwTeS2vE4Lh9P-rn|1vhnpSYIMPTh<~F$0Z`rb5!dR?9-&ur%%tmk9VQP+QBToB zmPcIphUBktL1@BJ86LaSU_!xiLrE++aO%UjH<*<2!5^ zm~0r@EMTnQn+A83c9i)b7wu1X5dvIT)WI`rUH;miJ!A!Ve}o-_h`X>KQyvEi^Gj6! zh)?;~iR85#vdWYI`mbCp zdSdcv`MOkK)t{WGJB#_xPU;Js9~w)x=S7!>?nf@wH-Ad;*o8(rH>Z#WH9i!}d#${U zw}ra*Htf$w$5{i-)zW^9U6QAl*8@zq2sn?DoC)}6Jj=|%_1yF1FS4^p`+6uLY7O*m zim_rODzeQ%V7apVQF2@XjRDVV=g>q|(-$*eN|8l)g%RY2_6ncRw?UeiTj8&!7O z;k`=LTL>XjVk{guOiOI`Vq=Nl`h0h>SURDOw%E4~wQV~GTUKTZ?>lCUA|WmUDHa^iFccPD|3@4W!z7gFCHuWDps^H z-I~u8X?_A-oV&PzJqU57sgov;skHXxKBgEj5p6x_k9ii-#H_SYzw#@!R9tidu%Qtj;=VC^GtQD>X$YX|+=0(%Zt_(XUE%w^;a*Ewg!(mHEdjD>nH( zZNJZ*nT22%ipQ^~i9V~9EdAaqf5$1>?)QqjzB|&m$?r2N%Ge*KxeL0K;WYW$t>$ue_Gtwa9E3m#-! 
z`>vy~5*uTq(4BMsl8iIa(j9v(L9G}HzOtc(Hzt=J(vU{J`-qsF;4Gh(hp`2N{mN=X zdrq@W7bmmEZr!pp59_w7o*)>M_9l0mZWA}KGwZel|C~Bb^AIuP8iI(!&3aOLPS}pV z#Ht)Lxn9z5cooMg9{JSF-kOHR=Pju%d~{zPp9QgZ9U~(*f1k$Qay2f@wh|4ApPA*$ z_sCL;dnZx@@I}Pc6t94|uIVUP+LHO7&4R17z|$OPaue2Vw)22+{X^CYzQO<);fGmC8vBk?UP7IE`qin! znhi0vPFg$)`V^U$1-Yvrg|{17zv{6xPbZG0PYSsM+OmGkwdTJZGv_Jnn?7vYY}l4# zi;ZWxoPtfvw}IW0y%l9+8QpSChahpa^`S9&Yrj7~ftGt0F!sih@lSs+h$|i}j5oBO z-%IyaL_B9vKW(+c$_cH@pTuN&ewa&#d$u?v4++_=(Uly|)Jw<&@31TOt~Z+a7Jz>h zPpE0e>=*WU^sErjKmJ^XkLpvmaMb9XilCTW8{U836u{1p{IGTKwa=?{I)EYpesdfb zmdD1t<#&vA$|ch6S_C(fn4xu)s&BzubMuiPH0(h$lYTx3QH;FTna&_~-EfuK4eou8 z`$t@q>_Fh$p+y+3vP-E%mZ$;OBWi>(k#c^|RmJn#?89HD=rbfUs93e_Z=KrHyQw5{ ztRK(8L-Ts^LJQl){5|q3qXj4tG2v%#WTt%Gq?OrrPKb97j>ih^6;2Tny3(@&$IqML z>U?t)?9F0umG*$5kE$54!)^B(wS?hZE>Zr%3-$=`_X$FbPfz{1CrJi%>^@b*sT)Sv z)YsentX}Wsj)=Bxg@5zdXNLSd4#BMevogiRSo%cmJ}BY+O?Cndf)?S=I`w-gm)K0`&dxqrU z6sAxmHKu=Q)nmJ6!JAcOu{M2RSHDF?d(U}~ecLvRf;Mw%<3cqx+FG03k!D%UpW7{h zwzZhhKy6qyFwcoc-(Q*4D7Y+S#qQ%(3|!yR=rmphbA8tdxmDnGjJXKUqfchg^)l-5 z{xBI^s71bg`u!rK_Q6IPK_nh5{4gJ0r!jv~c%Y{@ATrXKu6BHK{U6(Y=4Uvzw^0t@ zufulci{@(n+q;?N1vD{w&u`isHF-Q&v-QV(UGd5|<7iL|zyzcCQ39)q(f*6WK8tR` ztWlUO{ajGSY|%ii*Kz^0B5zLFuu9Vd(OPw4>y&BRAAV6Kie4enA>LbjnfX<}z~SQ4 zSCGOtMpku9>2Y8{Y}C*zi19_;*uH#G$B&GrdR1&xBUGfN`ov;I6g3+1ZElZixz+BlzSQdEGb}k)X%!I$m9bYWH}OQk{bWA-}q5JnvGvNRLJS-N`$-&1;4~) zg;+9!lFfKj^i=!9WHvF0ubW>paK}p60;ZovR~fkwx%>U%b)mU9j;RuD}n_-8_CzF(AUUSSKjKO7q2 z=4~K^wyVCvQkGe7U7JnH#*WxM6zQX8hGlsr`p+ZAlLb>ad5$CNI~*c`1_0g7x|ra> z3j7i2CnwL~?)*`Dmy%Bx+tg>~2=jLvb8hxUr;l!^y2hlx3{bw&T_J@-DO^>6_;%`{SmTLguAnS6YzCD(ab>g`Atf*3~>u17O zzc%qw%!kH?U6DI?8hTESZq;e`LHMG@leMd_7JbHtz1*o+BBH>VeI+(pxL^H6Gy9YC zAtN$i$p_JZJ^d-0++$aLsQRG#0xmu2YfCLoJ8|?y@D>{Y(zyxTN2~{g5{b(;r3)Kh zkfJnY>>jY$9rZi(9J*Yjo-zpNSSmOApS?qUp$>37LMwCuYKw0iDzox!x3zl!{rO|~ zbgTadrN*E2?bG*?GHWgkd+N&`gH)(tRPMGqc}dRf^0a@0%mGRx zmQjWKa-wnf3)P2RM;;19%&5^bqsBUakJiT;vD9h9!vh08gDFwoic}2OY!%0A7s@G9UHTs_0ZgP0iXWQ<|C@ug= zkLFw~s0RSmwx9)x+kr0Tar3_Y 
zu~R!^e1HJe%U}KAB5iAe12ZQP>phtPKYO;hIRZ5`GKSIJVGd|))3PJ>zWM<2;m493 zqD!!=2XqdR|MUBN)JszFap_@nt@f!;oo8?HtUc^VEGJYavCikHbld2O{csaD-c4u} z%)=~v;b(`gua0Hxbr1KZ22PDH-%q@$q}2|r$=wcjE1}|D`B6w;GTU^86JV*4(tu;f zk;5COHMgY;xai51iZ|WWpZ@Qh< zLbs9WJynv!03m*PU@23m20f#h>@WthE~643xC0?L%#GyE-sA`WMo_$JhqlfxjAB36F=8XcYlW#cJ2r94H#)>ai4_AZ?` zft}w&b?eN~nsyl_CZnrqs&X~ULQ+(4qMnV!32_2XzkMS`S>a}MxZ(dUpaA9Rw)9VU z*X@g>3fnc712^ich+fCAeWymyR@IgW;kO+INDM@nQ^yUYMP0PH8c_sIDVZcssQDca zgyHO0(i?xCdV0kPREsJv0D0u~3y{N4O8o=}II@pZ${%nKL-gAvY6O4wAj-nb2S4)n zadHEF&vd@{{@8%rqhzy4M0UAsM2Alj9k9T%40}%e2L}1(3bJ$2lS$dUpQ!&p1cl6e(t2+Nv>%MWxDw%VjX=Bl+cMkG+uOlsCWj?b<{h1m1-!x z%WuSQniEjr?`LaY5S?O3)|vjuF5bu#=zCQB7V~K=fSyO^3kN)6WfV+a0|Icbwqtjg(ok+Evg8$I0DK$G6fqaedLm5s*eHQ(9F0EyL<&n&} zw!2whY!*5x5hTpX;a;Y8t&=)Pc!t-`jYQ=({vY}%ss(JxnVtP3{DDb|sls&G(U?HZ z+c~>B+ZI`@@x>`({27?P24ERv23hJ`uI#Q&a3EERl{GJVYiJ7UFNUz$awBpPezr8r zXM&>ro@wu*#_i1BbqBiq8P23|ufkdC|A%mf3wv&`Wsq^rp2yBD_gmoK7qqY8jNQ9} z8X_{1CieFLik_4G&FeKH;O-U3D)>*v1UNl7ktAt(DG0l$;-*wcCk_(6c#6+7P9G?W zDzjnYx%07}koI4L{A)>4yRV~} zqs-Ojaj)JEusM1-%YPyFYu675=>DHC|Nrv~twTD~mrOerqF_V)2xzHqi-SQL+^Qcj z&-c8H3|L8SU&7`8#DBo;_MEGq5$J=OWEcq~$1z=w=Ud#!D7RQOTZS4vN!H#|^Zywz zn(B&I{9U+ltzBF^JzdN@^6XfrB8V~MHRO)3{rap3K5_YGb!F@@U2;uqNC(f~w(s}= z41sw6a#Ph%006@I(vOn*RVvQMbI-W?-jZITHAUi|)Q_<6@#f-X*)7@U`_}m(kr(dSOo!moAe&e(F_!oOdr@;xxI<{23 z+lRW1;xcRVRfIPCVq6k}_DkE3*Dj56P!a-Nt$~kB3ePSC3qq43V5Lr07u?Rplx2qt zqk}bHqPZ#8XIr{%i8hv0_ne_3*R~c>a~3~F-H>l}$aVdeVRH?C!l99p|N7 zV&~PpXg0V0Yhw!~?keAL{1v!s!gu^Dh*y02vx^&g?eH`@^nj8#jpK5a%a6i1DK5`^ z*Aw|#)bo&cFNS;Ss_ti0qTkSlA3w1!ovv`PEz)2IM5tZ*+WWU`!1e?_s?5cZgP#R< z6DB7ocd$ByjjbBUg0vSE74@a8KLbqB9~+}wUe$3v< zAv2xBk5)Wao8cJnreF1!>+?2h9O_DJOq2EC`#jQ5x-LFt&&{WEGANNldjd=z%ZINI z_s99~&Y_do`C@fe%D|JaTs{}|8D>kD12}0c_3l4@)FN;USY|a%OjR?1hzE!m#$E*K z*|U|Rv?;cEUZL+RaV*&nZCD)2LBRVZqP9)+?uJUx#_G$ zep_zkCpol4olH*E`_wdHTs%lu-YaBulB z{TZT%Q?^@EXQTy-L5Bfq*~cUsU1 zHJ&;*WAgmyM9@?|e0Td|`R6^ETO@uf3D+eCO>5MXH0$eyRX@c47}YUW01lBTdqU@P 
zlrCDQZl~5Qxjb5q5`H<#xXNo3v1Xd*Q9tBgV}C2lNd~e07HDE58=h*o2nb{D&tI04|oOQ9mL95M9HQ>8!ivW3g+qyh07jGY4gzGdERv3LCaX zKP?}xu1~KDY_n)x+^nOmIn7j*iw##$F|~)Bxjtb1kiNzzrbAN{lXs4S_!Wzy`b71@ zXy2*tjyN5ykL;6HA5MU1p$SN2V(h-pbr~P~FAdc-^RAS?0zklp#^0l3w7~3Q<(mI7 zJK?L@)zmj-ZZam9?r@ZX5i$FtpY%mq+8L`fzdG7)IydYVT5mkZhcz01BT;C5wgWkh5Yx<|salG{((UdJ z4o_w%%gqWG-p>bL_!$&drDc}vmT9jzc_Q5`-%9!rx^FzY)@htEoMZ8K;;BpqQf$Pq zky%gOLqv@AWDP?=-rixTQ@BV+40H2h!~(Z;!RE#SK1<+G>S9$ ztiBqk>+9 z`0U~{zgq_MG%(b_|C3a_hVukKn1cmLLoLAyJa(q)pKa9ue?X1O8vVmjXs!aH6-}At zec;pb0S~?&(6fw#<*zujCXa8Df0{G7k=O3qg~od1yX4)BKYsylkT zgn+WDZd2lx0zLI8?o0p47zYNb5&=PLeT9E_;T|FdNmibsO(!heNcqprwT?Di(gS$P zX0%`D(x?J?hHNDZ2A_Tdg?}9{_SHzuCRIAAMXgH!FP7Y8@p6@m}>k#jJS_R$3Ps%Aqyi4jvcefw(B znS+TZ@IS4Y6iFU)afCQw17Ww``qX0K-yP=t0s*kLe%&9TQ(%X=S^j!me|dboK8b~M z_WuyF%7EiGzr6ac9AFyG&2T3&h87Thi-`slc>J+@R-mDdwZ~(u!)YLvpFTW)^exys zte;hV^Z&AZ-hodPMd_aa3s92bG2Wh9LD)F_($VHS1c()s?7Qfkiz;1R`Bi(8*A?|# zzO>3|`wwQvrY;HP6`7avzfVTYk@2lreJpCFI|X%gwD=1vd~}+xe>Y}LNq{{U{Daj~ zKY(I@YfHpJ^~4fqyS~-LXv=GkyWNl-U#aqEeKX0aX{ql;pqI6~x>(o~P*pdKZbzku zcv^n94jis7wI7 zzjB^e*M`CRKcH&;UW2k(^5zjgY4x{W#t*IqKgA7}R*tw&9fHIEL%b!nhSuW4P=IlIV;BQ%|G+t3ZFH^ivrKk&rxt#|Hj zS`A3@L@aeP*Bi>oMx8f0zb&CnaiyG55*z%pbmrjBUw{u%3HoVzeI~-5lha|%Rdf8w^R>35PPsW#eup@Pu4Dq3_I!7q37yZiL( z3ujeYm+bc@+c_3T4WxaERN3pL>E_43R-jF6MmJuaOBowWRv1l5Yi3}ZS?&7zJ$kzo z>h8MgZMWBD&#iK8=}3*V=oKD&eUtLIDoV-~T^FWU2*)5YCegZ_w0AS%a|*Ws^R?Tp z^PSti($!uboa+O)=$aq`Dn|P|ByxP6E3VSX&>|Y`VxfgZa=)Dm+&zB&QyBwl@{4%$5g=Vy* zTcDKBX;&KP=E~Ka-d7(h)X|3M^v9Z8jDM=d1qcu>GOt%C3*kQK7o(!pf~h8oR=EX?{)p z?J|%tH`O}Kj8@G$`sBju=IZ6DAF{yCnRd41r4D#}vsH&>3-+JD>xVv39_3MvN_XyT z65zdiKikytT@NDf$i};G$1+QN)S-4CMC}%72U@zKLg?=o)tn&b!M97v*IuLKAm6d% z7fc|OuW%d#`}CiseZwf;{xgT*qN(4$Vsui!F7e6d6utx)c%PH@0k|UTV|esH54O6k z&IK(*_IQXpGGG}1RiO8VyR&Xub%H)Du!-3*01>oa)l^C4pv;J;J|gs*(A|Dz@X1b9 z$%DOex~f8UE=NgtRdQcjsmBFjPbj?4(i0wr9c(hq*3b11A|N4C;R{pS$C+=bH+Q99vZ1$AxL?u-0~{ zlutNNf`=Fc6kQTQup#)!>$m@_)!hgGeRm?iT$`kWtG|%k&H_JX=WX9uW1(T76qPP2 
zWAB!g4|$+Ug7|+9F6L=)^_zRuD4_3-C?jE<Fc)bTPc~0-S2iQHxJ$O$#XPo-!#zUPjoF=ltgd zDgXu)twIk)R5w3P2rbvBVl(v?V zu=Uxt=eK{)G&!AZau9_1-MBVooHwle4`M+{XIIZ~MP-$bn~&di_X%=>%w$QI@pl8g zmhK>5`1zsQWL<2TgUl%k?E6_{$aNW#5teFKH<<9Fyo==o{(j7OYgB}f=Ypu|;7XUt z5z38EGIqis0@*|kUm7j#9M>H~Y;-o7ybkN7!{667TUc)J8!^I$>g~MT*6Gh#0*J)f z$Qnm{LAaxaK+=c|19ish#S1~L-EU{3w@Y;3{De>L>n46|CVQrPO~>^Amsv@RFaR>7 z@}5J599T*TOA?FcNg8N$8_vPD5&1*w6b&euv~xbE@kd?|P_9pk>p*f7K)LWX=JuJi zPdtU2uyXZzmbv{Y$><0HF-wWQ>@--JnPGK52U1YZ|Lp**hHcZaCu$nKU6KeL9;wx~ zFwNG#1oE_YySI=M8`c!`dI=oS(0`6~=p%ukYZ@ztbu01He=TK~dA9h%&glaGgA~Hh zc2g+hzf4)gqYsO_mu?gMPwJB_uw+Hs-5|$&&5QpvCM**No?_sZTi|SKW$!A*kFFUpr;DowtaxyZ^pity7 zubW}U^HCb;M)6Ux`{qjog{<*=)hN0LfA|=KVYk9?s8-oXfBxK2#Lb1f!0(Bt{}}L{ z#$FHs9h_xU`yFBiF1#AEDc-JhM6y=1w`AYVIH#KY!{rywvaWYH?RVrG6y*-n_KJpD zq!=ffjT_pX22-?@etG;H6EM)rp5T6-|5L?Wd?@*!Y`HAQqi=KwK;alt^wP5CkN^L} zJ}c*6-^(_E1BU>)_oa*JPXnzt?s=tST(wD*v5BIRf;-6ALL#4bIWF$Y#j(=@e;mE% z&rA1Vma?;j<6gi4JI!gNd&v}9Z%A)p(Gm8@-Z>7L4@c;A*!Of1ZCLPM;&m{t3yO8y zM>g`U37ld||MJpIm0s*{&&3s;5whg&=lPgsP`4awWyt*Y#w*i~%L_kyuN5}1CVQ}Ucb&olm) zytmkoBX|*Q3NK8N^vs`82};7cCfz@?whsYU|MPO7g@*3><(jK{Yz;OKwirseL2P#L z8*Hv)#JX(Z=FavZHU1`-I<-Z`{WV5-o1OdD8hQ2y=9dM8y6bF*d~J$6m&+x+tvx_} zZF$wM>*coA-);tXj1*R3Nt$xtc2tP(!9bY~IiXz3p0lU=i!QY#Ip)@1aFLvwq~nRl z-@8eQm3ZCro;LNF>d$RVK`^;Hxw6xK$c4Ch1fdf?^*nw!==t8JJ?Hw@>;F0~YJUks zgmq!`^-00w{f6z0fl}Holfy&HZlq`Z*Y4#3x@KLvv{c`a3W57g|Ou301lK zSgutWl!jHbGC%rWTeRE@m42&<=Vh0te)+QdFpS!N6qBMKleEn*fR#$G#OS<-T!4%-$U3X1!$Ks&ajx9Ce_l)phDl=jrTVFjvE99cTLg z?#A=>eWFbngIdkvxLe=r9HbG^8Lv-5eZ9L|iz7n5of#``GczHsc?tZ~VNq#4gZEs$ zP<>g-o47Uj}S# za7ytp=+~y<^BU_#h6z~$ADLgVNQjtJiIE&E$ru?8zk``E-aB{fPG{QzQCf-gy}R?T zt<~W+O)(9x04)}CVoR+83uk~W1p_EXQqQN5_&7sysV||Ef%d-22Co3_2^mwDsHV!G zu_(*GyA=>Q)@}eRD6a6@c`{4KXkHcHeK@r;6re1RXM)1!*4yXbL5`MqLS0umae zqyB27P#zF6g-TX7h)cV3#0zuvd`K?I>FoRNGgAB%h55yqg=wkrw|T+Lv1$tgNK5Dd zlJmOyna2XT6?AR94E6)9?ZsGM_=WcOx+fFTHL`H6IinxPTBXz5-XB@VEe=;%wv`21 z+iS$UyTVt!WlS2_b3ap?KY 
zs3u+B2dxHH?9Zol2IKO#ynt#iU8GFpu$7`WBc%AC3s_L~*kZ(sz@x^z){9D}YS;g@4V zJV6J_6ll0*o7N|`gH9!sXVx4RhQqQN)r!Bc_b_93WBw{U2JMD9oYdvOxRi326ld1ld{sLV@YZY&xfyt}>MnYu zo9iC+bhiG10f#=N?1TS+BMbU9b%x%f^+$NH4}nD#YvVL|y$T+;)U zsP~ziM2g#oIA4+rT6x4`L|ZxP%2|y29`|i%5Msg<`Tb3)-iEs8S2@n|q|=u~&S6R) z;$)QFYv8VJAL|*;_V!(>-sqv)Ez6Im-CW*{z)_P7mt1Gxc6#Kfn46fJ2!=DA6czld zNh36QPz6)@{fNwChvst9M=rs)?AY<$t1eU)(jR zR7|G&l1xGn&UGMr`W+y*k{Bv}1NZJ%EGVS`uYH#dFv2R)6k*AOqe-fhLvELD|v3Ua-tMCJdXfHkoFE8&tHx;}R+VS)5 zlqvE$@9vUQZP|^j(8?v2fv;7Q7N5ENNe#=j@4SZHjuMD7K)I|h zXU6kr`3f@=8$1Vetb$Pam>pb-LFlb|k zNZ6^Cw(F;}RiDo~7)g%={k`Uj{jMoT?je`Yh|}Xw;z0gfmw3k77g1cuTM7S_XeV^d z_~X*PxEZC9^+2RHOWZQDPWfSm(cYIV)anNBo>PEWO%{)5P2rjhe%cqe>oCuV8QrL9 z^8A!y)_VpMV-!!?6+j=q`$BY~6gwPP*-zax{G`@eQMEFYBZl|ywXM^j!?$|U+ix!7 zLffk+#Y5L94V=6F+QQ6;;JMJsDxQZP*J@3IoaAKjJ|-=m4xe($N7yiAbVdj4r{kE8 z(NW(}=KrkE0);vBqKd|AtUsf{?>`%Q@%i%;G_-ibhtu1Fk7i9fRw&m9d7gVNu;B?+ z?hIYjpzJ0@VMho1QRczPWpWlUq;Es+SMD{=$BLXR((IedTnMn z#4$RdaM&8t2WlZCUPLt*KqHuS&MT4U$jQsk*q|Tg)oOCQ#dqMPr|>X^In(}JSz1Xy z=Tg^*IfL1BKj${vo%ZA^dzqWf@8s%tjE*+Ami{&FoohLSxbZ)%e(|*Qiyt)5xFT|R z<&VDfA)jt(gxJaDO7*ER*fbnP-pq&1;2diSad3|Exg?>LwG_j_#gCk$+nf1gD}8KE zc4W72MO=NTdjhAkwTt37HRiW}L@ZbA>=7g6*UhHiddl&kTlKLl(AOk2*G~F!_&R=> zVzqmFJ-^7<&d@;E`uy1L7doTiF_+QH%0YkU0<^)$wo-AsEF8ogKw=*pJilRFa7G`=w-ZmV_r|dTfWoYazK}LTH4dF{7*p zHo`WsuCD~OxLh%lX@H_6V-4gutNCz$ot40@7jD;89fiK)wPnDoi5Sk9dDnLh!>8;2 znxLZ#TCG1+#Onp$qMBI(D=`or*GAf4}GM=3qC!Kks7bOBRu>3va|;sm|+|}%^~s6>NB@1sJ!ea z*M*SA!@9o0o_+9+WWMn!VfiPwh&0Qd3xpT1r0zeT%?k`^dHidZ;N`_@S};?Y@)T3# zrELjcV6V3y|LUf8X!r>9HTC38E`@xe>Cz+zZf~-9;OkB3{aF*QnThS0z`fYn!<4^; zMqe(11y3y1X9%O|+n$KCQ(%aEu1LtK@>r8~GBWTv_m3EU zbL&|^We@*8TpxNF0lme@UQI&@kdo3}veyX%8B28;V32 zxE%$7y;(NyF1XM-J8%P|)7;zAm@IL2+}p0_tFXY|l)e>Nm-6Ybui&hYsP&EnD@?BR zTeMX5nwMtRHx{0aLp0Ejk{L}8>3%X}UfIrU1pX~1f9Z#dkXZbdkEXT2C62 z!FR|+FQF3o|6a*;M8)J-Gm(JkGZ%pA~tpv!_f3gZn$iJryE!9V9M6~z9y~Er7rkIPZ^PT!^joM-f2BpI?&wdQ` z_Ci4iOm+29V!wfSzn52cIwmU>SqKGRv0;#Pz9>O7^x3XF#nxf@6eex@yhLUB$HYpy 
z%B-ZLEEsQYdRiSK%jji4??++}i%^J;I`iy8McaTGwv1U+tex$JF9RgOO+tLL8556^ zBaKyOIpKO*TC!q=aC=MD1pTs+t~>*OM`UOPe9PIpHxsuQrZDW{O@lu{e@zVAOYcTPKfrS1HA)dgDo`}D4J;uXOH0KPJUAI zv$r+I0O!>(5}2{bK3bKmkJ|0-NRH*WXR*(`FCNliybJyABWa-YH(AScg*}N2dM7e7 z*s5^k;*})AjKe^Hl+zy3GMD(*lb;EhWv2o7agSv|E*P-s(-Pb7XZO#*Et40Dg@wGc zow)A8G$%SUGpS_h7Jd2Do2y>1U3?H{$Fn|L%^hN*j9YeLBUd!7-A_{?=T zq$xYWetrzq$&--Zc-S8eu`01G{ivQ;%*U5{@1Dc69-cJqTmgt|B;&brUcF_y#!^yF z#^ar=Y>C6AZJn46^)y?qALGr_E!9;XF$yH&lAnD?QW%5m=?YJiyWdc6-qfX{Y)0VJ z%1uH!dOOEE%ihTrXIwSxjPuKi6Tt3eDOi@+#*oU<4i(frFbn#y3*OFKE8rapAxnzV;d>ou$8rDbiG`mgIIUk z;T(yw-aAxEsyD=n(C<6uzq3$eDehra=17^VUQW>~y5a=~A7K5dJ7RCo8MU@b*P7mT zYzil{ut-J7UlR~Y&zO-XKJnNgGMC%8*pEsVF#en!oNh_=SiA;LF{zd)w`i&}iqA8E z%fq(D(A3eI*&g)n%rnCzZ%7`N*Q$MZXUrF0O}{bk?)LR1ravztl-1q2?m2i@?m6$~ zL3V{E$~o1#+qb@Fg5r}PDpDeL{aHrUDaHlkcl3upW?#ikgX;6AcfQ7Rt9In+mo9Ts z-yJjVXcs5dN$d-v=b{HY+ZckT9|&7lTWArOigblZ!*=<4&G>g;S7_e zMV1p>4HL=86+miBf#6{4(+fjIej;2J*nEF(EZN2eE8{UT{IMZJz@Vt%>mjxm{^5}L zqUBM~;)buT*wSjyVs+xh;Z8wgn6!w?Y!Adb=#`&{t(L#-{Hyox#nxw+{a5edQX;v~ zZ^Gn~-#L4ZCxNXhY%%UUp)N1{aK;;>OiON#?;?-prExhW!!D8(Rul?}BK z4OjZbrwNLt_5A3QEitAVSc@S$rg9@;>r07t;Le=d8jJR-!*Q*(x}K*T;OqHQ;o632 zrfnWTLzVFdQ+VzQHyu0+cI`uHT~UwQ-q(-K&2L#););tsJ?1y&*qo9K%DyaR z^l@Ome}l<;709kkf6-$W6BTqE99R9O6q+qw5FH!FfG<)UB#SYkY5-mEAJ!ppa5iB%PcFkmCIaEOK-xLsh|V z>eTw41ryH&76~=tg5PYi*okuszNFC}Iy!Nk64GKhq&=CxJv^MluRvF5h%rWeMOoBgqS;Eb*oqGgKeb>tL9R(}%Fq9yLkP(d$1HU!-*A z8CDYFt;-Zvzr{FUUE8h0jwE+Hv=c6;){-ot-#%m|oZNO%X4+ zl}PLa^Adcywk~b@N_AblBVcCo+#Hs}uXkV^wbPMr>>zLBxQum5O%hS$@O9hxd06`} zIiQj^bz{+~r!me8`NTz7)-Zt)6MVRQ|6uwY43KgNmrC^idmx~V>MvH`6A>@jnWs>OEgHFjW+DC*-L?|5Qj(ovo}3?|#t&XLRYi3(ew zy%ip{%hH`BK#Nb%G>I-GuO72BfwbKMgulf^XYW1!qSL}$Fg=5eB=jfi)!ZZ%Hi;UP zci3WyZ|Rr>t*r1)gU)87RqUN|x)z5@_qG&)5~z%j^MEym?uM}meKCQ9Pnn9XS`L@X zZT7aje5tCZ2e@H2D!N>D<<^IW8U?D<4|~rzL2KPFH${AjU35$x+`It3=PZ+7{WeEl z@66M8_n7NmOqM45XPaRVC%`;_bgy-u2)9m&oREJTDw`VR>ffFK<_RmNY~~9&4n>Uu 
zCpt3nh_^RTHj#u?n;s9@9sByM)T6{{aD_sdW9|BP=FaQJFss$cVt$R5lIqaJQZNXV=WkUH@>NeUq**zP;gUvN+c_5wSDS@A^^Q$d=IE_02SD&cl@ z+sU*^*!D5>);0)xxywl=P~`(E@%b+nt$*a*d;5@LDnOz)e!4cDq>-H9*`dq> z8S5*xTkrTNl4gQwcfd4wU*FeVw*rrxF`hpi#b2Ma7@>!T1rU$pK?B#F8BO|JYU7#uFD)4nTEe4w&7Lh|5%=mW3M>(HAd1dd1J0!ChTY^Y6?gaK_eeQfxLqe zGnc@u4jc+&My#9f{4zw`pAUZ+8stY5m%Qu_wBW2I7e<8p3dn^tveJQK1g$Ul1l#g+ z0y>mQ=D(k`GNjDuu+_k6Sm7M|%R?GJ@FE^=&kR;+2xSj>?QAF)SQ#b0JnFp;hf%80 z3#lm(LFTVnf$de75yg797j7m`EMkRr$esT&v;T3RSn1S!KRVn;MmotlHcj2YKfQOW z)3igl6^Wl^jy2HZbJ%3Mi0H|;YPFUvR*hTr94%c*@vZn}s{b=a6|I`j-p59hW1dw` zT2uvo1zL-`(Kvz#b0LVe_wLJ#B{!27)dehsQ=#QZv!0RCh?jZuc}|4R2Q`@&htL7?*Ho$ z{`V(|Cl>gm-}$>i?<4VCTxE%>b;6-vqj{mY9p(AxWLeS}I(tzlPL7D3R*S z!0j4Ls3}8YvYw{BNRcU0`|j4t*Y8aOc?yZuo82!uA9R=i$AX;O*Vn^^IPo4`IcB&Z z;dtPi@X7TN%+8R~Z-liM1Y0lv0b1uV(GpGI@S805Kud7IhrQPHriOGwJDgaZP}Vi1 z7hPiAKPA7t2^Vi{WD&!kPr28aBi@)P(1?_sTIT{suiYe9FqH#n7`pN2q|fiJUpjC{ z*gDgc?O$#^&Bht9T`i2C{G?kb*qv0l-HO#-D2mV{#46+=QspLIK0+wwx&?`2s+pMv>Qn1#Bxw*(d8=h{-J_>QjP5yz)9?-cwyIjd zi3y}1qSI+vdrK^7J*qJSv&Q1m=ouL!qya;k@=Hdk!8fdCp%X0mu&c4)Q6nO_@qN6~uB^q}aSL$6L9!jJYT%|1 zXrhv@izHa73Nv3NZyN79VmS|7LWpS=g#G=1eXjNa;znx0w4WJMyQ?&_Sy~=eUQO1B zBe2-qr5Anu?R=p{*lKj>94A}wiO`wc(2`S19TFDH{!>3)xTv3&tTAVo5^=Nf1)L)r zrTM#UVYu3Ai`2f^)`)QUbf!Xw5Pi*6RY^DWy7h#VAemL)m~D}#Gt{E%>B*w=P@nOg2ns7^_7ueuh`ui~vNy>TveNFsMVSyDlK zDt$os_#}0iEdoMUl2a<_(XR-iD9@jEmUCiv0)I|WTqj>#!d{|m@dgoYbwR<*uqJ(-1$vLL z=d`<+<7HF_c*>LBQD&2@8>HjYO(Y?^C^&_c-Cesl?HmSdR+2^=hFWz zHEcnql%~};Zb(e7Jw#7TOwc>W9-=QNC+I!jTUtCBmDTER6Awi7c2|29CtCtnaSXq1 zKdV1CFR!m`3gMnb5gcUpazt29e0^0)EHM9mJfex~Aa1k6FZ%wH5A?o@3f;y9*#{R9 z2AfZ_#z=CeH8?Eo^777w++tuDlp3L_Q%h_)^1=F>mrFo6$#H2_p}e-X>YH##KboWv z5h`V9c3w9cx4X&|9j=|iq}yjl?l@>Bz*QE@qNJy>)rmbhX0%k@L;~I3b|KUa>K;xP zQ7jlpx+IW6KpnyUTh)DtyLAx}{lP(Jd!%Ce zbI$<$LY%C-)iAG1i>777NNdUv%jyeCchO$GVRi@4iN46>O3q+R>~+1#RWEQ6BQq@; z;{dK*lH=%15Vx}po9#y$5mvpXbuP=!wT}dD)c9=L0-7+Ukd1?h(}m#5oqTtC78Vvj zv$^gJjtFJ_1ntB1hHKG~>Z=I;HahCuK5+46bw2l|#6(x3u`qQ@u@MJ{K~fFLt3j>H 
zh+LT3M3gRiv-r)3XqxGFHXLOh?KgD}H-o2c#lamE<{y?mMc&9AfS|TZ*(fBO>(l^h zyGI_y8NG4@+9&hgpAK)^oBk1g26JQ@7 zC~@Sg5Q~vKDj`08Zq-Y|e(*HyI0lNXa7k;Lio}9@Qg6MqcI1-F%E}sGrK)7}MvQ`c zOq-@eQk!@w;68vOKCvGjYR&9c-xBtm>{Czx2do2y9i?418FngQWKU$`@b<>gSPXw* zpW=xXFnnC-vlyw(M-Oz`qk=DOo(8)%D(ylqtCKcH1Q;Tu&d~M?^2qwy?R24{Cv2ym zjX~E

aWm2L56tr^NaHaOn*f3f~qwHtSwltKQ2&m2z3@7StekSSL1za=B7x?c_ne z<)~Olp!63X&WniLsAtn~#WR=|Y?xGw1o~#8d39tia&tmm1R_J?yF(D34EjLJxvDYhH%UdqUKK;l^^qcd@`+QjC(1-%MCJyUeadsB^oOHz(e zH&x7e)`zUmNVU|0OOB5xt#f0~=*CABXgR9>W4A zC7?`^^G%#O@_loCv3WR0tbZ?6&iZ69%kG2O+Q1+kwGMYL8;U*eQTImI7eYVt@xgFu{0-BBpNCNvWe<1{uVfxmU} zGe1r|dvnn#ALWzRFfZgCP&v7%9z;htOXI0kki{W30|tn%MB+U(avu*slTc8TXpzDFH9X*JI;cw zj9b!htjnW;p1IWG5}h=8u|Ar{*flDI=CP~c_1WZ>V`7#DhlJLf6lQLA!J=MX$X9VH z5?E}H5amqkgM&}#I-VL>3Ci7nc6Db)CqQhyE1PUQ6gzfBG};0JWeX-_a4fuj3D`HG znA_oRXN;k$kIt%qcxsmCwS4$q?rIz>m`O$ zw}*96K2xNIln8y_#~P{Ye%vFcsIZ1plCT*&Nqk#Vb9)fAFv%E&QG?zNblT;64D?H>=D_Vwn)ngD|>4v zGUe6P2ft+9($R^fQCXXmRJJI>?NSdQEJVFmn`_UkHs?KEJVR=pD_Xl8=dkWS6l>s} zCX%{li_+Doh>80YYS7uIXUij(2w9EkYher9rC!1d6~8Cq4ZKxjh90PPy>kcut@)UE zSXA%xVC&Ddt2TVlcMg>?@%P!F)b4H=zd#0LNC=BBmnF~McC^f}y0Je0f4CB-Y4NP* zT3XVsAfln|(-kV0rJ!|JR-+dpr_6(6uT+UPso}ZgrOv!hyC-!n+IQwnH;jgNXXfYi zE#xOtzTN&o|WaYT8Wkhpxp|vwxVbm0E-1nYRxa~cz zim(O7{rz2I1Ld4!5@SnNZUnQ9&>NSJu_&5{4i+|UsWfgxgnc{GxTdW%e3Z*NvCtFk zvQe?O@2l9V<+~gL&Kdry@)u#oyKzn)j;}T1D1CElva{)Bg31ez*ZHP`b5gLounM=;-@M$q=c9=%%B*2k=aH10uBB zdEYias{H1G_kPgn>g3E&q`VXpi*ITLoTcBI)Xx8aGAgE&6?ZMav)0zlzl{_F9rZac zcR94VUa6PXV%lpe&89KwuEnaWg+opucT+Rg4(0+hP+D9bTUJ6$%-LOY;bjqJS0Et; zNUbN7fSd&qSH~FH-JP_ZSU&RzzyC0U*CYG_iJ!F2+-Z@uyt_W!jukyt8RK+Xc$v;W zP^?q-CcPNC3!xktTcljp?u+3{7)`Gw>ukr#;R~n3u)D--ehChA{|9}1L+C-}?UN}_ z-5Ju886E#%ZO8m_Wng`TO#f5~hs}6BaGW{+Q2q!v>B0Uvv~MHZ;kiVm!lMn=3scC? 
zw1yKoE#=RglqcT6*^R|Mgmk1tchxCT}#yqY6{CrSvN}c@A4{q+m(^qRa)Iu8^<_?JAbqH zy!9rus@#&tHL4g!tLvB{Su9YWq!;*`milqu6w-B& zoKg{QlM0M~he4pETrvDj>T70?rBb4p#Ha78mt|y-pS6uV4&TExxfC_QzF3b*a1MbJ z*OYRGG2!rSDW>)dhgw}yM_N4xTjKaYgMNH`Th&wa$OBatqj2MPQi5;v<^xq53zY7c z^kZ|HfD12M^b1D_3RtFyHhk608cAL$zR6~Ow~5taDwG8Z zLsJWE!wPNQ7V>v)?E)`aNytD_L!6&q)!*ZmW4p;2KHur+dAx7?;*2r*nsTK$ygq%z zAQii<@Ic8ap_Mf#la5JlAy0iIE%_0l;$DwX5qX;|@biFt!o6b0T3evMB+c z)n{03TXxLK3Ylcv>`*GRb@<I8ncr>!*J@Rc z9XRlB`~e^lS#$u5JDro;^K7J?L%ww$l|U#q50LV+{jT;4PW~@iG0%TGN6QhUj%?Gb z$W$K(nIom5m^NW>+eU}W?SCMR|9~5HdWRArGcV`oub-Dh@;t3Kj7XP7(!SKW$j{GT z!rsmHCoI`6xH52-4`7`;`iNUxM`-cQOpE+=%6d{xXMWp&TXo)tVDJ09#T`~cKuHUm zs<@{&${~+|Ki_X9T2N!X8|baVABf;H3c1p_fgh)9OHS$-v|8g89XgP|4G^& z2*tChN{zoSR}utLIIa9l<{#NN5BJd=pF5QwS?Vu<(roa%krj?_tiO|~9DrAqd6Vv6 zKxPoCP>Ty93EiLHyw{v7^Y5#Eo(X1@a1P0rx!7;I6g@4k7zBWwtF9wOD<9JutQY4a z-(zL0oWJ!NwZ~d>Bg4<%EF_J*qj`n(jW~Zn#8SC9kDKpGZZj9cE@VK~?|1B*_C`KP zP+H%{+PWnNR(=bcctqLRT*l>7pQ8emltT4Z&F;vpe=y<<51W&*@`IT8mrOOx*~y1? zav_S%LO?xuuH*@xKeD4vp8?B%CoG-D)E-#*)6CCmVLq}XceW{a=PpBd5<(B{W}}=x zSkD+;uIq$hfo_J1_la{MppDfhbN`EY{dy-1;PZHKL)2oZsbIcx_Ywnd=#q?29`##! 
z*Mz1nk%e{C=?BI*xY|d9e8v2}7VgC(zo7PcIbY?7^bzAJo(=XNXxs-gzcl>DgJ#-@ zkW;Cp)x#{%;L6%YNXUg7>C5Ahj7*Oyy!|u8a1l?t<5`nEUzNj`WSk3@ztqy%`SL8y zi(7e3S8TNQ3Yn>=)~9{7US48M?Z*z--^XPKPxKT~NIJ`6X71nW=q8w~@6w0Q%Q!!q z{&`l8iIUbU8efwl#(ei=*y<^1FZ)zJJyD%bmi5b@b|b-+^K_Ol{<%4?;lN19))@f&J`Ll(rE=6aV}Yp3PNlH%EE8?eAF zg{7*gVci1mqsO4tz8VR=p3lAptycSf%U@=atLbzzNg9x`S}AqR_X`>>Gv*3X3RhWu z^=#E=k0}gH<(sVH*Ii4{L-vsA@s_>isN6B@$@w=2 z59d_r5Aa^A>MTi?i?U5tGme0C`?Pm#j>{-N^$jCQYoM2HoeJ+ zn8+yn+!&HWMaDOb1D_b0KQy?~Y9&=0#1M?ADz-{Ral%(p4Q5p-9-FLkHzw2U>1eoY zH5z4KoMk+m1=xikVnHmuA#9_%RdAaI>gaQ+7*t>X#-0uUNiX5k?ty5nn<7P@i5=07 zQ9xGpq`oIBXiNJCrd!EHx%x10f)i*RkuLq~aCIHtJ_R_q8P0yQVmbSL>vK$8W_(jC zLBvz#+X1*B&yF>0L{fUS;oB%@_}fjtgg%l~zY9v%aVhG}-eG>33xADzj=y`OVqizq zTh!I_++-EIE-IL-ycj*y2OX}F7qQ?93J<%$+M~-O?4BdU^d*S{-8#G>#IE$)#KAf~ z+y)}$lsOb1n8b=%T;qM_+BlNXdPBfP(d+&0le@|FNkZtiA&9CppQE11sjpX3MReV- z85(!;Mf3L*4h2U7UeQSG^W3^@@&s=u(M4U=h9Y%qJM{xA^?BD@&fv(N#k;c`--RTT zMdBWk$wTS(a2h=eI{e4YL3|Tu5*gW$^E)v;!AY;I-s{s@UI0hEscD_-7~~`atZC$( z6bjrIEpptxvJUZnoWlAa3h56W2rrm(5F0a#0~7 zR~%1rN=sknX{5UJ)^MnF+iOE`KV^LVYr4>nYCA#x`7qCVA+PI?664i9B#$~$)g9-(wr^`204uC$IXIlZptB4hx?%Vu)FVJrl+`GXCbX>t*B3p7u#Gp9t0|0{o%%%K0_Nn|qjter#$Nhas;r)~N~tR2$n{g_Cmyd47`oHC94!YQsarF-qc>4dzwfz+&yr~UR4?f2@)K$^qX*U-@g5$vRxEl8?H? 
z8X1<6L*0JKHmf3>wO0GQ@DFPBf<(>FF8$_z1xQIAHLkkr4|>nl1PiPDagtO4GsadV zjUxV5za9Q0Gxy(=^mCbgEviS-2X*|`xF-+d)`t@uQ$Xh})U%=Bf5(LO^N7Iw`e#lo zPSfFm>IbV}9@dqqz4R~t3H)I2HdrgYc0tR9|Ck4(@z1I@l7QF3*!g=@791UAx?PK* zsE0;Uw*5vIpMvcVP|BRRanv91GBy=xMRF8@K70I8J22loR*N7%|h7`xa=1#$ZK?Y)%-$ik}r zGl`xSpQNceoA*tpe#C|PJ@mS-)>wkFwL!)$=S}VKjwkK>w>A?8(B8^f+wBBgO`0TX zJ8b!*_mL4<>C&`q$8*l>!)MV%08VCbjwboL6;H;?jH_HWzve_x;zHzNgM1S;K?JH% z<{tE1!>GkNBsq})+5C)3JTg*4Y(x{;aDj=GM9;-uq&N8EmV>L=YnQ2-oM*y_TU2KB zgJF!X@8>~}V3WYg#mD#(iq9``7_2IEFEy5p&VNQNKKSH(N6X|=+2U(k*d`cm@u8vE zCEMAA#{)$Vb4*Tz*m>|F=G$<+2k)Vzx5+d zMiBzoDHdt|9oI04v(^$Fh=Mg|?@eW$3kGujUlXRz_K{fcVP}<^maifU2ZrDrD7T*v z=3=D#?+fcrC4ZvlU$T9C@H_Rsj4<=61(L@k_3UrAMLqb_&p)V5RyS zGXlHjBx`a}TX=oaBcG*WKi=cH?L)$}_(Pq|lDg=W0#C>B&hh<-%<^CLcNZRUec0l( z5r1p`-skv3iq9RXB9=bSr+S3S>|Yb>uXU(bP%BEBsJQ9=ML!$=rD7YT(O)w>yd9~ZTa)J-MWn|ITDFH66_kG~e;ER0 z%M91|xVlpLV^YyB^V><*(`UtOdlBglas0?>3L@9`yk z#S#mh@d_@3rY1SFfS5fs5#~Xs-m*x_ST5g#x9a*!d)9BPas??XH+}PV)gUXJBnZf- zNC^q@_&mc@NZIvY312$~9sA0~c1D3!3ik8I1Q)NspvTyI#%=jlX=vZ;SDH$vKz3eo zjaQQtPL{tTU7fXdB=tgU@+j)*gM}YICoHV4o1mS|L1jtpk)X%3;cOgoHzWl4{Q4GJ zBZ~S(TZ-gT9;Jloz0C~e;9!?C@BZXqeVE=oZeWab2BYW4Rp2R?EGzT>wRh#wP_S*k zL`A72BSQ69vdxfPQd;n!)Y!7GF)@~yl4LK6L9!0UzHcMT*czlPH3lU^Qj|SQvL+1Q zJ)ZY`-%RH_?>XN&??2x;b=Ipec92Faxr~M6x`_} ze57b-RY2}0>P>7x$|bh`bo=X+-DS$77pThB7v6PjTeY~6)1ZF|GGhf%{1MahWxe`T z7dEz%oOaI;nm_E^8v7owt?y<}B#bWxv1M%x!@Ay-6*Y+d(zJVRD*^=r$FJ*|8o_Js zeaoNM`DGyvx*q>dh8%08g!M@iSlZ6758D>>nccZbZnjxm(B7Z5fhLdGj^9j$%^Mxz zzm_H-f9n&NlRB3JmDLpbQKM($)y}4y)^IbjmAK*+vY)T|&RmX1*6>u**oYnE2UH$( z6(ba{@GIPFZUVi6Wt10FqTY@F7HsJ;bcNSV5 z6lzn*Sc`}vFuLUi!S ziF$j5n}?_$OnHHSY34rmGglautD7WupE&CN`Z4JZVG??t;M3E5H|}&Y+|Syb3=oSJ zAJfXLwVg?i9xd+~MLmzJ*|f6nDBLjYU3jRY+4&JW2N(pn^7PL0)-G9(iFtCh>4~hK zCz{4598tAvZz0B(dTA2ovWoH7%2zh0an78BW_9<5$_5(zXC6SOYEpmC>dQOa9$lDd zv|1gVgKSnsPOlgEE1n0<$gL{DJda)CnDq9>3ChI#GUqq=I%W8Y7+BTU`f50Dn=oIw z&kCGQ6IEf!eLBk>ysiUA=DtC-4n9FzR4LUPt57Y6OWOYA>bwO$ajOH04lNUX=PsCb 
zS>qPW2KT=feXGN(P$Yh?=%yGkS<&a!LKkke4mwaR-O5ce#F{If_I!%~O+9degnRy0 zc&+%Ga`d`UGyApAj%&4s{#aK*C6ts_9R4NpRZgV<3Vv( zlcHqDof5O>!U-7_L2IQ+QEOY?^};45E#JT2HE%z+MQ9}uLhg0d5@r!qEI28tEjwr3 zD_^_7rxZyg8^0q><`MVUWDg98vrtvW!!dk0hsj_|CzS7Vz?4c&ja6Oc_PgTqUC8m> zWu_ins5O2vehLZC6-mn8zEN=g=?^76reiK&D#}sPh**BhbKWzVstwnkd!w(pVw7!- zj5hI4pRM^lxjR`5{mm89Hw|*y#lR%tSWtsSX|SX6Hjab850}n881BRpt`DzwJP{2g zh{OErZY_QNp2%IV(f@3;OmG<08NU|tMeYH{r_Fr6b|h~fu|5Qznzs1%N$mj6GFf}J z$>eL>aq-TZR|L+fQoNq^nR)vS5S~5UXu?)ceqa%7l_b5GT(_+gMCR@1nE`#3U$wkT zIZ5{y7mc9WDh8AC#A|LARZ5Fq-|-Q3MXoUqen%=7xiKq@w8$>V-{;MRChyjd3wctrT!o5eC-sCb+)9O6?E**=Mc2j zY>rGz!QR{a^xGq34sq;2aq2mz)ps1ID)XPh&UpXWWm9SgWTqW>FCqlM6mNMS(vsmi z8)2~Maj*Ybo0(=U{aj&OyQREKycDou>Y9qey0KRF771}@?L+3;jvV0qfbux%AZSI@L{iO*v4&ahyJ&l*1f2_%lNalX3 z^YPnyw(CzJAQ?kGt1b8p7K^sbTB;bc(|~@|pJaFLvACQ3Sw|YN9Pv^l81+pKsLkv} z8x^Uoq2Z>gTQL7CZU&U0$A+7;E<27WM7SA^~Kk zr*}_*ql4CRa;xD}ORgJf);OLth#oSo`mR*GHO}?;50{I9u(zch{fqAWyMFRhUCaNF zvjTv;>$lxF*__Xm6||PB_8bl0_J7%rZyJbD-7@=W+w`@)*C}hF%6NA2JtzC=3fw<# z86vgPk!`0_-;u()C9UPV1qS*g{cTZXK3Sg8cXDa?GTn&z^~m$Zk5CR0dG>XEhnRJd zwQJ>;?Rb=`xLZe7MTy>tnl*7vMDaLR`ak}tkllXrMahwL?$)Z^rr+o4#9oi5ynqFW6-^%9PQ&4k9j2Q)k z5+Lm&m=Bp(uKbxtjRw(_Fbtt6fgLeXWr6}sST2NNE|Ipo~oZr^_`)d_lr z+F0m=o*g?8tj_1+I17s^tUuTA0yd$;;t9IZvg78aUh(v32PM-0*>OG#OC%bk8OPO; zaUf_A$Mfu?Ur;RqY>gKI)i_DrI4nh|39GZOtDHW~stFPc%5{WZ2*_1-pnJtZyky`c z3!x9A0(e1Nf_$fGIOt?mPc%QH6{MU1Nwudfx$R(z1c+MHo%kLmZ`lHO%=%5b~*?qGanRS873D@&G?$I@=~TnM^0e$3p$Md zkf{?0+v>}4nyP-}*@I>fM6pt+Fo`~g3K@%%BKKZZZWe2C#sF&uQ@ z7c(66os5WYCo@gql@akVQV5I`0waaM$VoABQj8=gBgx4~8|-DI4FF1xkv90hr47QA zF`y?w0t$tSiHX5rFmZ8lNl9wccHoxB7%>iX(QXvBpEq@}B~1pl_^sw*wg5#FMMzO%^VhiZ;D_5`%iH|4O4>2vY$+jbWp&P^moui(O>? 
z{>Zhr*hC)=hg(`&jyD1>P|mO^j65iD<;F@f)0f9ioB+tjbnDlAV2mfxqSj+)6ItyCqTO-kw_FuE+URuYW@AUsHiC5M=>tT00}=`ywb=YWhuFO zu=Nkekg`GGkf6{MMK=Hq4Gj&wQEDlS`H&kGi4|?a3bqq7X|@Qeb_rC_@dm54r5~Vn z&dwHk>n|oLDTI-|RQ6>*s62H9bVkutk#;a$l?0gB=9g#g9_J%*adUHXadD8o8>r`X z#!-Q@_7-&i9*|7MteR7Jk*6?4J71+K0FIY`d$a(QFfUD+!!lAt-FO$|RAW+jvhmIt& zIVc)uzTABkhK4}PYh;q>i=!Qa&mB5=L&c^z9pxm|PhyamV^7N^ufrR4G&EX6IZquB zIO`Pzpt8msZ{EDgA3_%=nw40*=p~llqSr6EJT=NeyyPC?%{@R#NfDYR5Yf`1U~8Q6 zklRr+L|}M$_*(s&q*|^fY-g9uI1PR|V48W#Ki^(ibpAX*s@k}oC6=q1xNtvh)3;M> z!cGs48sC1xEF&Z1L`a(XeX7a)37f7W?Qx}FOmEFFx*f@(v{23$yLucPw6vmty@{IN zv+SNb^Im6DRPHIvib5qM1i`T7hF3>*k3{)HNm8Q6Tod8|V~Fy6e_Ilbe%&rsi?~$M znW6&tFTQ4#l$Xo2UkKd-Lu=SkBk*{Z(R)EUg)qPuGCk91F#D5IU0Eros+ux~LCvi5 z>_ch?E-fwbmdY%m+eHR)a&iC<-fzitH`+V4w+#FMG*o81Bl*V2iK5EN9*^t8{@^0K z{_GWN9~d1SJzS`xr`IiEm{923uoWL0dx#Xg3Vrm$Tres%)zvR9(aL0{A0{RS3{QuC zbTExM_{to}``Fabrd0xH&-kU1=;*_An-4>y(S@*aYn)TsTw_a13o}aBYsi+p<0S3E zFqjAi8Hw0qN3(E%+AbC+KBvTo%8MCB?(St{>CIbU>TO8vyl6WU7-AQkt78ku@RF~t zv!TPny1TnqMUlIPpkA04EZ~C^fk={qB|C3lyV~{p#3`Cu|GO*T{NgY3fNL**pgb%r zj9!U=tv01ZM@9WyXqQU<#RY*JCONJ^TJE?AM#aZNHpFEc8)lwc7#sgBjY!Vh&q<4U z50v80ch6r2UpyrwWX@i91OhQVpuKSrSiWs7zr8#~4ocVY@gdwFA{X7DlJHpsLiYBE zfR&A=i%1)Xkt?%BH%b9xvytiwt?3H5N|grr&1t4RCcswVY4t8WJ^d>&!GevA4UmSo zteM_4-TTq(guFcdTxq$~11^PG3JVJhK;ZicoK~S#i+qvmU6)FNf|%$|3@iil?^h41 z&7$7px_}L&d;efUP*9LOve(#(?LX;dEY{Ely-ir`Qq@0zx&na`C`{Dip#}5RO{qF3&*RQSZlbd>4%uJ`55C{Y_>bj;O z0&(y#{9ePz06)oACN=PK7^99dW`uvijP_CRH<$O--@J`H9K8K(z0e3pcMmtTB*xAQ zjdsU4d3Y}$BC5h5N%|lSFSM<#j5nb{)W9sIZeLkLI{iU?d@*P|zd5ByuM~Tjqp%gD&II zq$~2a7$qW7_A;{1RoxeJCqEdu|20)5Xk$3CJ=Kwd@wKtIaU)s~f$-EVtJ;JeT=|i8 z0$!L7AP^b!Ukn%#i2EXNI#+%y==^po7ul;oM`sDPoJ6X^$M z-h;!1vv~BjB({|`~M%~ zM{|b7Xde2W&*;M(KFzZJ#b}uOVS_n?*#Y|Z{p(07|2Y27%KW_)ny_-jo4;4A7~NUg zOosJHey7h7DG}ae4hRcuqfW0h(7q|XcGXkf#YOu_ zg!k~mnk-6GC6eU2rFN4bMtkFUlEA+2OUN96@$0ly4O)nFQ@7C_VirGD)Yq#tKzgo@fjve!v=@dkDeRyz)jhDBCSIM{hl1F4?=6So% z`C74jvd1-)d>1-1H6r6vQ;YfcRwBJF@xZB(b9GbN!eZh5yUi~I-wn7H78P~J@G5z) 
zjQ-(p0fDe2sEZv|#p$2ZOTCFx4QV?y?9*?RcjySq#le2}DsdGRD~l?36I){*1^auO#4pXwosPJWv$&4Dz1<&0Z{OaOlaq^n_AFj#*t3y&ZkSLt;y=zju;@D+ zv|Oj?yYT9Uwl*6-KYn4sH6cDe-wbbk8W~#KS9~igI+}@@OIA~X5?rRbHF{{kwbG%f zw=T%9#Ju7QkA<6C$1EYcswt_2Y3oERoh>yFMsv_pSX7->ntfH6f&J z!(lpjs;jGS+_+(AXo&4Au{Z~-`y>2Fkx`zWSsU2#BGYzmLv5`x@dqOoi*0+^JvlZ$ zZend++R&hm!uHRbpwT%(Ee>U(qM|5VD6K$fw(a!=#jw5F8t>URTU-7nW@afJN+GoJ zTk7?aXU?dzv9Wz=ZS8)pFd~9FEo&d;yF4O^>u_X$IFBbD zyXYy0^p|<6{Gha$*bNS8>*M~^A zgJxak*ujUkx6jYc)^zMhPk_S!x&)kA?92?m7Ry&MIB1Td;>wA8wE6pX7a(D!=tkJ) z*VGJ3u(?+eK8<6u3k${4lmiTp9z9y)`R$>U<@-^;GamjWS8N)~hZX@-pFVw}t#|9o z-5b+PQw=fa4_S``2Gceci)3`F((x>1RpYe3w|ihyTw2;pM@Q#F6U+kiD|a7OBzX3eJ-6rMlai{OER$ME zq+TGW&s>iv#^IZnHQo$3x*k*qf&`~%Vq${AGBGh3-@KXetubmDIXJ$du{n?S0*(L|5DTZ3j@>0ay98y#qy1iQxT5${GTwoSdA(($YTA zev;^n2dtM{!M*3}?{;!t6W3udM$885<>htJp^cOD>bwT1g5tfgNAeyc5{)wJ9btP$ zH0pc`Pms}a zp71!WvMy*7w6LXC`rFV**nS9I+shnCSX)jpF)>-<`M4J^G7Pe=wa6Ub4pwy=cy|@b zVG}}4kiBq$B>N&IMI1F)7gTlU&Yg=PG;eT{=IOQ9Hb*MX97259jFl7?{`CI4yAv~E zxb(@Xb8~AQ$bQhJ?kT+AcFqxa6Zq@0xiqX8y5pZhlt+w)85KD&dZLYTbqwt7vp1G0 z`)Tn9P&agRdh!jj?1AYP7Db>pU%;ww{T0L$k>IO;_v61hnE$lxgI?8^Us_tuCn+;P z`j1gRapmsaLN5&FP$@ejtkR$vX{8*z&P$MqTV^B68ZzS07tytx>qrJEh;Dn7Ga!n}&! 
z*FEaDbHPc^qRi_8E8x`g-oBMdQ}WZx(@Qf1g!h$N%~x?63P1(dECdSF@YZ$f&A-Mv z&2(mZDW=iBw@Oq0#B^*+3I{LOZQlLk;ZLEhmysJ^?Pv>S9jAGh`Bf`s zrU|+CJG+!+K42Q^3r->eA)?3(-}m6qkz|t43*Zq6hqS4pgc25f3EU7Ta7Eyc6Y(!O6ty`0% z`*$}fc;X%ve_#`*9PsX}etPlt+Vnga1C5FgD-qUCoZ%pDJ+9XG_AZCClZCIR6}I$0 zkBYi!YMKaQm7RwRHQ(Oe_96_=fpx(<_De|s(pzN?qSDg(?o}3t7>;+ox%za%mq6{( zmX-?$PHN4GkN-u|roNVJN8gF>(w_xv;-<|GWzOFk9@9u3p?2xD!YzxwE|Xsp0w)v9 zl1chl>(Fg9-7i)(YsyRNj__m^4)PQ+;?tC-%o^TxE)vOvNgz#iz ztUjSk?J=6#ml6{*^!2TbVC5f~*ny= zhUfBF70#9ue5<#&m#$#efwRKkk0R&KpPx~e@t(O^VqItM*q$0^jO(C7WM;+*K#D5F zRfvd+8aO)_2+{K`xS^P&WbeX28TI2S{lnB`#QjR!0hPma9ruz$0!>vzXim!`byJmH zU50EN94@0D9!yoXrzy!*y=Ugw*qFI@F|2MlY=1X!^%Dm-ugAjI z-v{5?h6C_(A*s}pF{e(Q@)>`wFb7G-iR^-+Y6~@e{(P6VJ`3R|_{+}}XAd~XV8U^WEl;`1MU za6eYz&l1WC>YW-2^#lOqx|f!icou+8PqP~f1E()}X0S_|+pCUbY8-*G!`Tt9j?=Mi z&lv%J23lGU0mW^kwz_54N-D{{dXimbcfH$8*%v|-op(TxEjOE5|KC&f@~|&o%K6M0 z*M0vYo-FT{EM;B$bzwDE=k*)tU1n#wDev!_tI*8 zD2ESB$sW?6udCbJ#2+R>7?#TGE=W#3z59(d>?Sbd!!9oPJ1XRR%ee!Xpp5~?3h!CN zfI4e8O1)Kgu5LFzlqyaPfnXB4NubQWc1$<$_kSONavuBm#Qg2;2Xrh^XUH01KZYpf z4y0u97=A>jyt#^oXSc3`^T0baZBjbSS?^d?JjubOyyV?GLueiPYXjE2TP3UV!}j;~ zk-r6XH|lC@uMzmwunujnGQsnlfd# z-#>6Dxs4IgS!r&<7`hV9KU0vG-UULrBXuNrs2VeE&v`9t76TiD=*keRck9+`dfNH; zQS&bbL-U1)CR1(@2?(tVq%!#zLx$x=hFW$guRROgSSXm7n7IF=V!}%;Ny@5CcBNfu z`62;t%Q;x(fujU5PUNPzV;jG9mRb>#hxM@=OT$Am{!Ov`N&gedxfdx}TCr!PEsTwg zXI|>7k|UV8;}aA6W>#xz6wwY2@&Te_a6UZ{A%LMgC6?8OtSaj-96HiVva_?#HX%Vl zpeLAH4%;WdqY!oE3!u)BiVey(GZ2VD)8@w+L7PjoMUSwePfs~)R%vCB$)S6r>>lKc z)mmL$g@xLTJ{VX8y|l>EpHGo@>k8Q%?q6_KGXlmW2U|BzKAd8D&^_DpeX^AbDRsVX zbHs)00(4u8zQBT{kz>ozt?WcRI|HV+F_N_?Bjd{Eu%FG-&+Bd{PoC6mm$@hmIs=@c z5?+@RPfmBuN`d|yd-BP+7FKI8|z{WdRmih~~eZfK)`u<%vrP~(Rd6Pw1#cCjkC5Yn&N zs9Ay~K#NY@S?h3t_BJ{u#uRw2%%`7q{=KEtZEZSrKb8p3H+BJ)c-V~OZ*Q-_BLt z1Mo;PMh=|^<%X=Clbb8Qt&F%bbPW1bZuj?UkXokt5!`YXB`^g?VPy}qFJ;A ztSmX`(pC1h#^K01Axl$}h&xvSw|C1utgX|*Cl08rcWS_;jv*Ag)*25=)`nM0gp>yj zIlVuCn-=&Ky0ko?#C*3t(ru&e$UFO%pGheS`W|)EM5r7jiHl)-JNFe7YKGlawnoG0 
zxvzS$dOE%P9lCY>t6i+9k`iHnfO!LB3mi@#dN<25k;8@isIkO7PPoUXXjwu>g!tFx`v1Cs!|t)li*Va)e@K~vQ3%vk LJ33 zS+bL5sAL=2V(iO(&-ZzL&%O6~e)qZe54axA%pB*;`JB(^wVuaD20HAlysQWWf*qx+ zX^cSZXMp!rEKIQNvN5ZI7Y5(UC{q^r31YeR2tM;*wXS1LyqvK9_CAgXXHPE=M=4(i zA4f+|UyK)a;Q(0`4w9lDq~YUek9G0#6f|}5a7375`~=S<1vMQV1TUPwAS)<+Ug-i- z>4J=)k-ngomZ^Qp{s{!)xEe}R-SlqC93=p2Id0_;*NZgAj3f#2LRaDYoXD7U*0gs;D@41c9hbw;T?H;ivEX z!2&P$;qM5%xyJ&ta*1I-0wK76AMB#C)aF-GnbRHMD50 zzbEwBR#>aQSNxL{=U}1IUznbZIs_*ZdR8<=Rd+bPH^sxy!Q(+C}xqU6A!_U1c`i39z9uF%>5b9eZM}KiK{z{ubZuJ<>G2K*L zSXv(5`u%ldr#ED5NP3_!n;eWp-m$AK?i-rRCwf#97lL=l86mD@RWjc72BoPywrWSN z%+@z{t9o?9!4*Z;FXaB(Lbeb!FR)GwuW0YzZ9{L~)mgA;e|xmIZXrM;Sn=xm@MxE7 zPkeB)2q(kPioK@MQ7P*3#Umo-<^@)V8tOKFg&(tYm_F$rUGL_KjcYXuxTh7d-@W8Y zWbyI=C5KY#!}iPzYB`sa{zN;7w|L|}tJdzC=jGvw;$Rrck~^W1vzJYg@mNecE6e{r zagJc~Dnq!xl(ez67Ku*E$tG;e?QtZ1*G<+_el`PpRIh`}|hjm4z{ z1Pk1L#Jw2FbjB&fHYDi6 z5uDGx8{W2LM<9a2tfgLdtw|n9DYNtwd1?fPpyMafDdun0aYWbH)99YTZ7QIYW6JcN z8BtwoD_4lD`ZBkF_1h)(Ja38G7D|%m+3P`UbiDry_1Rt=Up>%EG{$n6uf8FUXCI7-p#JakW;#!M|lf3K#s@Cyk0n1LD+}stOSk)`~`etY}nlvGotZ$NIU}kFC zD}R9_)NgLcOrC-@Ax*%}gof{!h(Gor&dP=`J{TFX78MccwHnPa6qp+<)rrS0ep5Fy zH!p`TV`XPIGd7N|Z)hMT9zLObU0q$hO&bRI1QBXJL=Ghvkyg=+3=FwdRp|I*{QOcm zJ$W+DUv%R`)*A#|8^YO@LbfY2Gc#pfd&U2bh$zU)(zLg?$JhS4Q{COu(?@W#AX=X- zw5l**WMqub`t(V+1INk9xw+C~==bwe9B%v|2S@*x)Qd>$v=RNN_*(>YA2~F1Zj@y8 z;ll@f`RC7{sbgd^TSMpS+%Rq8h253-qAKcXQBl#)H{a*x=U-7&RJ?NaswoDOn_`fo zy1l58sv2B1csu&Iyur0=*GdN6hbnKs#;yL!tEo|*qf}eO%X)vi1S6G{lx&^C4ZF)_ zXJ_XHuFYRy)3CI(9IEy&{`2Tjzlj8k7N^pg^`&t%2}hWkl7{iI^IXv6iqb+Nku5DP z#-5&@&tSEzD?#lIa!mgI`)_Jr!3$>{|C#I;u50rY+7PC{#;ptBIc<;lYCkBB-5Bnu)b_$|!DD%CgXWXIe+i zSY7>oWKuBrT|LC)BRj5lv5cUT5pQ$giof)(B_og83{$y7hm$`CeAB$dgj# z(v_)&e@~=%Ha0g0QhN)nGqF@25G8=QRix}OG``p5UugF82*cs2Zg45 z75Gk{J2y~dQ!RU=4uR@^Dx^{7F)Rm@WGFa2Tv1$7S~_v3dcueJJUO{weSICb!>@$Z zg;j%##b*How6wKxwz0+I@mU9$ImFMNHGuzh6K5HDlO`}NuVkD7;)MpON~m~euJqNt zA?bcw$u7X7$7s#q(G<3Z#j<4r6XM>XC_5R3ZX&;tt7|?VN5n~CVc<|^Nl6KA4%Qjo zci#};h|mCjR*d2b21@GcE&=`n4Agp+0$1M-%!H%OXNlIO!CQCqU!GTQjujaA{OlAE 
zQtUUr;;;?jK+lL4E)JUCEDX8aD2N0`2AIARo9tJl9_vyoCigQ-4IA>$k8A?z>S9 z%ciBNDZBo%XT}t+4w#(uJ0_WWF}{crHxT*gkx@uU?Lf(puoo&nH@7oRSVtDOf+@5n z8pC9j!1Y8DUc3P2>1krDYfP0chdewyEp2VRpW=jR^VJLaZ{J>h{P;0-lvFK}S5jh} zk&%ISJ3)1Iq2Eimc`(NZy2M~Q1sfY1UB+sI*~=ASW`H=o6<%YwSf20jM#;2WdB}X_FI|0hKg^U_`UK9aQ5@( zEk-6LoQj3sx>8XUD=X`DU(;14nIb3|yz@Xff)dMhZG3hKn3 zF!T0)H?=iXXJleBKI65wyHmCOyOs0E5y7a&AH z$}k42Hk@DC-CB@TQLz!$e)6>O=F-&(J!2=QH{IRcvW_js%q%Pl-n}z&8z=$$n zOqb^DoSXvSe1UmkzQ4LX#b2p6W@bEAZ8c~C@N|)!lEU|QcsTXT#f9hCdEHHl|Ck5% zS2XE_U6Wre|HV7vLP8&WZR$773(?stD_DwehYHB$#>NKT|LV%GK@$lH3HS`Elos^+ zF)+H!Z`NEtMWO2`laG&&2x_?8!$ec_p_n25EtTpD0PGl6Z_nL__*UHqEg01% zwV=2*?z|vsX=&*S3KiFQGY(J-;=pnMaZgNI1AMx2M;cn>zX`SfrC5He`1=sIE7;MT z3L64(Rb6mV1B)&D^Upt@CnWTZYV1SQ9SaG?E-ij<@as@n6PK30=IH21hhI&=vaeWb?aedMuU&oe zgw125^8F}#bP$ij<;xEe&)QJ^{Od*a5~mX^Z1dKXEo}S}cGsKLMo4oEf7RM(;B28o#jsy9zyI{s*k0{T2M04t%RExx8g;H>tUsLnxCqLtW+{Ag zW#%>L7L5K1u$6whc6*x^#mTxG^{xc!DYcEqo+O(2;M|>wpFc74BgE6<;)`WOm<1YO z?)2%?SHKcCm(%v%f(hbiM|pYkElZswgoM-q`hCSAG|3Linc~VHtomuHHqTBOuI5W1 z(jB)s7#x~o{xL_01EWF+8`hS1b(FJbNt;O2T1@F-1on^O>J$903EJh-`Rs?JQwj> z7`2JwKG)x!t;c!nSl0UbT}t2r3Aej3h4iELiZ#EAi_36qP24~>EiQV41z4(W4R$0I zHp6l~kB|RBaECdvtvFMD{fv(SUlm+LMPAnRQN#C6$>N&KmX<5+sTVB*SASi*e*GmJ zIiB|zJO3qeZNxYB&fHM;yLabl3uAV%%D*4g*VoI2?ru9ZTMs-uc$6-gl-texO2M0D zVg@O>FKltB3d)`2{QUgqFJI<5;{&v1oZ33+k`}b?O}_>`1VY5fNxL{-slm*_*V%Ab z?d{Td^FMRdW2hzt3JTul`Xxp2;XVXLlx)b;mW0%UyaRxvOSXBDb>4?C=0L*Z$jIM( zpEok|RtERg$ss$p-e2eCcK}wMC9QNb8FH^*XiDtOdhh(1IiTe8dBM*|eVp1}W=L6;=R&O1iQ(&VP78hmTqD!)7V+R4 z38|@toRa34x_kRnqlrb*4o#Su?reH|81bwa4cYR5Ec8!!#!~zSBxGbvF8a?u7ojiY zt5>gh9mb z_hPz?)z#IBZDCMb)6*Cr00i0qsg6&djO^_*RJVS~q&0gI(45DQ7oeVr!BtB?r@Cot zheJB3iOILR&;+RuP8M&kNZ_!ir0=k zQJCymg^J>mGKN9pgI5Q|ww4H_KG!~nSd}+WSxmr0ZDS!Iy3r>pdh0&(iOx;hEWy5k zNy@I)8iGYnyr^E1^(kRt4S~?@+oi$sa!vlD0Xi-&F1W=H%n4~}#f*oz*0C!p5EE(B zT^is@ztOdUYo6IzS<}sGyYF{*7D8v^bWU`6R8S`S#6Srl$`6$~8GlT(K6Gz8vko@5r!;g=iqUNaWxP--mK3=0U2BZ!AG+pm3Q8O+sj+KRFQ5g@E z7Sag+Ji@+fbPcZh^G+JSi!k4zLdo7^(e* 
z$%e&;5k__e2R^LTuBo)+RjkPCjpwLyDWb4$mygSP_CDc}!8USQh)YQ1ecnxS6M&14 zZBNSkD5at+z3oXfDK5R&I_t|*9TqolW=?IX?aUa0Z*}!q*cHweRU3HKZoJ_-b3Gk` zE7L%1OpH1GsQoofvhvyu=jokI3SFkV2hxrmu<=I@reQsC+dy^qmHJvxP%7y23#ay^ z?COPDEA!dCJ09FXz-|Ly2-ElN(_QQt{ znYMOY?RGN(;^^Yq&3yA>+rkKekn8W?zo!#Q#04q_x(pK+vNLPv2Cd*9l>{RLu67rs z0ozq}R(h&-H)oN)lUM!bM=ULD1F~z^e;lF&Ew@)Kf4NBO2;DV_Ro%qAEN1b+s@gSO9{^`fDtruj3^|biw<$K z>1d@(N5EyeGef_K8G%U|yPs}V;fb4k<-GFMA=U+&W4l^hUq09rE{0>rMa`j!4T=GE z&^gTmtCRfdRUWvM=&4g%tCzokiyu$5{{%N3y1SevHn8US#IH}~re~Yu3!66$5r-w! z)W&jlt)U(1ktE{RUqTZDN>lLH*Tcc&8B(*%v-*E=?CdQ9;6I>rT0!9f#6<@ z0FV{BGrH6%`V3c8PE)wOY=E>lBHblqFM+(byG0ufT$q_HCVMU}LHcFqQvmI=kBo`w zheh^<#}t7@ECO=wFP%E@bBcB&)?1a83Y5F!(I9wto^kJ_D%`$&u4eJUWZSFPR-QyH zD0!?04_x=gKNwTP(H~JzwT`Ma~-pG}8N99Zshc0(Z3Sk-L3n7A~-e)ZV;j1}O>|@F;;U zm9!y9UoI`rC6X>UM&d&@T^c`rq(47`K!_&#Wam(z$gM!9JguN`9sK|FnKQkRI?HZ- zj^`o|s6nqWR9<=rHEQMaskFNW*}c#SyCGux1Fj@5UbOP3_*szr^Lzca<|^ElC)-=v z+Y?0QMu;Y9A)9&h*?=fG4X3(yts@Y79Z)ewR*ruani=_H_4h<;uu|+{L|p~@+J!}R z$Rl9C1+>|LiZyt^h&jrzUmzb0Zc<>)#^NIri literal 0 HcmV?d00001 diff --git a/docs/src/caching_allocator.md b/docs/src/caching_allocator.md new file mode 100644 index 000000000..6181caeee --- /dev/null +++ b/docs/src/caching_allocator.md @@ -0,0 +1,76 @@ +# Caching Memory Allocator + +Julia uses Garbage-Collection (GC) for automatic memory management. +However, it does not know about other memory spaces, +therefore it sees no difference between 1 KiB GPU allocation and 1 GiB +and doesn't free it in time. + +This leads to a situations where all of the GPU memory is used, +even though your algorithm only requires a fraction of it. + +Current mechanism of dealing with OOM (Out-Of-Memory) errors during allocations +is to manually trigger GC and retry allocating again doing this in several rounds +each more aggressive than previous. 
+ +However, manually triggering GC is very expensive, since it requires scanning +all Julia objects, not just ROCArrays, so the actual memory freeing takes a +fraction of GC time: +![](./assets/gc-vram-breakdown.png) + +In the image above, the red region is a call to GC and the green region is +where actual GPU memory is being freed. + +--- + +To help with memory management, we can use a caching memory allocator. +It is useful in scenarios where we execute the same function multiple times +and have the same memory allocation pattern. +One such example is training DL models, where given the model and its parameters +we compute loss, gradients w.r.t. loss and perform in-place parameter update. +In this case, every iteration performs the same operations and memory allocations +and with a caching allocator we can efficiently re-use them without returning +the memory back to the OS. + +## Example + +We have a for-loop, where each iteration requires 2 GiB of VRAM. +We create a caching allocator with the name `:loop` and pass a function to +execute. +First iteration will allocate, but subsequent won't. + +```julia +using AMDGPU + +function main() + n = 1024^2 * 256 + for i in 1:1000 + AMDGPU.with_caching_allocator(:loop, n) do n + sin.(AMDGPU.rand(Float32, n)) # 2 GiB allocation + return + end + end +end +``` + +The reason for marking a region of code where to re-use the memory and +not extending it to the whole program instead, is because we cannot rely on GC +to tell us when the memory is no longer used (it is too slow for that), +so we create such region manually. + +You can free all memory held by allocator, by invalidating it using its name +with [`AMDGPU.invalidate_caching_allocator!`](@ref). +Or if you want some region of code within [`AMDGPU.with_caching_allocator`](@ref) +to execute without relying on cache, use [`AMDGPU.with_no_caching`](@ref).
+ +||Without Caching Allocator|With Caching Allocator| +|:---:|:---:|:---:| +|VRAM Usage|![](./assets/without-caching-allocator.png)|![](./assets/with-caching-allocator.png)| +|Execution time (seconds)|`12.865149`|`0.020943`| + +## API + +```@docs +AMDGPU.with_caching_allocator +AMDGPU.with_no_caching +AMDGPU.invalidate_caching_allocator! +``` diff --git a/docs/src/execution_control.md b/docs/src/execution_control.md deleted file mode 100644 index 4fb4681d7..000000000 --- a/docs/src/execution_control.md +++ /dev/null @@ -1,27 +0,0 @@ -# Execution Control and Intrinsics - -GPU execution is similar to CPU execution in some ways, although there are many -differences. AMD GPUs have Compute Units (CUs), which can be thought of like -CPU cores. Those CUs have (on pre-Navi architectures) 64 "shader processors", -which are essentially the same as CPU SIMD lanes. The lanes in a CU operate in -lockstep just like CPU SIMD lanes, and have execution masks and various kinds -of SIMD instructions available. CUs execute wavefronts, which are pieces of -work split off from a single kernel launch. A single CU can run one out of many -wavefronts (one is chosen by the CU scheduler each cycle), which allows for -very efficient parallel and concurrent execution on the device. Each wavefront -runs independently of the other wavefronts, only stopping to synchronize with -other wavefronts or terminate when specified by the program. - -We can control wavefront execution through a variety of intrinsics provided by -ROCm. For example, the `endpgm()` intrinsic stops the current wavefront's -execution, and is also automatically inserted by the compiler at the end of -each kernel (except in certain unique cases). - -`signal_completion(x)` signals the "kernel doorbell" with the value `x`, which -is the signal checked by the CPU `wait` call to determine when the kernel has -completed. This doorbell is set to `0` automatically by GPU hardware once the -kernel is complete. 
- -`sendmsg(x,y=0)` and `sendmsghalt(x,y=0)` can be used to signal special -conditions to the scheduler/hardware, such as making requests to stop wavefront -generation, or halt all running wavefronts. Check the ISA manual for details! diff --git a/src/AMDGPU.jl b/src/AMDGPU.jl index 3c46f1070..15f8531de 100644 --- a/src/AMDGPU.jl +++ b/src/AMDGPU.jl @@ -114,7 +114,7 @@ include("tls.jl") include("highlevel.jl") include("reflection.jl") include("array.jl") -include("memory_record.jl") +include("caching_allocator.jl") include("conversions.jl") include("broadcast.jl") include("exception_handler.jl") diff --git a/src/array.jl b/src/array.jl index b045e3006..793be1689 100644 --- a/src/array.jl +++ b/src/array.jl @@ -7,11 +7,24 @@ mutable struct ROCArray{T, N, B} <: AbstractGPUArray{T, N} ::UndefInitializer, dims::Dims{N}, ) where {T, N, B <: Mem.AbstractAMDBuffer} @assert isbitstype(T) "ROCArray only supports bits types" - data = DataRef(pool_free, pool_alloc(B, prod(dims) * sizeof(T))) - x = new{T, N, B}(data, dims, 0) - x = finalizer(unsafe_free!, x) - RECORD_MEMORY[] && record!(x) - return x + + alloc_name = cache_alloc_name() + # Do not use caching allocator if it is not set or + # the buffer is not a device memory. + x = if !(B <: Mem.HIPBuffer) || alloc_name == :none + data = DataRef(pool_free, pool_alloc(B, prod(dims) * sizeof(T))) + x = new{T, N, B}(data, dims, 0) + else + alloc = cache_allocator!(alloc_name) + tmp = alloc!(alloc, B, T, dims) + if tmp ≡ nothing + data = DataRef(pool_free, pool_alloc(B, prod(dims) * sizeof(T))) + tmp = new{T, N, B}(data, dims, 0) + add_busy!(alloc, tmp) + end + tmp::ROCArray{T, N, B} + end + return finalizer(unsafe_free!, x) end function ROCArray{T, N}( diff --git a/src/caching_allocator.jl b/src/caching_allocator.jl new file mode 100644 index 000000000..03880f443 --- /dev/null +++ b/src/caching_allocator.jl @@ -0,0 +1,151 @@ +# NOTE: EXPERIMENTAL API. 
+ +struct CacheAllocator + lock::ReentrantLock + busy::Dict{UInt64, Vector{ROCArray}} # hash((T, dims)) => ROCArray[] + free::Dict{UInt64, Vector{ROCArray}} +end + +CacheAllocator() = CacheAllocator( + ReentrantLock(), + Dict{UInt64, Vector{ROCArray}}(), + Dict{UInt64, Vector{ROCArray}}(), +) + +const CACHE_ALLOCS::LockedObject{Dict{Symbol, CacheAllocator}} = + LockedObject(Dict{Symbol, CacheAllocator}()) + +function cache_allocator!(cache_name::Symbol) + allocs = CACHE_ALLOCS.payload + alloc = get(allocs, cache_name, nothing) + alloc ≡ nothing || return alloc + + return Base.@lock CACHE_ALLOCS.lock begin + allocs[cache_name] = CacheAllocator() + end +end + +function get_free_pool(alloc::CacheAllocator, uid) + free_pool = get(alloc.free, uid, nothing) + if free_pool ≡ nothing + free_pool = Base.@lock alloc.lock alloc.free[uid] = ROCArray[] + end + return free_pool +end + +function get_busy_pool(alloc::CacheAllocator, uid) + busy_pool = get(alloc.busy, uid, nothing) + if busy_pool ≡ nothing + busy_pool = Base.@lock alloc.lock alloc.busy[uid] = ROCArray[] + end + return busy_pool +end + +function alloc!( + alloc::CacheAllocator, ::Type{Mem.HIPBuffer}, ::Type{T}, dims::Dims{N}, +)::Maybe{ROCArray{T, N, Mem.HIPBuffer}} where {T, N} + uid = hash((T, dims)) + free_pool = get_free_pool(alloc, uid) + isempty(free_pool) && return nothing + + # @info "Cache hit" + busy_pool = get_busy_pool(alloc, uid) + x = pop!(free_pool) + # Array was manually freed via `unsafe_free!`. + x.buf.freed && return nothing + + push!(busy_pool, x) + return x +end + +# Mark `x` array as busy, used during cache misses to add new allocations. 
+function add_busy!(alloc::CacheAllocator, x::ROCArray{T}) where T + uid = hash((T, size(x))) + busy_pool = get_busy_pool(alloc, uid) + Base.@lock alloc.lock push!(busy_pool, x) + return +end + +function free_busy!(alloc::CacheAllocator) + for uid in alloc.busy.keys + free_pool = get_free_pool(alloc, uid) + busy_pool = get_busy_pool(alloc, uid) + isempty(busy_pool) && continue + + Base.@lock alloc.lock begin + append!(free_pool, busy_pool) + empty!(busy_pool) + end + end +end + +# Public API. + +""" + with_caching_allocator(f, alloc_name::Symbol, args...) + +Execute function `f` with arguments `args...` using +caching allocator given by its name `alloc_name`. + +All GPU memory allocations will attempt to hit this cache +before doing actual allocation (in case of cache miss). +After executing `f`, all "busy" memory within the allocator is marked as free, +so it can be re-used with the next call. + +# Returns + +Result of the `f` function. +""" +function with_caching_allocator(f, alloc_name::Symbol, args...) + alloc = cache_allocator!(alloc_name) + # Enable usage of cache allocator during allocations. + cache_alloc_name!(alloc_name) + res = f(args...) + # Mark all allocations during `f` as free to re-use and disable allocator. + free_busy!(alloc) + cache_alloc_name!(:none) + return res +end + +""" + with_no_caching(f) + +Execute function `f`, but avoid hitting any caching allocator. +This is useful to call from within [`with_caching_allocator`](@ref), +so that the memory is independent from it. + +# Returns + +Result of the `f` function. +""" +function with_no_caching(f) + alloc_name = cache_alloc_name() + cache_alloc_name!(:none) + res = f() + cache_alloc_name!(alloc_name) + return res +end + +""" + invalidate_caching_allocator!(alloc_name::Symbol) + +Free all memory held by caching allocator given by it name `alloc_name`. 
+""" +function invalidate_caching_allocator!(alloc_name::Symbol) + alloc = cache_allocator!(alloc_name) + alloc ≡ nothing && return + + Base.@lock alloc.lock begin + for (_, pool) in alloc.free + map(AMDGPU.unsafe_free!, pool) + end + # TODO is other threads use the same, signal that it is invalidated somehow? + # TODO error if pool is in use, i.e. non empty `busy`? + for (_, pool) in alloc.busy + map(AMDGPU.unsafe_free!, pool) + end + empty!(alloc.busy) + empty!(alloc.free) + end + return +end diff --git a/src/memory_record.jl b/src/memory_record.jl deleted file mode 100644 index 85b01c6d8..000000000 --- a/src/memory_record.jl +++ /dev/null @@ -1,48 +0,0 @@ -# NOTE: EXPERIMENTAL API. - -const MemoryRecords = LockedObject(Dict{UInt64, ROCArray}()) - -const RECORD_MEMORY::Ref{Bool} = Ref(false) - -function record_memory!(rec::Bool; free::Bool = true, sync::Bool = false) - RECORD_MEMORY[] = rec - if !rec - free && free_records!(; sync) - end - return -end - -record_memory() = RECORD_MEMORY[] - -function record!(x) - Base.lock(records -> records[_hash(x)] = x, MemoryRecords) - return -end - -function free_records!(; sync::Bool = false) - Base.lock(MemoryRecords) do records - for (k, x) in records - unsafe_free!(x) - end - empty!(records) - end - sync && AMDGPU.synchronize() - return -end - -function remove_record!(x) - record_memory() || return - - k = _hash(x) - Base.lock(MemoryRecords) do records - if k in records.keys - pop!(records, k) - end - end - return -end - -_hash(x::ROCArray) = - Base.hash(x.buf.rc.obj.mem.ptr, - Base.hash(x.offset, - Base.hash(x.dims))) diff --git a/src/tls.jl b/src/tls.jl index 0fedd9e6c..05444810e 100644 --- a/src/tls.jl +++ b/src/tls.jl @@ -2,14 +2,16 @@ mutable struct TaskLocalState device::HIPDevice context::HIPContext streams::Vector{Union{HIPStream,Nothing}} + cache_alloc_name::Symbol end function TaskLocalState( dev::HIPDevice = something(HIP.DEFAULT_DEVICE[], HIPDevice(1)), ctx::HIPContext = HIPContext(dev), + 
cache_alloc_name::Symbol = :none, ) streams = Union{Nothing, HIPStream}[nothing for _ in 1:HIP.ndevices()] - TaskLocalState(dev, ctx, streams) + TaskLocalState(dev, ctx, streams, cache_alloc_name) end function Base.getproperty(state::TaskLocalState, field::Symbol) @@ -26,6 +28,17 @@ task_local_state()::Union{Nothing, TaskLocalState} = task_local_state!(args...)::TaskLocalState = get!(() -> TaskLocalState(args...), task_local_storage(), :AMDGPU) +Base.copy(state::TaskLocalState) = TaskLocalState( + state.device, state.context, copy(state.streams), state.cache_alloc_name) + +function Base.show(io::IO, state::TaskLocalState) + println(io, "TaskLocalState:") + println(io, " Device: $(state.device)") + println(io, " HIP Context: $(state.context)") + println(io, " HIP Stream: $(state.stream)") + println(io, " Cache Allocator: $(state.cache_alloc_name)") +end + """ device()::HIPDevice @@ -179,15 +192,10 @@ function priority!(f::Function, p::Symbol) end end -Base.copy(state::TaskLocalState) = TaskLocalState( - state.device, state.context, copy(state.streams)) +cache_alloc_name()::Symbol = task_local_state!().cache_alloc_name -function Base.show(io::IO, state::TaskLocalState) - println(io, "TaskLocalState:") - println(io, " Device: $(state.device)") - println(io, " HIP Context: $(state.context)") - println(io, " HIP Stream: $(state.stream)") -end +cache_alloc_name!(name::Symbol)::Symbol = + task_local_state!().cache_alloc_name = name @inline function prepare_state(state = task_local_state!()) hip_ctx = Ref{HIP.hipContext_t}() @@ -196,13 +204,3 @@ end HIP.context!(state.context) return end - -function synchronize_rocm_tasks(ex) - quote - try - $(ex) - finally - $task_local_state() ≢ nothing && $device_synchronize() - end - end -end