From 250deb334c2846a1ca8e5882c1eb2ffb5d349149 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 20 May 2025 09:05:35 +0200 Subject: [PATCH] benchmarking: tuned interpreter blocksize --- package/src/Interpreter.jl | 2 +- package/test/PerformanceTuning.jl | 36 +++++++++++------- ...locksize_121__t_transpiling_only_once.json | 1 + thesis/chapters/evaluation.tex | 2 + thesis/main.pdf | Bin 923864 -> 924279 bytes 5 files changed, 26 insertions(+), 15 deletions(-) create mode 100644 package/test/results-fh-new/2-i_blocksize_121__t_transpiling_only_once.json diff --git a/package/src/Interpreter.jl b/package/src/Interpreter.jl index 2326092..a0138f8 100644 --- a/package/src/Interpreter.jl +++ b/package/src/Interpreter.jl @@ -25,7 +25,7 @@ function interpret(cudaExprs, numExprs::Integer, exprsInnerLength::Integer, # Start kernel for each expression to ensure that no warp is working on different expressions @inbounds Threads.@threads for i in 1:numExprs # multithreaded to speedup dispatching (seems to have improved performance) - numThreads = min(variableColumns, 256) + numThreads = min(variableColumns, 121) numBlocks = cld(variableColumns, numThreads) @cuda threads=numThreads blocks=numBlocks fastmath=true interpret_expression(cudaExprs, cudaVars, cudaParams, cudaResults, cudaStepsize, i) diff --git a/package/test/PerformanceTuning.jl b/package/test/PerformanceTuning.jl index 94bca31..fe64cc1 100644 --- a/package/test/PerformanceTuning.jl +++ b/package/test/PerformanceTuning.jl @@ -1,30 +1,38 @@ using CUDA +using DelimitedFiles +using GZip using .Transpiler using .Interpreter -varsets_medium = 10000 -X = randn(Float32, 5, varsets_medium) +include("parser.jl") # to parse expressions from a file -exprsGPU = [ - # CPU interpreter requires an anonymous function and array ref s - :(p1 * x1 + p2), # 5 op - :((((x1 + x2) + x3) + x4) + x5), # 9 op - :(log(abs(x1))), # 3 op - :(powabs(p2 - powabs(p1 + x1, 1/x1),p3)) # 13 op -] # 30 op -# p is the same for CPU and GPU -p = 
[randn(Float32, 10) for _ in 1:length(exprsGPU)] # generate 10 random parameter values for each expr +data,varnames = readdlm("data/nikuradse_1.csv", ',', header=true); +X = permutedims(convert(Matrix{Float32}, data)) + +exprs = Expr[] +parameters = Vector{Vector{Float32}}() +varnames = ["x$i" for i in 1:10] +paramnames = ["p$i" for i in 1:20] +# data/esr_nvar2_len10.txt.gz_9.txt.gz has ~250_000 exprs +# data/esr_nvar2_len10.txt.gz_10.txt.gz has ~800_000 exprs +GZip.open("data/esr_nvar2_len10.txt.gz_3.txt.gz") do io + for line in eachline(io) + expr, p = parse_infix(line, varnames, paramnames) + + push!(exprs, expr) + push!(parameters, randn(Float32, length(p))) + end +end expr_reps = 1 - @testset "Interpreter Tuning" begin - CUDA.@profile interpret_gpu(exprsGPU, X, p; repetitions=expr_reps) + # CUDA.@profile interpret_gpu(exprs, X, parameters; repetitions=expr_reps) end @testset "Transpiler Tuning" begin - CUDA.@profile evaluate_gpu(exprsGPU, X, p; repetitions=expr_reps) + CUDA.@profile evaluate_gpu(exprs, X, parameters; repetitions=expr_reps) end \ No newline at end of file diff --git a/package/test/results-fh-new/2-i_blocksize_121__t_transpiling_only_once.json b/package/test/results-fh-new/2-i_blocksize_121__t_transpiling_only_once.json new file mode 100644 index 0000000..bc60587 --- /dev/null +++ b/package/test/results-fh-new/2-i_blocksize_121__t_transpiling_only_once.json @@ -0,0 +1 @@ 
+[{"Julia":"1.11.5","BenchmarkTools":{"major":1,"minor":6,"patch":0,"prerelease":[],"build":[]}},[["BenchmarkGroup",{"data":{"GPUT":["BenchmarkGroup",{"data":{},"tags":["GPUTranspiler"]}],"GPUI":["BenchmarkGroup",{"data":{"nikuradse_1":["Trial",{"allocs":768767740,"gctimes":[1.4209871071e10,8.529233725e9,8.165943693e9,8.180014668e9,8.231263428e9,1.1110946388e10,1.3136749872e10,1.0515143897e10,1.2978886885e10,1.0709110363e10,1.2408937103e10,1.4486745203e10,1.3229416582e10,1.8353010658e10,1.32173253e10,1.1621004633e10,1.1136122325e10,9.614762707e9,1.4564265563e10,9.399404156e9,1.063983064e10,1.2513746965e10,9.039906393e9,1.2382209752e10,1.3127092115e10,1.2713843793e10,1.1111974511e10,1.5837882785e10,1.5005237417e10,1.2439743996e10,9.607861366e9,1.0680724758e10,1.4012997282e10,1.258804731e10,1.020862355e10,9.630750655e9,1.5428270551e10,1.746317266e10,1.3141055589e10,1.5009128259e10,8.453648604e9,1.6874341516e10,1.1411307067e10,1.2542892313e10,1.1232296452e10,1.3458245148e10,1.0818032806e10,9.239119183e9,1.7897566617e10,1.565065385e10],"memory":54082712568,"params":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":50,"evals":1,"gcsample":false,"seconds":43200.0,"overhead":0.0,"memory_tolerance":0.01}],"times":[4.72169572882e11,5.0409909815e11,5.07815085942e11,5.10453558146e11,5.10478958938e11,4.97262381193e11,5.0260603513e11,4.99542972531e11,4.87993778737e11,4.89021704445e11,5.03746768492e11,4.89869107858e11,4.73146154356e11,4.8171801387e11,5.08579879922e11,4.949573335e11,4.72187897068e11,4.99229768599e11,4.60419913288e11,4.69019613895e11,4.50583091837e11,4.72792727311e11,4.72333754492e11,4.65152305777e11,4.82234976786e11,4.72238483765e11,4.73826923338e11,4.76267120461e11,4.87120033427e11,5.04120244741e11,4.69559064737e11,4.72201757593e11,4.69914031792e11,4.93629873162e11,4.71968584791e11,5.01452793581e11,4.80458931455e11,4.83065538379e11,4.99070229147e11,4.71609869279e11,4.71492369998e11,4.58522950715e11,4.80960881323e11,4.91960762476e11,4
.73412762655e11,4.69283546561e11,4.66574358844e11,4.67318993209e11,4.5724723899e11,4.7334516285e11]}]},"tags":["GPUInterpreter"]}]},"tags":[]}]]] \ No newline at end of file diff --git a/thesis/chapters/evaluation.tex b/thesis/chapters/evaluation.tex index d0fa1f3..1d28846 100644 --- a/thesis/chapters/evaluation.tex +++ b/thesis/chapters/evaluation.tex @@ -62,6 +62,7 @@ Document the process of performance tuning Initial: no cache; 256 blocksize; exprs pre-processed and sent to GPU on every call; vars sent on every call; frontend + dispatch are multithreaded 1.) Done before parameter optimisation loop: Frontend, transmitting Exprs and Variables (improved runtime) +2.) Tuned blocksize to have as few wasted threads as possible (new blocksize 121 -> 3 blocks -> 363 threads but 362 threads needed per expression) \subsection{Transpiler} @@ -75,6 +76,7 @@ Document the process of performance tuning Initial: no cache; 256 blocksize; exprs pre-processed and transpiled on every call; vars sent on every call; frontend + transpilation + dispatch are multithreaded 1.) Done before parameter optimisation loop: Frontend, transmitting Exprs and Variables (improved runtime) +2.) All expressions to execute are transpiled first (previously they were transpiled for every execution, even in parameter optimisation scenarios). 
Compilation is still done every time, because too little RAM was available (compilation takes the most time, so this is only a minor boost) \subsection{Comparison} Comparison of Interpreter and Transpiler as well as Comparing the two with CPU interpreter diff --git a/thesis/main.pdf b/thesis/main.pdf index 13f357eca10ac36fe27949aa0c9e015ff5b04a53..7eefe640c9403616eab3aa87c74e54aab34b15bb 100644 GIT binary patch delta 9146 zcmajDWl$VU6RwT#vbaNVcUUB_xVvizwz#|d;ts)`-~@L9A!zX61a}Wkf;%Vg^L^*{ zshX?$uCAV{uIa9+o>^*y+_gX&667;YRuNfYS!&KJVB(>RfLOA8DZ@A4{DBWFD~>rW zRb?whtJDbn#i3z&;dYxreAG1|2qaYip2IWpL zujXQ-L}4h$7WEcW{2BaKmV_scNn$poIw-i#`TQl=UEKRf?;)f?ZV5Bo!v&7{F5e$> z)E~&C9vaN|1(Bg)mry+sc_fEGPcxj0_u4#Bx%f&(P-}>TZ1C*$2{t^E^UI&ftV4Hg zKV!$@I>Y;qRexNCk9*FkX4R9Mfny%beNIhQG;X?N%B=Ky$$NcW>TB(`Q!EmM){?PTSucb(1Phxa~dD)_Kuos zmvM0@MnlJ7_2q?h8_cevL-oBm8n%M8YC`oL$lQuh%;`qQw4c}bF zn1%GmycI`CQs_ynBdAi0xTc{G?R44V#lnth*-Tn085U9eQvn7R_ znmx(%m8^Pqamg(=yA%)|&c1+Ofn92Jx9ClTCZDG4og%e5Dg)>BSMPN9Nt`_0KKbAV z?Ly=bvFC33%AXQTbQ~*DRcd3mJM$j(V&cL-<|5&_ls`7`)9~(w@JXOm<056a=}5U+ zv5jkb3fJECA?0!!T0$br-d_v%h##^wdth@^c zeY9O8^%53Lh&oGX7rXue3C5_EF#q(ca~&~kI_`c+<;vzFMr2o@K69=*GeuBI!*a@} zrJ{z8eV#}xo+9U`;q^58$Qgs?QMsG$qLPNQHxkU>50Qdy!v@Lma8N_itM|FQ;=Mq! 
zDu_0zuDVGKD9dI)8=#@rs7|7yJT|Yd{ewkPHpgfA8e+by*gkz zlc$$QU+fLtF!{4SpPtLh7H#c32KaGxk$K2iZBRxz%u>oZh<^W(&ei_t;3iVA;e-E+ z1T}MiB`wTB0Wkw4b37-j*&$t+R&-sZNXnD$%NyvWR;5Y6Nmse9iNu(GQZ`DcE{yBK z^6~I2o19;Reol5E@T-g0j*0mPFZobdI&%VJ7!n=zVgKi6cO#&sYj1@j|KNn9rD$TS z^18!hg}&u>ES4ohva6NU`It0&o4=P~Ny=SQO1N^;?YClG{=?}AC&FA>jpsqE=T#!3 zF@glny{Z~x@t-}Y;C>7X-%hj^E6~Y!A^pON!PdUNtC`Zd6(Vo zAcYR4NV-TVC&lb*JRFL9Sw4j(!UZP?S6K5NQk@o7Zr0pB;Y3Hr8-n>~2H}3_-A(JL zikq<4U|$*P!G;z`!6%mN*!L3ZzRsv)AzIz~L~is)PUj|B!0UoDczp&>{e3>2CXCW- zg=YS*jnv-icW1)H9_}-_!ay4sc7yExq4v!mjBU|IvSmoMHyIw+9UPdhV^=7S!DL_v z&JY;&tFMa#DZjFFQs!?W1M8JBknjDu=C(*>n2&tEjA<=z-@*g*QiMI+ul}?IdDgYT ziAyvBr4qxt6_K5p$5whOJt>gR3Pcit_O;da%B(=3m}#SD0;Klh19XNloJp?VmM8^J zlB;Akcv>{1(}`)M?)$MH<9m+fdw(3!WeR6OepfdQrE&i}ws9?dr+kZ194RBB9^S=y$cDkhBVduDWR2i8rsP%(3lXM142;IE;n5%Z{OVa0>Gv+IKy* zdPBR^%MW|J8vFN614rAi5h`?iTTAZGc`@x|KUnLFny=DZ_>oMvoY0i0>D$h^Ru&(m z-TDcqIR*)ZRt|M(;P+G$L04#KMJ+`Oh%DwML!q;+q%AKUA2-@@boDlZA;e*VPW}JX$Y|wX z_#qmkQv-)5-{6d9M72ysakWBb2DGWTIZJM(_=#I%e%?=ZmbDqmXa!d%eUa>EJ3o{B zxp*2bV8TMwBlz_ciPr4P3|7DDCh6!Q)U5na%(C34Agg$Ue*j-*jV_uBDEm_)otpmC zBL$a*jd}aAHRLxuY~!0_U1ifq7kkM<^;{yxl5F_L>sgrt@rk#7naOV5+0*8<7ht@X zo?WLYbIA0ilzS?V*QY#5pkiF!kb#?|ily<3KT-rpPJO8$YYLVt5ME*aiCbJ!XBKV8MmV5o#kgH}^&?etm`fw- zSGCa!T7sQ)V`pgm-e5WHs0!!1k@Rih>E*ZFQ=XC_nd!RxNvv0t^S7MBOh^TQ5#c>J z)3gG>0&~<7NzDagpS`eZ$7WM00ZD99zm3tUXeJ$VMC_!75Mzf81U^66d~|JeUh@%q z-Z)=1YHUjt^?FrF8ajj60vVy+1t;sT#vmFva=m;J1h0zj2G6%JmcBAA zWkcj7Xr{Z_JX(f>K!^Lm2nMTfTYl;K8I4^7)9i~Z&MW1%w#;z8;V!R>-lMLKjSXt- zFHNl}bpI`x8VtO=FY8oi-c{CVoK+Ne$;k{|!CP@QPf4H4_03VaA1tLPn66Ca{?bfg zlmZg;>{38l2VIpc1p|OYDV3d)+v|W3I2-^> zceMZgD)2*JkKt%eZxl-6Cz}!@k3kZ4uLOc0e4?5~rOSMXsGa;i zrX>u;w5$~56grLDYF^z`8^nXYXA{`|;pwrt1(!x$PXPZv@8m(2P8RMSZkDEP&Xs_1 zQUo4wX5Rr4Gi+Xe-I=Tn2iDUQ`sqmOTOL*rfow2c;x9O6J3Xi5x#=(#sAJhsK9v!| zUC)KH&swTSin;zzx?UNM@A#fM7s|nGyFiv368UHwp^?A0`OLfAgD~bY7`>rG7Za>}4qjv#GBThGnq9+l#9ZKKEC(SHPjhHh~s`%WLS_fK_ftx=2O>3R!Px3VsSVMok+c~} 
zW+*IFbpqcM8{oE!)AqDiWFjLlo)OF~84P>%r`S#+l5wyw}mhL(eJ2KQ5L$(}soCY8henHb}NP2?44usHG2Hti*R!Z-uO+CU3IFVa8@5maIs^Og z!(i50TQrhQSVc({#&mA8k9Da4V{Q4yJf~MBgW>m=R8%kOiAS^LIb9M}@4! zTAWw5<{0A(ml@6!oL79-GU5}LJ`LxRk@*0%PsANMj#8y@qv=qz`(IELF|F#d97zjm z=kl$8abYN^4pG4B15=rc*nlKncvbbobM5+r0#j0BpcYN^stE?iO0fbI3^i>KxD<)Yc6V6caEKbn+(r)qG>O`&roa7mo$1k57rem2ZVx zfrXOSRjFlP5Ia0yRy_{l&u2jn&C&-{GyiNxU|B^J z$ENd|>TBkk5Ok++G$1snB(}2OD?e%Lwpx5QiW0$`r zznRQoLww<$k_bv$DjJ(9{MTe?81D>0>5@4KHLvw;` z=snh_eF@!!45W(&qS?DsxkWaZb1p1xl~3WWarAoLu5~E3UZ2aHW;v+QRV`GuEh{nT z;jVlF1~GW?<_hfU_UYQ7C09%zdr6gpjqh=i`XVor?3}H?W>SR|%PrI(Py%!DNI8j!Sg7Cr~#nBasH{}*>#WFlD z3KPN$jVWO?wcENht+;OxoKU? zx0y19s68|w!_g`NzNMhiD-6v%mAKiGrH%#W7$&TD8dmeXyMT4x2R&WHwdGDc>_f|s zZXU<$IX9G%$e8Dj_S@c`QNH-hv4^7fab@`kz1Lygk{#htVJDKh=8`z;rI_@2{&pxq z))rx*N-=LP^iIEj2zHIj9CZcN$a89}wjRR0BZiHrQGCjco&)FS6{NaQr6}_!CopAW z+srFQn$as<$8)9# z|B<`V4p0}MYAQ4NbduPJa+xzrusa3!5tLS5@G^P=&8+6ngq^%ObiQ8@Db@uBAX*Ws zGp=SGpZSNBuRZ!KcuIW>fMhAm%Q<9j)s8A9>!->W-xv2Ji(%$2FfkNl)eR`N=G%+0 zWc;C3vY#rT$exI^CgYWw&6UN1@E6IZ1lt(aMxcK+qZB~$ucMeew3FeDZ71Jt8om#) zdXw5p7f#j$!l=*o5tx^FiULi!9lhoeGWU+|8ADE!T1Fo^@%%0pJJQ)qc_kS0YL@3_^ULWU*VcfF~G1?{z#6 z_#*EPJL6aZ)_}^mR@xmc(PwDfq9mnSax^r?vN~g>63H{=Y&7!wv3?TQ>idc?)CZa$ z2u30@-WMirE2RgLm!ySi1l4C8uAd#`*4{_8$=&F(c2|kkh}ezRwUV^9G4`)foZZ=e zo^qU68~&ERyqdMPnk6{pPn!C_<Ow*=m4@Ym&510k=N(TQ+WlheJbWLvNpQSt=Rx-3aea9qrpIoV zt72Ew#YfZ0$lwM>Fs?Y0bG#K=)T<%)>UBc}h)|K%6MwN7G3E-YaU5u4ym;O}T6Xj7 zXgLv!O3Kn&JE5XxS;7)8=5xf!E4)TYN1UUuE$e%P@yjI0MnNY5{{a32^&g;rp#1~A z4LXT|&CJRT1`B|BdAL}?+>Br_BQuCo#o0pI)WedBNm`H_%>5qx{{M!w<=kOl;b}_B zNXW`^aY^&@aLaH@aPjf+^GZo@L%>oHX~=tCNr)(w@c%Q!2*jyu>16F;L&d|(4@oV; zWP;IW+dCMKejJ{<+JJ7F33=_%sqWBC4thBynhPc7Lt@AoUC$HEFGfj+RoV28dT}*Y zUf|-yjWm)Kqjlwl*sl>+>*mH3@!46<B3Uz%6_Yy!kiSE=$Yvh*4dbYK9&un3}5ig z{H=+pLsYpAsEO$ovoX79r>Hb&64I#DmH*iv)k_6YF@ndJKuwcC)@B!Yn788mrS<5@ zYSU=(HRI3f`j&Xg(zyG(l|I60tI-HT5pI-Qtws2+p{hz8uhK7`Ax;zm@|8pmh^sIO z$|ZQOP@_W1QxyMRV=&w;eAgiyG1BohHUvE}m>P^O+pFP%P6lA?RRDujLlnT6er(y$ 
z#opmwTa-HZb@(wVWUL(EJ@3}9U?oY$E%KseJJM7WOkq2k`mmq|QuGk5Ew*;dcU#nY zh!X__pBE(1mQYpUG0xDl5Ul%nvtaPQdws!Z9Kms0xLJr-TikkZA9{6MFxWy*wivP+ z_KwHgMlp;x;7GSz@m$5)b>w3c&I++k;r#pH!9dJVsVy)BA@YBz&5b#_NffoCIem0{ z+4XyHlq`yTG8s)Fb*|i|&_lN0!qEeeZgDLksYS;J&A_o{tUZ%vb538uG{FD%qk_>B zxu}Hk(>So$Kfd6Kp~;@W5hEY+rZE-$Qn%us_^jjsy}s*BS}qHGbR_5HxhNyaelLCy z!6%-9e93eT=f;pESCpOE(^3renBFvs^osQcmx${l&$1-xhO~f6b*O3)STO%*LL-&* zkufs=^L;|FC4hCX`|ek6lv`Sa5}g5_;EHZ?R&i~Dqm%`ay}3Rx*IqU>$gaaH%JYL?BBGMYFNqtYw2;bAivNg|$`D?yYfPF9Da1Z1UhGwf?&T)JmKVP?MMzo-QAV{8 z|M%Xo#U9BPpG5kXB2P#jYN-UocGO+ru_x6@%32RHm`axz1Z_SAG@s+asyZAC2T$F_GL}Ir;zEaT2EDq| z$aYp!b%)!k*I#a03TB#Hd~~rn6)${PX{+k{>q{5K7y_S?sZ-LP zqDUiMh`Xd#e(xIEZHS02lX6y}OW&5nb zNxQ`aK}o#&`B5QHOcRQADFN+U6lh0%OW#!tI*0;VdZsz2fBLdbM zw-SmFMmbXf(3pl^;*U$!;2)Px!CRLU(`eJSiEfvKenAcq&>$&ysa@X*(56VD_Hmf* zIl0Q8A*Ti`8za(k9jI$sR-68I)TTBJ5EyS4smo5HFLA5RAMrq3x`pcbh53ke^m3)$ zIs)ret_wX_#N-RX>oLLn83K@J(>?s88Tjnn}AwmVZzI5yQl`5;YryzCc9+aN|Gztwqkp(+#jzC{tj4eSyI$!oltl zW^1v~t12m#hcp7%^%OTJ{B_=x-rpC-Dyg3jHRrJ%&1=!tjD9JeS`jVdYef#p;u8^Q5 z#H%{srLM&byBW{pqc7Xcru(>KA?bB6yDmk0VcpkIa+B}r(1*HXuMlHa%0LaPg7kvK zDqc7*gNLh&pKFLH5OEg-+ad`CX$HPxs@KV|sZXIt1Vp z##}AzD^>fukF6b!BUH%*b3AO!1l|z-L*M%bU>%}kxO1q1uJ42W@xy;Tg=T`h*7#HD z;wahDWs{}gu(wNK514s7TY}q)Th# zYA8q}xK36rGk1|yN|)j$D@wEcW$DubRL~WtM8ED7>p)-KU(jcyYY*Ml$*a3p%2D?4 zG*qSO|B8ykL|q9A%cG5V5PM13iu2l0AWX6`A?_dNzM)qRs2C4)%z)M5WYiG^i?ziG z=ETWcf_0}+R^S1NLxj_)9pd~;cJpGsDi=$rezlKB@3U9oo?1~TM~2{*Ea#`#Dm<}> zQJ!iZ{i-@%wup%vk2AYV1oOuxwFHq94Q?hn-l+z|ewmOd2lJSK@{oU-pyVMQTM`<= z5%tl7u{aRF-X>I=;1~kDBT2cSUpLAXF| z`u|h>ruQe;p#mLW90!&rj;t;5-?3r}_7YAbcSv9zBCoZaz z9joKl-_CZWWKk!AmG(SBsF3e2^g26LmD)YX=P5h!oU{}q8zVqPS#|E`6r8m9LJK5% zS|$>S(LXx>Tx1#B|9&(qeAxeU0Ts(9_^fy0N#o?N<83SlXun&v=GRw+h+8fm1z}BBc!UjE@ufpO6k5V#f_VlVNwn&A z=~8MR}9h#cOgw7XsztH#DlkygsY0zq@={)%?bTcv%i?zWYtNtYaP; zpjAd#4g3A3ZP%Ld>YKh&V|nG7zEZn?W|=NnM?ZN`QoYxlOkw_*Oo2ov?MIx1 z#+-S@Y@6ivp&$kRh%5MB%jrzZqri>6faW0$rdC!VLJk0mz?*+^-!gnlAD*kH~9H)XqiP`D0oEf9;R*{K5mv)AOUXh PdtNYzo?b>x7W97rNd+ky delta 8696 
zcmai(bxa*V)31SpySux)9ExjkcXxNE$ia%cJEs(Pcj&<#4z8t83KZ9(1#aIr_ulW% zFWD!%Gns5=HnW>wvJY**^=4pGk^(F+XRrmp3d)^zFW@E~yL@5;<^vie%m4!wPQ9ub zdhL0Bzx^afCN=QzNEF)NL;a8qC-gOSeXWlf7#V>-AH*;BkvMPU((MkoPE-?MuPZ38 z-4KTtc7Nt;r{J>3mF+nERE(gb?N`87kvmHo+X3{<_*VNEK1#@!h*HzoC2pMRA7$qi zfWFr`{(xNK>3v!PUV=F65t4S{dXT$k%JzKcO42!!Ufz%1r`2_=y z1hg~Ym@?caZV!^2#fT1IFQNqDSOZ9+qAQ3p47yaZdN)DhNE7jKVy;<8(XwDU`q zAS@be)@MZWW^B*k6*U-Em2*kA9dGK&aHw45f^`T!vo$@Q_Iqofp2;@g&YrinyKh&Q z=5Kg7gBa$S6#9t|ErTq{f}X01K)Fh`gz{OOz=_lc!ifZyv=GTNo!0tAy>dRWKasIb zl+~x7`5)7AQJT+!u0mB8AR7sXDdyG-qL71IIjfeTb0zGdjliqp?fy_lyw&b4#(W<= z0t#;-pSkaQjQ-}0ItF^G(a(niN1PGLI1dx6=IyO}K7$UiffHK@z1n!TqGx=GtUWqP zJ`Kx9Gj79*rc~F#orRY{W%EIu$a)j)dl-y;kx&@*;yjp{K3MAVa41>!tO{*YBILD3rJ&r29cz-J+T*6ZO!5b5`G`C%eHDBy!bREG7 zV$#RTg{s=vAUdJgU=0WK{I0@BKdd`|&@a5{!}N=4;e8h8RetBybVg^*O7(!YDhvje z1a&j)ozC_*s0>>w2}6s9t82a6YH8pC@;y-IV*DZi-FjEv5~Sh>11b=TFH~ScpwX!9 zAxD4ZQQo3AK8R+I&m_+9(vCCAO$rlW;892ek2;rTdw87>#KQ%|dkZC7lre{Cia*rf zc9ESS1F2%Jfp+?UCb?CAhU7o;_$b9^DY<836!)y6=3C;HaXK|ecTkL>h5I{b()gCi z&CA??Am>ROh=nzF+hRmfitYyKu6uq&DX7tf%t#{70it4`k!RmMCpeeMUsgKu%48IL zLwm^}35hnJG!)-G%L86a{ynzKLj zUT5??Nu^~oUNEY@(7AfWdeQU8p=a*b8khF9;0&mNh~kGox{)XT-i}QU5fUk;y*-~r zw-GBbEnW-|>xZD&kLM&59Q^SaFxVkqBvd3XnI*c6B+wae!V%e|MQndORB@h%X!_ zno;uu38ZT(baj0wF`XeC9++T9$_P_M%1Grn%3&r6ubr>ZpWbL!<|)GSG}Y@Nw~0Av zmBnu>%2iM*K+fdNH1l<|(ktU^p1*XcWxug6<=4X>te#jlvPW|f#r7cN)#tLm4_4Ha zc8%J$W^-oZ?C>j(kzN127_h7$i60?eILW08;zW%Ee5!3@&+g24>eAzF-Y(Q^pV}s* zTJ)i%vsb=QmZLNGxeu1-xU}Iw$)i4RjgVg{mC>&;bxo1IVVZw%E7qkGY8yx`XbDAR zeP@KAccdQNh(q&<4D3lcnpf|`bB^SjS-MX-paI26oHNLl7Q8>|&8`&5ouhEOS>qT$ zgDxZhbi2>kmabX&*Ecn4 z4Ve}56xn(j;LpT-E5bCpBQnq7q+ll)HpTxn;uDTxYd75de&~YS2l#utudvz=x5QFrO`29FTKfF zT}-ldGhBIXaRlxS)t)r;+#N#V>noS2py@KWs|LN3`G|bd+IIL{g;ruA2U;1K-z?Sf ze4~YwZ9RE!zzU~ox&@oH#vepLTN5B{^Zo}FQ~7p8_AQP9B;f^#s6q>38m3gA$_ul; zuvJ*Kq94z04-rKpaO-hhBo1Y*{h5N_<@L z{?chU!CFy9<29VZk2f}CE0kpx?$)9fF<}!9{z=Ub2;vuwm;a=*Z{4FTFz)zGtt$|9 zH_GklN}JU4wQhyhgYGvhXi}eRt-sy*4MviT+3`0POh^|uU$AE9V1`F(N0*}SOCpqq 
z6`_t_Y-BRZTsl9aD+4Y+kC@pwsNY97zHh<6F&@Z^uD^?gm9i_J60>{@f+ zqlUmVO-$U(ANZk;_+>#&9|J6nuJn{9{e=8qeH;rb`!dq=9VdUB-EwgaU*SYUl^CAu zS;?h$SY`cuO)RgzIJ~#RQ(4=M-RxFa6IA!~C+IGg&ycOs{=l$C38L-i*`fBEhR^K^UT5b4;&%IhzBlcixx;iIdp>MX zMuuio0Ut&`))<8lY?GS-i1bUdw2UZ&Gzd#-G3$KUGz_1#hAD!Zl@|m^c2wC}R&mKY z^g{}HwoV0l48{|FX@zaF;?$wyA3{&{u+^xi-XAzYHiH4D_yTT~-mdA)0*k1SWrxOL5=_eto+%OxCwr1D5*+k=0BRIL*avb@tV%%bOZ2!CM~jZQ4TD4%cjzPT?#xvw(W50J^LN7o0#S&I4}k8V$)fq?;@ULX8Lck(chrbnmx1NNc;HYfp) z$o>|ARbE>*%6?0BGZE)Ta4y`YbcmV9z9wY~OL)Ixd0ly}TI!yVGKOoX503YO4|clt zR;ED!A@W&?hVAJ0ukWTa*So*YgArGWYM-opKc@<$7h*IX=}%+jRNmI|;3apy2}+1a zJo!B>^f!|=)B60fncT_wgzm(YBN^_|gZS;sqU;*$C`oHw8edSFHi;U2SU;&<@@M*2S^1ho-k9GgQ9Mfkb~5;5WLvPGH%ox!i(7}A3;>WVKGdvK+T2xR2}I`O7W`76fSivTtf5>72D_Qi#M zfpM`Wa`>h;w3wdzf`Yd@2P!515CsX)bc|Q`zD)Cc%s`YlCELRX#~<0f;+hw@&+~Jt z>*@@fK^Am0scWz5#RA)k#d`N*((^*;_H3g{wJUYz9qkQYH`X~C*S1#|SKAF%SsL}7 z*gD(y>M`7tTvajN;eTAME?%f#HeMV1_YH8ls=FR^OeJTCe^Q#CxIbk0vcAp)3Xl#; zB-~i@0(td^?TH?IXAF_IiM}Hn(1^CVJ1F_3Y)xf;ar;O*srq|t)2i$>Wlf^tJ>-vD zdr#N%GGA1+mNAg(fDc9@Ob7 zodRT2HYU2o*nE^g=X@5jaC{(r9kE84~KG6 zf;4x3XeVXft8(M9SVfjySb+OIGwBjsP)Sy&WzWZD z)NuFeJOt<@X2qu$c`!>#t4KkcqH3=;U-fhBK5j4t(-c2uG<`hRv&fTmr(=@UP26#Q zz0SDVeTEClrJ+&KpBt!)ndsMUbVYYE3NcmrBzo9|uFZ8~n^AEWm+y!!idzTi7m1j&` zqOGZ-U`w5z!VdUlH}0#L7fMp3Y+((Ul5I2NQ*UD1dIBxU8(6d96>c2Q^WiZNndb>f zH_9mkvAigR<+h&r60d3ouxXqfQC;kk~r=J@GksN*M?KKK&H} zN;Qc_GE(fBT_Xynw z#qI2Ea;&SPuU$kYfuOoymXXNTz9w=S$X;kvq3Q9lIvwxpFZ?p--3BC_kcVnKTdYi- ziie)}@~(-Cc|>42m4sHeG=`*VHBIG^#_f{7dz<@X`Cd-6Iccb4z!uYE4c_~*h})}-mSy&ym8aKdav4JlwH}HpfnDyS6D_Gq z=S#5b`7dbY4bwir0^%Pk@gL{G-63R{*lMZBw$d-90r)1%SD0$J< zhz2|7=k$J1NS^{O6k2xe9IEC)LNK4{zaEa~*SN-SIyEY9JZsM^WE{ ze&x~a=<#;LJGr!#nx}I_RF#FqbL_pa7o?FR#qM z#^C1{66WC-kO9g|^GfmmlUwov;?$!5YltZ-mx`^My^jMmAHNXyevl_$9@B5QS}7p|z-olAh1fcbU<61NjO2p&391K0~1MVZFny}80zFH9b82AJm;!(jN<#sXpz?#7EbQwjCS5lUIGWn?w zTfFDo-@%KT98M-+^WR;cA)y|~g^0Qy7$!*VB?lMF;h&la&yeyrrMM9>!#%lSF(VbZ z5t75cT9A^10ONz`xd==UMGw@O@TFaL;^yDKBE2R_D9sSqcI_}M(SSJ#H!CY(vO3rT 
z$iFN*!p|*`)={1z)ayw6Q-NMmU%HUz41J!(K0O9Xe@T|(c3;S2T4{#KhWL5_*dqx% z@U{^O_(y8$Kza#z;8Phg1AmFY2=7nQnNLr5vY!ymCi|83Hz{OmLw$~F*! z3u2VuO7CnWYuA|a_%7`sZasq=VS~3T8wY)=lQexDlcgBSsiJ|p*{8NsMK$%I=W{}_ z0gxMAQF?G2t=T~gtFkc02M1YIJWHw12K|=NoEjYs@@y9WXGGR zMaZ?n&U_>rjO~juVYx&@2%$?H!TML1Mky`;{iV9$-6*DNPWz-X`dY3O7WMIBZCjhe z_o40N>Mwc*;I`;A`nPB~^OxG_9>)%M&m8(|6qZZC8l1u=Mnr-T z6Sr;uRDQ0n@Jlr(NBqb=J_rv)XAB=J_`h8XJHW?gKS<(1PTsP8t|+#T9J+ly&?W}n_=yHL}Eqp7uo|_NgKsOD5Pp65l8wpw51SK9^h*tk71`t8yxUmQ! z3{A+KkqjOP%mL3`H`OM4+TBByUbZAT6T&eX_ku=1CK*c-)M*Wa=%E%}n9sJZFoU)f zU_9^=`hm@##NZ__caY9y^jK^B!9XWj_3|I1W^e8xGBSuA*tfpKEM!#EYSp5PGGX=+!=N2}aBwFZ+E~ z)l_C`Ba?uTDkm&fy~iczTN#gy zz_1x!ot&cfAql^#2RQeJS>na%R5L;M&+oyq{SLe`Q6#i8*@Ni>-ruw?b*;+Gn^=12 zCNoGkK&~^mv63Xj6XBxFFxB3p@qcc68sSJK=^R=X`H0HeU2|(CYY85-Ib|12XE>pV zA0!h9f-3lRL<|6W#@9tZR@!~ROYyxISsSjSB9Tsi!Z7rLe<9?NNpTstj|yQOZ!&JT z#jM&v-Y)0#8j_5!ml~5=0B3Mpb&q+KKCU9`Ae3iG3F4-o-p!u`cFfq`e54PlfiQl? z*H1X6TS)SQG#=RK#@C}iYW+u))+Fjyfw?d-gMr+3<;E#B{ts#voBCrXjA#8d#BJ1L%*{vI%u>S6tY9et^pvk6ue!z4M z)bLYrHP#0CHae>~C@LAeW4zmwMJdAJorf8PA1~?D<62&MvBots#Wydn_qI!uXDl^1 zpO#|ZF3ci_;lgP%$aJAtb~`_Dy%(IGN|dSD~abl73*KmV}+ zNNQQ)Njk$~Ajnc+D@am*S)0n~a83syvruPo!yxPYZoRpQ_@rsDj*+Uqj;5g|$r`T2 zvO5x%I>b#3>%dH1!i}h*Ey)kN056NDp(x1!n+30fn<@#kJFqFCOxA(ZLQ!C#&hkj# zb%fEu{TBeToSVXWQ<0G2mtyNN@gJPJbk;z+L1AVBr#B&|svE#nv4C?t2tXQ<2$^^d z0Mt=x9*rD?6flrEFSRx-i%#fF09LWEz=-55MvHTR}i0Em*bC& zUMQ^tsr?``5YcXsr3sxGwnhkc>w@)Kp*4>2j1d$RtAWU?*Cg~2gJ8(7Z&|I!?&*lii~blpD_QA zRmsvjtHXf*rmWjh^>fSiAZMqn+L3GONh%&*3$6tB*Z)ft3GLWbPzUk9N(O!k1jbyB zM1;cUOQAU+xPk#b5i4OK6i509bB(CvrUXDAR*xN>-M(r_pmsQ2b#RZ%L5@gUxXr14 zr5=usB%J}mrWDaXIb-~E!Q^u*4lV^DqXV)IT|lR8S<93G_J)xTd3~!rnQS-2htG2B7BtJn}hdf^r|#5QU0#~R-UJYxmK;UX6J{m(d$94 zyIWPQ;g4xTKbmJQEsEQhhMgG&rhjChVMdWsL{^g4z>P@Xt^MZ$vJYDRb6pwz*M*%a zTj1*SGYT&;>T7HnQI#2(QOz%`2#nQ~qaJHpYE4bx0?$ta0;8y@T$x8$XH2mEd8kRG zmq&3@LUvf3Lc?u=KcHu3(&TBP^w3N7Xh=slD4*wt(Nw6YU;GoVGL(T`9~D4fS-1F# z<%?bQi+|W1K0iwhx;`^@lYkj_$b;ii+GRSFd0Hi#KAr2ubi|KxWZ7(@BAnjD5O;9T 
z+7(^B)`RH=vS8({8Pww@;5f1&ST4b0Yc+6aJ{n;!#Ha8i}?goo)oz{X31Aw zoGNC6Ul`Ytx~0)fyW4a78f8q}#11ixCT3Xhf#Wzvph`OFWNY#oZ)5K3G8vHCN9V#h zpzLoSr<5%UC7+rxpq$Ud!m(sJaW^d3ec#roYUzBC9*h~+BcG=q*-r&+f9wn!e|F{zY{nk1I#xHhjAh@l)`Ah6BoF%LB^tO7zBZr#P0n$2 zrrVw%6`NB9a-oDttzCnVLO+ezAIF>I6(9H^Br0Bm$Bswb0`V0U&q?&2Mj8_#F&8fc zg9t|P*(S6?lfM7jg}svv9~Ddbl1a75s``=BTNO8zm>xUsP>>=`;4{))N$gy-rXPs% zw?UUf)+ASTr%l$lM1|f4>kl)HTB|G~bIn@N+{wkcM(xf?@wbolh_&V~QASC6pwsqu z)BL)(f*)G78>G+nD^AVPDc{WMbyu$*I;&Tll4AJUH|wrt-nDDLlfIkk)DGw0NNCqC z4L31O`?||M*_(9Dkw-4x?;$whUBJ_;^^vCXU6V@ojGV%N z@^EiUv{ceZmeYBjaNH5g